diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh index 4fd88ea81c84a8..91e719c52d4363 100755 --- a/.ci/monolithic-windows.sh +++ b/.ci/monolithic-windows.sh @@ -44,6 +44,8 @@ pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt # see https://github.com/llvm/llvm-project/pull/82393 and # https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840/40 # for further information. +# We limit the number of parallel compile jobs to 16 to control memory +# consumption and improve build reliability. cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D LLVM_ENABLE_PROJECTS="${projects}" \ -G Ninja \ @@ -58,7 +60,9 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ -D MLIR_ENABLE_BINDINGS_PYTHON=ON \ -D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \ -D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \ - -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" + -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" \ + -D LLVM_PARALLEL_COMPILE_JOBS=16 \ + -D LLVM_PARALLEL_LINK_JOBS=4 echo "--- ninja" # Targets are not escaped as they are passed as separate arguments. diff --git a/.github/workflows/restart-preempted-libcxx-jobs.yaml b/.github/workflows/restart-preempted-libcxx-jobs.yaml new file mode 100644 index 00000000000000..88924fb3cd7791 --- /dev/null +++ b/.github/workflows/restart-preempted-libcxx-jobs.yaml @@ -0,0 +1,107 @@ +name: Restart Preempted Libc++ Workflow + +# The libc++ builders run on preemptable VMs, which can be shutdown at any time. +# This workflow identifies when a workflow run was canceled due to the VM being preempted, +# and restarts the workflow run. + +# We identify a canceled workflow run by checking the annotations of the check runs in the check suite, +# which should contain the message "The runner has received a shutdown signal." + +# Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow. 
+ +on: + workflow_run: + workflows: [Build and Test libc\+\+] + types: + - completed + +permissions: + contents: read + +jobs: + restart: + if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled') + name: "Restart Job" + permissions: + statuses: read + checks: read + actions: write + runs-on: ubuntu-latest + steps: + - name: "Restart Job" + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1 + with: + script: | + const failure_regex = /Process completed with exit code 1./ + const preemption_regex = /The runner has received a shutdown signal/ + + console.log('Listing check runs for suite') + const check_suites = await github.rest.checks.listForSuite({ + owner: context.repo.owner, + repo: context.repo.repo, + check_suite_id: context.payload.workflow_run.check_suite_id + }) + + check_run_ids = []; + for (check_run of check_suites.data.check_runs) { + console.log('Checking check run: ' + check_run.id); + console.log(check_run); + if (check_run.status != 'completed') { + console.log('Check run was not completed. Skipping.'); + continue; + } + if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') { + console.log('Check run had conclusion: ' + check_run.conclusion + '. 
Skipping.'); + continue; + } + check_run_ids.push(check_run.id); + } + + has_preempted_job = false; + + for (check_run_id of check_run_ids) { + console.log('Listing annotations for check run: ' + check_run_id); + + annotations = await github.rest.checks.listAnnotations({ + owner: context.repo.owner, + repo: context.repo.repo, + check_run_id: check_run_id + }) + + console.log(annotations); + for (annotation of annotations.data) { + if (annotation.annotation_level != 'failure') { + continue; + } + + const preemption_match = annotation.message.match(preemption_regex); + + if (preemption_match != null) { + console.log('Found preemption message: ' + annotation.message); + has_preempted_job = true; + } + + const failure_match = annotation.message.match(failure_regex); + if (failure_match != null) { + // We only want to restart the workflow if all of the failures were due to preemption. + // We don't want to restart the workflow if there were other failures. + console.log('Choosing not to rerun workflow because we found a non-preemption failure'); + console.log('Failure message: ' + annotation.message); + return; + } + } + } + + if (!has_preempted_job) { + console.log('No preempted jobs found. 
Not restarting workflow.'); + return; + } + + console.log("Restarted workflow: " + context.payload.workflow_run.id); + await github.rest.actions.reRunWorkflowFailedJobs({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: context.payload.workflow_run.id + }) + + diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index c3208392df1566..828f13805a6980 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1414,13 +1414,21 @@ IdentifierNamingCheck::getDiagInfo(const NamingCheckId &ID, }}; } +StringRef IdentifierNamingCheck::getRealFileName(StringRef FileName) const { + auto Iter = RealFileNameCache.try_emplace(FileName); + SmallString<256U> &RealFileName = Iter.first->getValue(); + if (!Iter.second) + return RealFileName; + llvm::sys::fs::real_path(FileName, RealFileName); + return RealFileName; +} + const IdentifierNamingCheck::FileStyle & IdentifierNamingCheck::getStyleForFile(StringRef FileName) const { if (!GetConfigPerFile) return *MainFileStyle; - SmallString<128> RealFileName; - llvm::sys::fs::real_path(FileName, RealFileName); + StringRef RealFileName = getRealFileName(FileName); StringRef Parent = llvm::sys::path::parent_path(RealFileName); auto Iter = NamingStylesCache.find(Parent); if (Iter != NamingStylesCache.end()) diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index 27c8e4bc768c40..646ec0eac8dd1c 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -205,6 +205,7 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { const NamingCheckFailure &Failure) const override; const FileStyle &getStyleForFile(StringRef FileName) const; + 
StringRef getRealFileName(StringRef FileName) const; /// Find the style kind of a field in an anonymous record. StyleKind findStyleKindForAnonField( @@ -222,6 +223,7 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { /// Stores the style options as a vector, indexed by the specified \ref /// StyleKind, for a given directory. mutable llvm::StringMap NamingStylesCache; + mutable llvm::StringMap> RealFileNameCache; FileStyle *MainFileStyle; ClangTidyContext *Context; const bool GetConfigPerFile; diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst index 44d97f7b363bff..271970c292c8fa 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/identifier-length.rst @@ -28,10 +28,7 @@ The following options are described below: .. code-block:: c++ - int doubler(int x) // warns that x is too short - { - return 2 * x; - } + int i = 42; // warns that 'i' is too short This check does not have any fix suggestions in the general case since variable names have semantic value. @@ -50,7 +47,10 @@ The following options are described below: .. code-block:: c++ - int i = 42; // warns that 'i' is too short + int doubler(int x) // warns that x is too short + { + return 2 * x; + } This check does not have any fix suggestions in the general case since variable names have semantic value. diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index b3e2b870ae5f9a..3d21e37784b363 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -123,6 +123,44 @@ severe that error recovery won't be able to recover sensibly from them (thus spewing a ton of bogus errors). One example of this class of error are failure to ``#include`` a file. 
+Diagnostic Wording +^^^^^^^^^^^^^^^^^^ +The wording used for a diagnostic is critical because it is the only way for a +user to know how to correct their code. Use the following suggestions when +wording a diagnostic. + +* Diagnostics in Clang do not start with a capital letter and do not end with + punctuation. + + * This does not apply to proper nouns like ``Clang`` or ``OpenMP``, to + acronyms like ``GCC`` or ``ARC``, or to language standards like ``C23`` + or ``C++17``. + * A trailing question mark is allowed. e.g., ``unknown identifier %0; did + you mean %1?``. + +* Appropriately capitalize proper nouns like ``Clang``, ``OpenCL``, ``GCC``, + ``Objective-C``, etc and language standard versions like ``C11`` or ``C++11``. +* The wording should be succinct. If necessary, use a semicolon to combine + sentence fragments instead of using complete sentences. e.g., prefer wording + like ``'%0' is deprecated; it will be removed in a future release of Clang`` + over wording like ``'%0' is deprecated. It will be removed in a future release + of Clang``. +* The wording should be actionable and avoid using standards terms or grammar + productions that a new user would not be familiar with. e.g., prefer wording + like ``missing semicolon`` over wording like ``syntax error`` (which is not + actionable) or ``expected unqualified-id`` (which uses standards terminology). +* The wording should clearly explain what is wrong with the code rather than + restating what the code does. e.g., prefer wording like ``type %0 requires a + value in the range %1 to %2`` over wording like ``%0 is invalid``. +* The wording should have enough contextual information to help the user + identify the issue in a complex expression. e.g., prefer wording like + ``both sides of the %0 binary operator are identical`` over wording like + ``identical operands to binary operator``. +* Use single quotes to denote syntactic constructs or command line arguments + named in a diagnostic message. 
e.g., prefer wording like ``'this' pointer + cannot be null in well-defined C++ code`` over wording like ``this pointer + cannot be null in well-defined C++ code``. + The Format String ^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 182f8b5824258e..9091f6341bd9b8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -541,6 +541,9 @@ Improvements to Clang's diagnostics - Clang emits a ``-Wparentheses`` warning for expressions with consecutive comparisons like ``x < y < z``. Fixes #GH20456. +- Clang no longer emits a "declared here" note for a builtin function that has no declaration in source. + Fixes #GH93369. + Improvements to Clang's time-trace ---------------------------------- @@ -629,6 +632,9 @@ Bug Fixes in This Version - ``__is_array`` and ``__is_bounded_array`` no longer return ``true`` for zero-sized arrays. Fixes (#GH54705). +- Correctly reject declarations where a statement is required in C. + Fixes #GH92775. + Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -802,6 +808,8 @@ Bug Fixes to C++ Support - Fixed a regression introduced in Clang 18 causing a static function overloading a non-static function with the same parameters not to be diagnosed. (Fixes #GH93456). - Clang now diagnoses unexpanded parameter packs in attributes. (Fixes #GH93269). +- Clang now allows ``@$`` in raw string literals. Fixes (#GH93130). +- Fix an assertion failure when checking invalid ``this`` usage in the wrong context. (Fixes #GH91536). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h index b706864798baaf..04daf511f58713 100644 --- a/clang/include/clang/AST/StmtOpenACC.h +++ b/clang/include/clang/AST/StmtOpenACC.h @@ -31,6 +31,8 @@ class OpenACCConstructStmt : public Stmt { /// The location of the directive statement, from the '#' to the last token of /// the directive. 
SourceRange Range; + /// The location of the directive name. + SourceLocation DirectiveLoc; /// The list of clauses. This is stored here as an ArrayRef, as this is the /// most convienient place to access the list, however the list itself should @@ -39,8 +41,9 @@ class OpenACCConstructStmt : public Stmt { protected: OpenACCConstructStmt(StmtClass SC, OpenACCDirectiveKind K, - SourceLocation Start, SourceLocation End) - : Stmt(SC), Kind(K), Range(Start, End) {} + SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation End) + : Stmt(SC), Kind(K), Range(Start, End), DirectiveLoc(DirectiveLoc) {} // Used only for initialization, the leaf class can initialize this to // trailing storage. @@ -59,6 +62,7 @@ class OpenACCConstructStmt : public Stmt { SourceLocation getBeginLoc() const { return Range.getBegin(); } SourceLocation getEndLoc() const { return Range.getEnd(); } + SourceLocation getDirectiveLoc() const { return DirectiveLoc; } ArrayRef clauses() const { return Clauses; } child_range children() { @@ -81,9 +85,11 @@ class OpenACCAssociatedStmtConstruct : public OpenACCConstructStmt { protected: OpenACCAssociatedStmtConstruct(StmtClass SC, OpenACCDirectiveKind K, - SourceLocation Start, SourceLocation End, - Stmt *AssocStmt) - : OpenACCConstructStmt(SC, K, Start, End), AssociatedStmt(AssocStmt) {} + SourceLocation Start, + SourceLocation DirectiveLoc, + SourceLocation End, Stmt *AssocStmt) + : OpenACCConstructStmt(SC, K, Start, DirectiveLoc, End), + AssociatedStmt(AssocStmt) {} void setAssociatedStmt(Stmt *S) { AssociatedStmt = S; } Stmt *getAssociatedStmt() { return AssociatedStmt; } @@ -126,10 +132,10 @@ class OpenACCComputeConstruct final friend class ASTStmtReader; friend class ASTContext; OpenACCComputeConstruct(unsigned NumClauses) - : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, - OpenACCDirectiveKind::Invalid, - SourceLocation{}, SourceLocation{}, - /*AssociatedStmt=*/nullptr) { + : OpenACCAssociatedStmtConstruct( + 
OpenACCComputeConstructClass, OpenACCDirectiveKind::Invalid, + SourceLocation{}, SourceLocation{}, SourceLocation{}, + /*AssociatedStmt=*/nullptr) { // We cannot send the TrailingObjects storage to the base class (which holds // a reference to the data) until it is constructed, so we have to set it // separately here. @@ -141,11 +147,11 @@ class OpenACCComputeConstruct final } OpenACCComputeConstruct(OpenACCDirectiveKind K, SourceLocation Start, - SourceLocation End, + SourceLocation DirectiveLoc, SourceLocation End, ArrayRef Clauses, Stmt *StructuredBlock) : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, K, Start, - End, StructuredBlock) { + DirectiveLoc, End, StructuredBlock) { assert(isOpenACCComputeDirectiveKind(K) && "Only parallel, serial, and kernels constructs should be " "represented by this type"); @@ -169,8 +175,8 @@ class OpenACCComputeConstruct final unsigned NumClauses); static OpenACCComputeConstruct * Create(const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc, - SourceLocation EndLoc, ArrayRef Clauses, - Stmt *StructuredBlock); + SourceLocation DirectiveLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *StructuredBlock); Stmt *getStructuredBlock() { return getAssociatedStmt(); } const Stmt *getStructuredBlock() const { diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index e59cccccdd3690..ef9df1e9d8b4aa 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2025,9 +2025,12 @@ def Convergent : InheritableAttr { def NoInline : DeclOrStmtAttr { let Spellings = [CustomKeyword<"__noinline__">, GCC<"noinline">, CXX11<"clang", "noinline">, C23<"clang", "noinline">, + CXX11<"msvc", "noinline">, C23<"msvc", "noinline">, Declspec<"noinline">]; - let Accessors = [Accessor<"isClangNoInline", [CXX11<"clang", "noinline">, - C23<"clang", "noinline">]>]; + let Accessors = [Accessor<"isStmtNoInline", [CXX11<"clang", "noinline">, + C23<"clang", "noinline">, + 
CXX11<"msvc", "noinline">, + C23<"msvc", "noinline">]>]; let Documentation = [NoInlineDocs]; let Subjects = SubjectList<[Function, Stmt], WarnDiag, "functions and statements">; diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index fd8c1b480d6da0..4e48ff48b60f5f 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -135,6 +135,10 @@ TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_min_f16x8, "V8hV8hV8h", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "half-precision") TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128") diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h index d8079553118287..d71857e8e5dcc3 100644 --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -28,8 +28,7 @@ namespace charinfo { CHAR_LOWER = 0x0040, // a-z CHAR_UNDER = 0x0080, // _ CHAR_PERIOD = 0x0100, // . - CHAR_RAWDEL = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"' - CHAR_PUNCT = 0x0400 // `$@() + CHAR_PUNCT = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'`$@() }; enum { @@ -152,7 +151,7 @@ LLVM_READONLY inline bool isHexDigit(unsigned char c) { /// Note that '_' is both a punctuation character and an identifier character! 
LLVM_READONLY inline bool isPunctuation(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; + return (InfoTable[c] & (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT)) != 0; } /// Return true if this character is an ASCII printable character; that is, a @@ -160,8 +159,8 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) { /// terminal. LLVM_READONLY inline bool isPrintable(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| - CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; + return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT | + CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0; } /// Return true if this is the body character of a C preprocessing number, @@ -175,8 +174,9 @@ LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) { /// Return true if this is the body character of a C++ raw string delimiter. LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| - CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; + return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT | + CHAR_UNDER | CHAR_PUNCT)) != 0 && + c != '(' && c != ')'; } enum class EscapeChar { diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 5a4551a96ca4e7..25fbfe83fa2bcf 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -111,6 +111,14 @@ def warn_cxx98_compat_raw_string_literal : Warning< "raw string literals are incompatible with C++98">, InGroup, DefaultIgnore; +def warn_cxx26_compat_raw_string_literal_character_set : Warning< + "'%0' in a raw string literal delimiter is incompatible " + "with standards before C++2c">, + InGroup, DefaultIgnore; +def 
ext_cxx26_raw_string_literal_character_set : Extension< + "'%0' in a raw string literal delimiter is a C++2c extension">, + InGroup, DefaultIgnore; + def warn_multichar_character_literal : Warning< "multi-character character constant">, InGroup; def warn_four_char_character_literal : Warning< diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 8493026f5f7a69..d054b8cf0d2405 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -467,15 +467,18 @@ class Parser : public CodeCompletionHandler { /// Flags describing a context in which we're parsing a statement. enum class ParsedStmtContext { + /// This context permits declarations in language modes where declarations + /// are not statements. + AllowDeclarationsInC = 0x1, /// This context permits standalone OpenMP directives. - AllowStandaloneOpenMPDirectives = 0x1, + AllowStandaloneOpenMPDirectives = 0x2, /// This context is at the top level of a GNU statement expression. - InStmtExpr = 0x2, + InStmtExpr = 0x4, /// The context of a regular substatement. SubStmt = 0, /// The context of a compound-statement. - Compound = AllowStandaloneOpenMPDirectives, + Compound = AllowDeclarationsInC | AllowStandaloneOpenMPDirectives, LLVM_MARK_AS_BITMASK_ENUM(InStmtExpr) }; @@ -3656,6 +3659,7 @@ class Parser : public CodeCompletionHandler { struct OpenACCDirectiveParseInfo { OpenACCDirectiveKind DirKind; SourceLocation StartLoc; + SourceLocation DirLoc; SourceLocation EndLoc; SmallVector Clauses; // TODO OpenACC: As we implement support for the Atomic, Routine, Cache, and diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 6f69fa08939b82..66144de4340a8a 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -379,7 +379,7 @@ class SemaOpenACC : public SemaBase { /// Called after the construct has been parsed, but clauses haven't been /// parsed. 
This allows us to diagnose not-implemented, as well as set up any /// state required for parsing the clauses. - void ActOnConstruct(OpenACCDirectiveKind K, SourceLocation StartLoc); + void ActOnConstruct(OpenACCDirectiveKind K, SourceLocation DirLoc); /// Called after the directive, including its clauses, have been parsed and /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES @@ -400,6 +400,7 @@ class SemaOpenACC : public SemaBase { /// declaration group or associated statement. StmtResult ActOnEndStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult AssocStmt); diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp index 8c77b563657d90..d8e33ff421c06c 100644 --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -90,7 +90,7 @@ QualType APValue::LValueBase::getType() const { // For a materialized temporary, the type of the temporary we materialized // may not be the type of the expression. 
if (const MaterializeTemporaryExpr *MTE = - clang::dyn_cast(Base)) { + llvm::dyn_cast(Base)) { SmallVector CommaLHSs; SmallVector Adjustments; const Expr *Temp = MTE->getSubExpr(); diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index a381a8dd7b62c3..47899b344c97ab 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -23,15 +23,14 @@ OpenACCComputeConstruct::CreateEmpty(const ASTContext &C, unsigned NumClauses) { return Inst; } -OpenACCComputeConstruct * -OpenACCComputeConstruct::Create(const ASTContext &C, OpenACCDirectiveKind K, - SourceLocation BeginLoc, SourceLocation EndLoc, - ArrayRef Clauses, - Stmt *StructuredBlock) { +OpenACCComputeConstruct *OpenACCComputeConstruct::Create( + const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc, + SourceLocation DirLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *StructuredBlock) { void *Mem = C.Allocate( OpenACCComputeConstruct::totalSizeToAlloc( Clauses.size())); - auto *Inst = new (Mem) - OpenACCComputeConstruct(K, BeginLoc, EndLoc, Clauses, StructuredBlock); + auto *Inst = new (Mem) OpenACCComputeConstruct(K, BeginLoc, DirLoc, EndLoc, + Clauses, StructuredBlock); return Inst; } diff --git a/clang/lib/Analysis/MacroExpansionContext.cpp b/clang/lib/Analysis/MacroExpansionContext.cpp index 564e359668a510..b212b7f2457927 100644 --- a/clang/lib/Analysis/MacroExpansionContext.cpp +++ b/clang/lib/Analysis/MacroExpansionContext.cpp @@ -12,7 +12,7 @@ #define DEBUG_TYPE "macro-expansion-context" -static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, +static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, clang::Token Tok); namespace clang { diff --git a/clang/lib/Basic/CharInfo.cpp b/clang/lib/Basic/CharInfo.cpp index d02054c9718f5f..26d693b8e9b943 100644 --- a/clang/lib/Basic/CharInfo.cpp +++ b/clang/lib/Basic/CharInfo.cpp @@ -31,20 +31,20 @@ const uint16_t clang::charinfo::InfoTable[256] = { 0 , 0 , 
0 , 0 , //32 SP 33 ! 34 " 35 # //36 $ 37 % 38 & 39 ' - CHAR_SPACE , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_SPACE , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , //40 ( 41 ) 42 * 43 + //44 , 45 - 46 . 47 / - CHAR_PUNCT , CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PERIOD , CHAR_PUNCT , //48 0 49 1 50 2 51 3 //52 4 53 5 54 6 55 7 CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , //56 8 57 9 58 : 59 ; //60 < 61 = 62 > 63 ? - CHAR_DIGIT , CHAR_DIGIT , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_DIGIT , CHAR_DIGIT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , //64 @ 65 A 66 B 67 C //68 D 69 E 70 F 71 G CHAR_PUNCT , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , @@ -59,8 +59,8 @@ const uint16_t clang::charinfo::InfoTable[256] = { CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , //88 X 89 Y 90 Z 91 [ //92 \ 93 ] 94 ^ 95 _ - CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_RAWDEL , - CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_UNDER , //96 ` 97 a 98 b 99 c //100 d 101 e 102 f 103 g CHAR_PUNCT , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , @@ -75,6 +75,6 @@ const uint16_t clang::charinfo::InfoTable[256] = { CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , //120 x 121 y 122 z 123 { //124 | 125 } 126 ~ 127 DEL - CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0 + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , 0 }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp 
index 5edf8c79709131..a3c65105033247 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -20806,6 +20806,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } case WebAssembly::BI__builtin_wasm_min_f32: case WebAssembly::BI__builtin_wasm_min_f64: + case WebAssembly::BI__builtin_wasm_min_f16x8: case WebAssembly::BI__builtin_wasm_min_f32x4: case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -20816,6 +20817,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } case WebAssembly::BI__builtin_wasm_max_f32: case WebAssembly::BI__builtin_wasm_max_f64: + case WebAssembly::BI__builtin_wasm_max_f16x8: case WebAssembly::BI__builtin_wasm_max_f32x4: case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -20824,6 +20826,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmin_f16x8: case WebAssembly::BI__builtin_wasm_pmin_f32x4: case WebAssembly::BI__builtin_wasm_pmin_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -20832,6 +20835,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmax_f16x8: case WebAssembly::BI__builtin_wasm_pmax_f32x4: case WebAssembly::BI__builtin_wasm_pmax_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 76704c4d7be4a4..db8e6f55302adc 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -1340,7 +1340,7 @@ void CodeGenPGO::setProfileVersion(llvm::Module &M) { llvm::APInt(64, ProfileVersion)), VarName); - 
IRLevelVersionVariable->setVisibility(llvm::GlobalValue::DefaultVisibility); + IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility); llvm::Triple TT(M.getTargetTriple()); if (TT.supportsCOMDAT()) { IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 9849c59685cca7..b141e5f2adfab1 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2227,10 +2227,19 @@ void Generic_GCC::GCCInstallationDetector::init( SmallVector CandidateBiarchTripleAliases; // Add some triples that we want to check first. CandidateTripleAliases.push_back(TargetTriple.str()); - std::string TripleNoVendor = TargetTriple.getArchName().str() + "-" + - TargetTriple.getOSAndEnvironmentName().str(); - if (TargetTriple.getVendor() == llvm::Triple::UnknownVendor) + std::string TripleNoVendor, BiarchTripleNoVendor; + if (TargetTriple.getVendor() == llvm::Triple::UnknownVendor) { + StringRef OSEnv = TargetTriple.getOSAndEnvironmentName(); + if (TargetTriple.getEnvironment() == llvm::Triple::GNUX32) + OSEnv = "linux-gnu"; + TripleNoVendor = (TargetTriple.getArchName().str() + '-' + OSEnv).str(); CandidateTripleAliases.push_back(TripleNoVendor); + if (BiarchVariantTriple.getArch() != llvm::Triple::UnknownArch) { + BiarchTripleNoVendor = + (BiarchVariantTriple.getArchName().str() + '-' + OSEnv).str(); + CandidateBiarchTripleAliases.push_back(BiarchTripleNoVendor); + } + } CollectLibDirsAndTriples(TargetTriple, BiarchVariantTriple, CandidateLibDirs, CandidateTripleAliases, CandidateBiarchLibDirs, @@ -2453,11 +2462,9 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // lists should shrink over time. Please don't add more elements to *Triples. 
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"}; static const char *const AArch64Triples[] = { - "aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux", - "aarch64-suse-linux"}; + "aarch64-none-linux-gnu", "aarch64-redhat-linux", "aarch64-suse-linux"}; static const char *const AArch64beLibDirs[] = {"/lib"}; - static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu", - "aarch64_be-linux-gnu"}; + static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu"}; static const char *const ARMLibDirs[] = {"/lib"}; static const char *const ARMTriples[] = {"arm-linux-gnueabi"}; @@ -2482,9 +2489,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "x86_64-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-pc-linux-gnu", "x86_64-redhat-linux6E", "x86_64-redhat-linux", "x86_64-suse-linux", - "x86_64-manbo-linux-gnu", "x86_64-linux-gnu", - "x86_64-slackware-linux", "x86_64-unknown-linux", - "x86_64-amazon-linux"}; + "x86_64-manbo-linux-gnu", "x86_64-slackware-linux", + "x86_64-unknown-linux", "x86_64-amazon-linux"}; static const char *const X32Triples[] = {"x86_64-linux-gnux32", "x86_64-pc-linux-gnux32"}; static const char *const X32LibDirs[] = {"/libx32", "/lib"}; @@ -2500,26 +2506,24 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; static const char *const M68kLibDirs[] = {"/lib"}; - static const char *const M68kTriples[] = { - "m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"}; + static const char *const M68kTriples[] = {"m68k-unknown-linux-gnu", + "m68k-suse-linux"}; static const char *const MIPSLibDirs[] = {"/libo32", "/lib"}; static const char *const MIPSTriples[] = { "mips-linux-gnu", "mips-mti-linux", "mips-mti-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6-linux-gnu"}; static const char *const MIPSELLibDirs[] = {"/libo32", "/lib"}; - static const char *const MIPSELTriples[] = { - "mipsel-linux-gnu", 
"mips-img-linux-gnu", "mipsisa32r6el-linux-gnu"}; + static const char *const MIPSELTriples[] = {"mipsel-linux-gnu", + "mips-img-linux-gnu"}; static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"}; static const char *const MIPS64Triples[] = { - "mips64-linux-gnu", "mips-mti-linux-gnu", - "mips-img-linux-gnu", "mips64-linux-gnuabi64", + "mips-mti-linux-gnu", "mips-img-linux-gnu", "mips64-linux-gnuabi64", "mipsisa64r6-linux-gnu", "mipsisa64r6-linux-gnuabi64"}; static const char *const MIPS64ELLibDirs[] = {"/lib64", "/lib"}; static const char *const MIPS64ELTriples[] = { - "mips64el-linux-gnu", "mips-mti-linux-gnu", - "mips-img-linux-gnu", "mips64el-linux-gnuabi64", + "mips-mti-linux-gnu", "mips-img-linux-gnu", "mips64el-linux-gnuabi64", "mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"}; static const char *const MIPSN32LibDirs[] = {"/lib32"}; @@ -2534,46 +2538,39 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( static const char *const PPCLibDirs[] = {"/lib32", "/lib"}; static const char *const PPCTriples[] = { - "powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe", + "powerpc-unknown-linux-gnu", // On 32-bit PowerPC systems running SUSE Linux, gcc is configured as a // 64-bit compiler which defaults to "-m32", hence "powerpc64-suse-linux". 
"powerpc64-suse-linux", "powerpc-montavista-linuxspe"}; static const char *const PPCLELibDirs[] = {"/lib32", "/lib"}; - static const char *const PPCLETriples[] = {"powerpcle-linux-gnu", - "powerpcle-unknown-linux-gnu", + static const char *const PPCLETriples[] = {"powerpcle-unknown-linux-gnu", "powerpcle-linux-musl"}; static const char *const PPC64LibDirs[] = {"/lib64", "/lib"}; - static const char *const PPC64Triples[] = { - "powerpc64-linux-gnu", "powerpc64-unknown-linux-gnu", - "powerpc64-suse-linux", "ppc64-redhat-linux"}; + static const char *const PPC64Triples[] = {"powerpc64-unknown-linux-gnu", + "powerpc64-suse-linux", + "ppc64-redhat-linux"}; static const char *const PPC64LELibDirs[] = {"/lib64", "/lib"}; static const char *const PPC64LETriples[] = { - "powerpc64le-linux-gnu", "powerpc64le-unknown-linux-gnu", - "powerpc64le-none-linux-gnu", "powerpc64le-suse-linux", - "ppc64le-redhat-linux"}; + "powerpc64le-unknown-linux-gnu", "powerpc64le-none-linux-gnu", + "powerpc64le-suse-linux", "ppc64le-redhat-linux"}; static const char *const RISCV32LibDirs[] = {"/lib32", "/lib"}; static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu", - "riscv32-linux-gnu", "riscv32-unknown-elf"}; static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"}; static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu", - "riscv64-linux-gnu", "riscv64-unknown-elf"}; static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"}; - static const char *const SPARCv8Triples[] = {"sparc-linux-gnu", - "sparcv8-linux-gnu"}; + static const char *const SPARCv8Triples[] = {"sparcv8-linux-gnu"}; static const char *const SPARCv9LibDirs[] = {"/lib64", "/lib"}; - static const char *const SPARCv9Triples[] = {"sparc64-linux-gnu", - "sparcv9-linux-gnu"}; + static const char *const SPARCv9Triples[] = {"sparcv9-linux-gnu"}; static const char *const SystemZLibDirs[] = {"/lib64", "/lib"}; static const char *const SystemZTriples[] = { - "s390x-linux-gnu", 
"s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", - "s390x-suse-linux", "s390x-redhat-linux"}; - + "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", "s390x-suse-linux", + "s390x-redhat-linux"}; using std::begin; using std::end; diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c98645993abe07..c7543a48c0b50e 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2261,8 +2261,17 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, unsigned PrefixLen = 0; - while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) + while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) { ++PrefixLen; + if (!isLexingRawMode() && + llvm::is_contained({'$', '@', '`'}, CurPtr[PrefixLen])) { + const char *Pos = &CurPtr[PrefixLen]; + Diag(Pos, LangOpts.CPlusPlus26 + ? diag::warn_cxx26_compat_raw_string_literal_character_set + : diag::ext_cxx26_raw_string_literal_character_set) + << StringRef(Pos, 1); + } + } // If the last character was not a '(', then we didn't lex a valid delimiter. 
if (CurPtr[PrefixLen] != '(') { diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index e9c60f76165b68..63afc18783a1f7 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -1347,11 +1347,13 @@ void Parser::ParseOpenACCCacheVarList() { ParseOpenACCVarList(OpenACCClauseKind::Invalid); } -Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { - SourceLocation StartLoc = getCurToken().getLocation(); +Parser::OpenACCDirectiveParseInfo +Parser::ParseOpenACCDirective() { + SourceLocation StartLoc = ConsumeAnnotationToken(); + SourceLocation DirLoc = getCurToken().getLocation(); OpenACCDirectiveKind DirKind = ParseOpenACCDirectiveKind(*this); - getActions().OpenACC().ActOnConstruct(DirKind, StartLoc); + getActions().OpenACC().ActOnConstruct(DirKind, DirLoc); // Once we've parsed the construct/directive name, some have additional // specifiers that need to be taken care of. Atomic has an 'atomic-clause' @@ -1390,7 +1392,7 @@ Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { break; case OpenACCDirectiveKind::Wait: // OpenACC has an optional paren-wrapped 'wait-argument'. - if (ParseOpenACCWaitArgument(StartLoc, /*IsDirective=*/true).Failed) + if (ParseOpenACCWaitArgument(DirLoc, /*IsDirective=*/true).Failed) T.skipToEnd(); else T.consumeClose(); @@ -1404,7 +1406,8 @@ Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { } // Parses the list of clauses, if present, plus set up return value. 
- OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, SourceLocation{}, + OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, DirLoc, + SourceLocation{}, ParseOpenACCClauseList(DirKind)}; assert(Tok.is(tok::annot_pragma_openacc_end) && @@ -1421,7 +1424,6 @@ Parser::DeclGroupPtrTy Parser::ParseOpenACCDirectiveDecl() { assert(Tok.is(tok::annot_pragma_openacc) && "expected OpenACC Start Token"); ParsingOpenACCDirectiveRAII DirScope(*this); - ConsumeAnnotationToken(); OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); @@ -1438,7 +1440,6 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { assert(Tok.is(tok::annot_pragma_openacc) && "expected OpenACC Start Token"); ParsingOpenACCDirectiveRAII DirScope(*this); - ConsumeAnnotationToken(); OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); if (getActions().OpenACC().ActOnStartStmtDirective(DirInfo.DirKind, @@ -1456,6 +1457,6 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { } return getActions().OpenACC().ActOnEndStmtDirective( - DirInfo.DirKind, DirInfo.StartLoc, DirInfo.EndLoc, DirInfo.Clauses, - AssocStmt); + DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.EndLoc, + DirInfo.Clauses, AssocStmt); } diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index b0af04451166ca..c25203243ee49b 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -239,7 +239,15 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( auto IsStmtAttr = [](ParsedAttr &Attr) { return Attr.isStmtAttr(); }; bool AllAttrsAreStmtAttrs = llvm::all_of(CXX11Attrs, IsStmtAttr) && llvm::all_of(GNUAttrs, IsStmtAttr); - if (((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || + // In C, the grammar production for statement (C23 6.8.1p1) does not allow + // for declarations, which is different from C++ (C++23 [stmt.pre]p1). 
So + // in C++, we always allow a declaration, but in C we need to check whether + // we're in a statement context that allows declarations. e.g., in C, the + // following is invalid: if (1) int x; + if ((getLangOpts().CPlusPlus || getLangOpts().MicrosoftExt || + (StmtCtx & ParsedStmtContext::AllowDeclarationsInC) != + ParsedStmtContext()) && + ((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || isDeclarationStatement())) { SourceLocation DeclStart = Tok.getLocation(), DeclEnd; DeclGroupPtrTy Decl; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d3e9dcb4f4399a..6595abbcdda5b1 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1444,10 +1444,10 @@ bool Sema::CheckCXXThisType(SourceLocation Loc, QualType Type) { // category are defined within such member functions as they are within // an implicit object member function). DeclContext *DC = getFunctionLevelDeclContext(); - if (const auto *Method = dyn_cast(DC); - Method && Method->isExplicitObjectMemberFunction()) { + const auto *Method = dyn_cast(DC); + if (Method && Method->isExplicitObjectMemberFunction()) { Diag(Loc, diag::err_invalid_this_use) << 1; - } else if (isLambdaCallWithExplicitObjectParameter(CurContext)) { + } else if (Method && isLambdaCallWithExplicitObjectParameter(CurContext)) { Diag(Loc, diag::err_invalid_this_use) << 1; } else { Diag(Loc, diag::err_invalid_this_use) << 0; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index ef0a655b631ab4..be6ea20a956a39 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -5897,6 +5897,16 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction, NamedDecl *ChosenDecl = Correction.isKeyword() ? nullptr : Correction.getFoundDecl(); + + // For builtin functions which aren't declared anywhere in source, + // don't emit the "declared here" note. 
+ if (const auto *FD = dyn_cast_if_present(ChosenDecl); + FD && FD->getBuiltinID() && + PrevNote.getDiagID() == diag::note_previous_decl && + Correction.getCorrectionRange().getBegin() == FD->getBeginLoc()) { + ChosenDecl = nullptr; + } + if (PrevNote.getDiagID() && ChosenDecl) Diag(ChosenDecl->getLocation(), PrevNote) << CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo); diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 09d91b31cfe5f9..15239f4f35c39f 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -844,7 +844,7 @@ ExprResult SemaOpenACC::CheckReductionVar(Expr *VarExpr) { } void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, - SourceLocation StartLoc) { + SourceLocation DirLoc) { switch (K) { case OpenACCDirectiveKind::Invalid: // Nothing to do here, an invalid kind has nothing we can check here. We @@ -859,7 +859,7 @@ void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, // here as these constructs do not take any arguments. break; default: - Diag(StartLoc, diag::warn_acc_construct_unimplemented) << K; + Diag(DirLoc, diag::warn_acc_construct_unimplemented) << K; break; } } @@ -1265,6 +1265,7 @@ bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K, StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, + SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult AssocStmt) { @@ -1278,7 +1279,7 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, case OpenACCDirectiveKind::Kernels: // TODO OpenACC: Add clauses to the construct here. return OpenACCComputeConstruct::Create( - getASTContext(), K, StartLoc, EndLoc, Clauses, + getASTContext(), K, StartLoc, DirLoc, EndLoc, Clauses, AssocStmt.isUsable() ? 
AssocStmt.get() : nullptr); } llvm_unreachable("Unhandled case in directive handling?"); diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 8735d96c840793..6f538ed55cb72e 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -285,7 +285,7 @@ bool Sema::CheckAlwaysInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, static Attr *handleNoInlineAttr(Sema &S, Stmt *St, const ParsedAttr &A, SourceRange Range) { NoInlineAttr NIA(S.Context, A); - if (!NIA.isClangNoInline()) { + if (!NIA.isStmtNoInline()) { S.Diag(St->getBeginLoc(), diag::warn_function_attribute_ignored_in_stmt) << "[[clang::noinline]]"; return nullptr; @@ -684,10 +684,8 @@ ExprResult Sema::ActOnCXXAssumeAttr(Stmt *St, const ParsedAttr &A, } if (!getLangOpts().CPlusPlus23 && - A.getSyntax() == AttributeCommonInfo::AS_CXX11) { - llvm::dbgs() << "Syntax: " << int(A.getSyntax()) << "\n"; + A.getSyntax() == AttributeCommonInfo::AS_CXX11) Diag(A.getLoc(), diag::ext_cxx23_attr) << A << Range; - } return Assumption; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index dee335b526991b..765e6177d202d1 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4033,11 +4033,12 @@ class TreeTransform { StmtResult RebuildOpenACCComputeConstruct(OpenACCDirectiveKind K, SourceLocation BeginLoc, + SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef Clauses, StmtResult StrBlock) { - return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, EndLoc, - Clauses, StrBlock); + return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, DirLoc, + EndLoc, Clauses, StrBlock); } private: @@ -11559,8 +11560,8 @@ StmtResult TreeTransform::TransformOpenACCComputeConstruct( getSema().OpenACC().ActOnAssociatedStmt(C->getDirectiveKind(), StrBlock); return getDerived().RebuildOpenACCComputeConstruct( - C->getDirectiveKind(), C->getBeginLoc(), C->getEndLoc(), - TransformedClauses, StrBlock); + 
C->getDirectiveKind(), C->getBeginLoc(), C->getDirectiveLoc(), + C->getEndLoc(), TransformedClauses, StrBlock); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index eac4faff285490..bea2b949891070 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2797,6 +2797,7 @@ void ASTStmtReader::VisitOpenACCConstructStmt(OpenACCConstructStmt *S) { (void)Record.readInt(); S->Kind = Record.readEnum(); S->Range = Record.readSourceRange(); + S->DirectiveLoc = Record.readSourceLocation(); Record.readOpenACCClauseList(S->Clauses); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index dd548fabfd9551..e830c4026ea78f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7835,7 +7835,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { case OpenACCClauseKind::If: { const auto *IC = cast(C); writeSourceLocation(IC->getLParenLoc()); - writeStmtRef(IC->getConditionExpr()); + AddStmt(const_cast(IC->getConditionExpr())); return; } case OpenACCClauseKind::Self: { @@ -7843,7 +7843,7 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeSourceLocation(SC->getLParenLoc()); writeBool(SC->hasConditionExpr()); if (SC->hasConditionExpr()) - writeStmtRef(SC->getConditionExpr()); + AddStmt(const_cast(SC->getConditionExpr())); return; } case OpenACCClauseKind::NumGangs: { @@ -7857,13 +7857,13 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { case OpenACCClauseKind::NumWorkers: { const auto *NWC = cast(C); writeSourceLocation(NWC->getLParenLoc()); - writeStmtRef(NWC->getIntExpr()); + AddStmt(const_cast(NWC->getIntExpr())); return; } case OpenACCClauseKind::VectorLength: { const auto *NWC = cast(C); writeSourceLocation(NWC->getLParenLoc()); - 
writeStmtRef(NWC->getIntExpr()); + AddStmt(const_cast(NWC->getIntExpr())); return; } case OpenACCClauseKind::Private: { @@ -7942,15 +7942,15 @@ void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { writeSourceLocation(AC->getLParenLoc()); writeBool(AC->hasIntExpr()); if (AC->hasIntExpr()) - writeStmtRef(AC->getIntExpr()); + AddStmt(const_cast(AC->getIntExpr())); return; } case OpenACCClauseKind::Wait: { const auto *WC = cast(C); writeSourceLocation(WC->getLParenLoc()); writeBool(WC->getDevNumExpr()); - if (const Expr *DNE = WC->getDevNumExpr()) - writeStmtRef(DNE); + if (Expr *DNE = WC->getDevNumExpr()) + AddStmt(DNE); writeSourceLocation(WC->getQueuesLoc()); writeOpenACCIntExprList(WC->getQueueIdExprs()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a44852af97bea3..3c586b270fbf4f 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2847,6 +2847,7 @@ void ASTStmtWriter::VisitOpenACCConstructStmt(OpenACCConstructStmt *S) { Record.push_back(S->clauses().size()); Record.writeEnum(S->Kind); Record.AddSourceRange(S->Range); + Record.AddSourceLocation(S->DirectiveLoc); Record.writeOpenACCClauseList(S->clauses()); } diff --git a/clang/test/C/C99/block-scopes.c b/clang/test/C/C99/block-scopes.c index 589047df3e52bc..116e5d922593e0 100644 --- a/clang/test/C/C99/block-scopes.c +++ b/clang/test/C/C99/block-scopes.c @@ -18,8 +18,9 @@ enum {a, b}; void different(void) { - if (sizeof(enum {b, a}) != sizeof(int)) + if (sizeof(enum {b, a}) != sizeof(int)) { _Static_assert(a == 1, ""); + } /* In C89, the 'b' found here would have been from the enum declaration in * the controlling expression of the selection statement, not from the global * declaration. 
In C99 and later, that enumeration is scoped to the 'if' diff --git a/clang/test/CodeGen/attr-noinline.cpp b/clang/test/CodeGen/attr-noinline.cpp index f0588cfecf4631..c1fb9941b5251d 100644 --- a/clang/test/CodeGen/attr-noinline.cpp +++ b/clang/test/CodeGen/attr-noinline.cpp @@ -9,6 +9,7 @@ static int baz(int x) { } [[clang::noinline]] bool noi() { } +[[msvc::noinline]] bool ms_noi() { return true; } void foo(int i) { [[clang::noinline]] bar(); @@ -39,6 +40,31 @@ void foo(int i) { // CHECK: call noundef zeroext i1 @_Z3barv() } +void ms_noi_check(int i) { + [[msvc::noinline]] bar(); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR:[0-9]+]] + [[msvc::noinline]] i = baz(i); +// CHECK: call noundef i32 @_ZL3bazi({{.*}}) #[[NOINLINEATTR]] + [[msvc::noinline]] (i = 4, bar()); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] + [[msvc::noinline]] (void)(bar()); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] + [[msvc::noinline]] f(bar(), bar()); +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call void @_Z1fbb({{.*}}) #[[NOINLINEATTR]] + [[msvc::noinline]] [] { bar(); bar(); }(); // noinline only applies to the anonymous function call +// CHECK: call void @"_ZZ12ms_noi_checkiENK3$_0clEv"(ptr {{[^,]*}} %ref.tmp) #[[NOINLINEATTR]] + [[msvc::noinline]] for (bar(); bar(); bar()) {} +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] +// CHECK: call noundef zeroext i1 @_Z3barv() #[[NOINLINEATTR]] + [[msvc::noinline]] ms_noi(); +// CHECK: call noundef zeroext i1 @_Z6ms_noiv() + ms_noi(); +// CHECK: call noundef zeroext i1 @_Z6ms_noiv() +} + struct S { friend bool operator==(const S &LHS, const S &RHS); }; @@ -50,6 +76,12 @@ void func(const S &s1, const S &s2) { bool b; [[clang::noinline]] b = s1 == s2; // CHECK: call noundef zeroext i1 @_ZeqRK1SS1_({{.*}}) 
#[[NOINLINEATTR]] + + [[msvc::noinline]]g(s1 == s2); +// CHECK: call noundef zeroext i1 @_ZeqRK1SS1_({{.*}}) #[[NOINLINEATTR]] +// CHECK: call void @_Z1gb({{.*}}) #[[NOINLINEATTR]] + [[msvc::noinline]] b = s1 == s2; +// CHECK: call noundef zeroext i1 @_ZeqRK1SS1_({{.*}}) #[[NOINLINEATTR]] } // CHECK: attributes #[[NOINLINEATTR]] = { noinline } diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 93a6ab06081c99..d6ee4f68700dca 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -825,6 +825,30 @@ float extract_lane_f16x8(f16x8 a, int i) { // WEBASSEMBLY-NEXT: ret float %0 return __builtin_wasm_extract_lane_f16x8(a, i); } + +f16x8 min_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_min_f16x8(a, b); +} + +f16x8 max_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_max_f16x8(a, b); +} + +f16x8 pmin_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.pmin.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_pmin_f16x8(a, b); +} + +f16x8 pmax_f16x8(f16x8 a, f16x8 b) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.pmax.v8f16(<8 x half> %a, <8 x half> %b) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_pmax_f16x8(a, b); +} __externref_t externref_null() { return __builtin_wasm_ref_null_extern(); // WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern() diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp b/clang/test/Lexer/cxx2c-raw-strings.cpp new file mode 100644 index 00000000000000..569a4b8447e57d --- /dev/null +++ b/clang/test/Lexer/cxx2c-raw-strings.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++11 
-fsyntax-only -verify -Wc++26-extensions %s +// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify=cxx26 -Wpre-c++26-compat %s + +int main() { + (void) R"abc`@$(foobar)abc`@$"; + //expected-warning@-1 {{'`' in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-2 {{'@' in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-3 {{'$' in a raw string literal delimiter is a C++2c extension}} + //cxx26-warning@-4 {{'`' in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-5 {{'@' in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-6 {{'$' in a raw string literal delimiter is incompatible with standards before C++2c}} +} diff --git a/clang/test/Parser/decls.c b/clang/test/Parser/decls.c new file mode 100644 index 00000000000000..39ef05bf4bd999 --- /dev/null +++ b/clang/test/Parser/decls.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 %s -fsyntax-only -verify -pedantic + +// Test that we can parse declarations at global scope. +int v; + +void func(void) { + // Test that we can parse declarations within a compound statement. + int a; + { + int b; + } + + int z = ({ // expected-warning {{use of GNU statement expression extension}} + // Test that we can parse declarations within a GNU statement expression. + int w = 12; + w; + }); + + // Test that we diagnose declarations where a statement is required. + // See GH92775. + if (1) + int x; // expected-error {{expected expression}} + for (;;) + int c; // expected-error {{expected expression}} + + label: + int y; // expected-warning {{label followed by a declaration is a C23 extension}} + + // Test that lookup works as expected. 
+ (void)a; + (void)v; + (void)z; + (void)b; // expected-error {{use of undeclared identifier 'b'}} + (void)w; // expected-error {{use of undeclared identifier 'w'}} + (void)x; // expected-error {{use of undeclared identifier 'x'}} + (void)c; // expected-error {{use of undeclared identifier 'c'}} + (void)y; +} + diff --git a/clang/test/Sema/attr-noinline.cpp b/clang/test/Sema/attr-noinline.cpp index bd6505b9fe98ef..6da0e873af1b6a 100644 --- a/clang/test/Sema/attr-noinline.cpp +++ b/clang/test/Sema/attr-noinline.cpp @@ -2,9 +2,9 @@ int bar(); -// expected-note@+1{{conflicting attribute is here}} +// expected-note@+1 2 {{conflicting attribute is here}} [[gnu::always_inline]] void always_inline_fn(void) { } -// expected-note@+1{{conflicting attribute is here}} +// expected-note@+1 2 {{conflicting attribute is here}} [[gnu::flatten]] void flatten_fn(void) { } [[gnu::noinline]] void noinline_fn(void) { } @@ -25,7 +25,21 @@ void foo() { __attribute__((noinline)) bar(); // expected-warning {{attribute is ignored on this statement as it only applies to functions; use '[[clang::noinline]]' on statements}} } +void ms_noi_check() { + [[msvc::noinline]] bar(); + [[msvc::noinline(0)]] bar(); // expected-error {{'noinline' attribute takes no arguments}} + int x; + [[msvc::noinline]] x = 0; // expected-warning {{'noinline' attribute is ignored because there exists no call expression inside the statement}} + [[msvc::noinline]] { asm("nop"); } // expected-warning {{'noinline' attribute is ignored because there exists no call expression inside the statement}} + [[msvc::noinline]] label: x = 1; // expected-warning {{'noinline' attribute only applies to functions and statements}} + + [[msvc::noinline]] always_inline_fn(); // expected-warning {{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + [[msvc::noinline]] flatten_fn(); // expected-warning {{statement attribute 'noinline' has higher precedence than function attribute 'flatten'}} + 
[[msvc::noinline]] noinline_fn(); +} + [[clang::noinline]] static int i = bar(); // expected-warning {{'noinline' attribute only applies to functions and statements}} +[[msvc::noinline]] static int j = bar(); // expected-warning {{'noinline' attribute only applies to functions and statements}} // This used to crash the compiler. template @@ -69,7 +83,39 @@ int variadic_baz(int x) { [[clang::noinline]] return non_dependent(x) + (dependent(x) + ...); } +template [[clang::always_inline]] +int qux(int x) { // #QUX + // expected-warning@+2{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + [[msvc::noinline]] non_dependent(x); + if constexpr (D>0) { + // expected-warning@+6{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + // expected-warning@+4 3{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#QUX 3{{conflicting attribute is here}} + // expected-note@#QUX_INST 3{{in instantiation}} + // expected-note@+1 3{{in instantiation}} + [[msvc::noinline]] return non_dependent(x), qux(x + 1); + } + return x; +} + +// We can't suppress if there is a variadic involved. +template +int variadic_qux(int x) { + // Diagnoses NO_DEP 2x, once during phase 1, the second during instantiation. + // Dianoses DEP 3x, once per variadic expansion. 
+ // expected-warning@+5 2{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP 2{{conflicting attribute is here}} + // expected-warning@+3 3{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#DEP 3{{conflicting attribute is here}} + // expected-note@#QUX_VARIADIC_INST{{in instantiation}} + [[msvc::noinline]] return non_dependent(x) + (dependent(x) + ...); +} + void use() { baz<3>(0); // #BAZ_INST variadic_baz<0, 1, 2>(0); // #VARIADIC_INST + qux<3>(0); // #QUX_INST + variadic_qux<0, 1, 2>(0); // #QUX_VARIADIC_INST } diff --git a/clang/test/SemaCXX/invalid-if-constexpr.cpp b/clang/test/SemaCXX/invalid-if-constexpr.cpp index 7643c47488f057..0007f2739cbbd0 100644 --- a/clang/test/SemaCXX/invalid-if-constexpr.cpp +++ b/clang/test/SemaCXX/invalid-if-constexpr.cpp @@ -4,8 +4,7 @@ namespace GH61885 { void similar() { // expected-note {{'similar' declared here}} if constexpr (similer<>) {} // expected-error {{use of undeclared identifier 'similer'; did you mean 'similar'?}} } -void a() { if constexpr (__adl_swap<>) {}} // expected-error{{use of undeclared identifier '__adl_swap'; did you mean '__sync_swap'?}} \ - // expected-note {{'__sync_swap' declared here}} +void a() { if constexpr (__adl_swap<>) {}} // expected-error{{use of undeclared identifier '__adl_swap'; did you mean '__sync_swap'?}} int AA() { return true;} // expected-note {{'AA' declared here}} diff --git a/clang/test/SemaCXX/invalid-this-in-lambda.cpp b/clang/test/SemaCXX/invalid-this-in-lambda.cpp new file mode 100644 index 00000000000000..ae65bda025e232 --- /dev/null +++ b/clang/test/SemaCXX/invalid-this-in-lambda.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s + +decltype([]()->decltype(this) { }) a; // expected-error {{invalid use of 'this' outside of a non-static member function}} + diff --git 
a/clang/test/SemaCXX/typo-correction-builtin-func.cpp b/clang/test/SemaCXX/typo-correction-builtin-func.cpp new file mode 100644 index 00000000000000..8d369034d1be33 --- /dev/null +++ b/clang/test/SemaCXX/typo-correction-builtin-func.cpp @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Test that clang does not emit 'declared here' note for builtin functions that don't have a declaration in source. + +void t0() { + constexpr float A = __builtin_isinfinity(); // expected-error {{use of undeclared identifier '__builtin_isinfinity'; did you mean '__builtin_isfinite'?}} + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} diff --git a/clang/test/SemaOpenACC/parallel-loc-and-stmt.c b/clang/test/SemaOpenACC/parallel-loc-and-stmt.c index ba29f6da8ba25d..bbcdd823483a52 100644 --- a/clang/test/SemaOpenACC/parallel-loc-and-stmt.c +++ b/clang/test/SemaOpenACC/parallel-loc-and-stmt.c @@ -33,9 +33,11 @@ int foo3; void func() { // FIXME: Should we disallow this on declarations, or consider this to be on - // the initialization? + // the initialization? This is currently rejected in C because + // Parser::ParseOpenACCDirectiveStmt() calls ParseStatement() and passes the + // statement context as "SubStmt" which does not allow for a declaration in C. #pragma acc parallel - int foo; + int foo; // expected-error {{expected expression}} #pragma acc parallel { diff --git a/clang/test/TableGen/deferred-diag.td b/clang/test/TableGen/deferred-diag.td index c1906d4a9e45ec..d7e8e694c7b3e4 100644 --- a/clang/test/TableGen/deferred-diag.td +++ b/clang/test/TableGen/deferred-diag.td @@ -4,24 +4,24 @@ include "DiagnosticBase.inc" // Test usage of Deferrable and NonDeferrable in diagnostics. 
-def test_default : Error<"This error is non-deferrable by default">; +def test_default : Error<"this error is non-deferrable by default">; // CHECK-DAG: DIAG(test_default, {{.*}}SFINAE_SubstitutionFailure, false, true, true, false, 0) -def test_deferrable : Error<"This error is deferrable">, Deferrable; +def test_deferrable : Error<"this error is deferrable">, Deferrable; // CHECK-DAG: DIAG(test_deferrable, {{.*}} SFINAE_SubstitutionFailure, false, true, true, true, 0) -def test_non_deferrable : Error<"This error is non-deferrable">, NonDeferrable; +def test_non_deferrable : Error<"this error is non-deferrable">, NonDeferrable; // CHECK-DAG: DIAG(test_non_deferrable, {{.*}} SFINAE_SubstitutionFailure, false, true, true, false, 0) let Deferrable = 1 in { -def test_let : Error<"This error is deferrable by let">; +def test_let : Error<"this error is deferrable by let">; // CHECK-DAG: DIAG(test_let, {{.*}} SFINAE_SubstitutionFailure, false, true, true, true, 0) // Make sure TextSubstitution is allowed in the let Deferrable block. def textsub : TextSubstitution<"%select{text1|text2}0">; -def test_let2 : Error<"This error is deferrable by let %sub{textsub}0">; +def test_let2 : Error<"this error is deferrable by let %sub{textsub}0">; // CHECK-DAG: DIAG(test_let2, {{.*}} SFINAE_SubstitutionFailure, false, true, true, true, 0) } diff --git a/clang/test/TableGen/text-substitution.td b/clang/test/TableGen/text-substitution.td index aafdbe48c43bec..b0d030aca65134 100644 --- a/clang/test/TableGen/text-substitution.td +++ b/clang/test/TableGen/text-substitution.td @@ -26,8 +26,8 @@ def sub_test_rewrite : TextSubstitution< // CHECK-SAME: Q! %q1. // CHECK-SAME: PLACEHOLDER! %0.OBJCCLASS! // CHECK-SAME: %objcclass5. OBJCINSTANCE! -// CHECK-SAME: %objcinstance4. DONE!", -def test_rewrite: Error<"%sub{sub_test_rewrite}5,4,3,2,1,0 DONE!">; +// CHECK-SAME: %objcinstance4. 
DONE", +def test_rewrite: Error<"%sub{sub_test_rewrite}5,4,3,2,1,0 DONE">; def test_sub_basic : Error<"%sub{yes_no}0">; // CHECK: test_sub_basic diff --git a/clang/test/TableGen/wording-errors.td b/clang/test/TableGen/wording-errors.td new file mode 100644 index 00000000000000..eb5eb2f547c782 --- /dev/null +++ b/clang/test/TableGen/wording-errors.td @@ -0,0 +1,55 @@ +// RUN: not clang-tblgen -gen-clang-diags-defs -I%S %s -o /dev/null 2>&1 | FileCheck %s +include "DiagnosticBase.inc" + +// Ensure we catch a capital letter at the start of a diagnostic. +def zero : Error< + "This is bad">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'This' is invalid + +// Test that we also correctly handle selections. +def one : Error< + "%select{|or}0 That">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'That' is invalid +def two : Error< + "%select{as does|}0 This">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'This' is invalid +def three : Error< + "%select{and||of course}0 Whatever">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not start with a capital letter; 'Whatever' is invalid + +// Test that we accept the following cases. +def four : Error< + "this is fine">; +def five : Error< + "%select{this|is|also}0 Fine">; +def six : Error< + "%select{this|is|also|}0 fine">; +def seven : Error< + "%select{ARC|C|C23|C++14|OpenMP}0 are also fine">; + +// Next, test that we catch punctuation at the end of the diagnostic. +def eight : Error< + "punctuation is bad.">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def nine : Error< + "it's really bad!">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '!' 
is invalid +def ten : Error< + "we also catch %select{punctuation.|in select}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def eleven : Error< + "and %select{|here.}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def twelve : Error< + "and %select{here.|}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def thirteen : Error< + "and even %select{|here.|}0">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid +def fourteen : Error< + "and %select{here}0.">; +// CHECK-DAG: wording-errors.td:[[@LINE-2]]:5: error: Diagnostics should not end with punctuation; '.' is invalid + +// Test that we accept the following cases. +def fifteen : Error< + "question marks are intentionally okay?">; diff --git a/clang/unittests/Interpreter/CodeCompletionTest.cpp b/clang/unittests/Interpreter/CodeCompletionTest.cpp index 873fbda32f0579..72c02c683fafd4 100644 --- a/clang/unittests/Interpreter/CodeCompletionTest.cpp +++ b/clang/unittests/Interpreter/CodeCompletionTest.cpp @@ -4,6 +4,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Sema/Sema.h" +#include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/LineEditor/LineEditor.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" @@ -11,6 +12,10 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#if defined(_AIX) || defined(__MVS__) +#define CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +#endif + using namespace clang; namespace { auto CB = clang::IncrementalCompilerBuilder(); @@ -50,7 +55,21 @@ static std::vector runComp(clang::Interpreter &MainInterp, return Comps; } +static bool HostSupportsJit() { + auto J = llvm::orc::LLJITBuilder().create(); + if (J) + return true; + 
LLVMConsumeError(llvm::wrap(J.takeError())); + return false; +} + +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_Sanity) { +#else TEST(CodeCompletionTest, Sanity) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int foo = 12;")); auto Err = llvm::Error::success(); @@ -61,7 +80,13 @@ TEST(CodeCompletionTest, Sanity) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_SanityNoneValid) { +#else TEST(CodeCompletionTest, SanityNoneValid) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int foo = 12;")); auto Err = llvm::Error::success(); @@ -70,7 +95,13 @@ TEST(CodeCompletionTest, SanityNoneValid) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_TwoDecls) { +#else TEST(CodeCompletionTest, TwoDecls) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int application = 12;")); cantFail(Interp->Parse("int apple = 12;")); @@ -80,14 +111,26 @@ TEST(CodeCompletionTest, TwoDecls) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_CompFunDeclsNoError) { +#else TEST(CodeCompletionTest, CompFunDeclsNoError) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); auto Err = llvm::Error::success(); auto comps = runComp(*Interp, "void app(", Err); EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_TypedDirected) { +#else TEST(CodeCompletionTest, TypedDirected) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int application = 12;")); cantFail(Interp->Parse("char apple = '2';")); @@ -119,7 
+162,13 @@ TEST(CodeCompletionTest, TypedDirected) { } } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_SanityClasses) { +#else TEST(CodeCompletionTest, SanityClasses) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("struct Apple{};")); cantFail(Interp->Parse("void takeApple(Apple &a1){}")); @@ -142,7 +191,13 @@ TEST(CodeCompletionTest, SanityClasses) { } } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_SubClassing) { +#else TEST(CodeCompletionTest, SubClassing) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("struct Fruit {};")); cantFail(Interp->Parse("struct Apple : Fruit{};")); @@ -157,7 +212,13 @@ TEST(CodeCompletionTest, SubClassing) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_MultipleArguments) { +#else TEST(CodeCompletionTest, MultipleArguments) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse("int foo = 42;")); cantFail(Interp->Parse("char fowl = 'A';")); @@ -169,7 +230,13 @@ TEST(CodeCompletionTest, MultipleArguments) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_Methods) { +#else TEST(CodeCompletionTest, Methods) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse( "struct Foo{int add(int a){return 42;} int par(int b){return 42;}};")); @@ -183,7 +250,13 @@ TEST(CodeCompletionTest, Methods) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_MethodsInvocations) { +#else TEST(CodeCompletionTest, MethodsInvocations) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); 
cantFail(Interp->Parse( "struct Foo{int add(int a){return 42;} int par(int b){return 42;}};")); @@ -197,7 +270,13 @@ TEST(CodeCompletionTest, MethodsInvocations) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_NestedInvocations) { +#else TEST(CodeCompletionTest, NestedInvocations) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail(Interp->Parse( "struct Foo{int add(int a){return 42;} int par(int b){return 42;}};")); @@ -212,7 +291,13 @@ TEST(CodeCompletionTest, NestedInvocations) { EXPECT_EQ((bool)Err, false); } +#ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT +TEST(CodeCompletionTest, DISABLED_TemplateFunctions) { +#else TEST(CodeCompletionTest, TemplateFunctions) { +#endif + if (!HostSupportsJit()) + GTEST_SKIP(); auto Interp = createInterpreter(); cantFail( Interp->Parse("template T id(T a) { return a;} ")); diff --git a/clang/unittests/Interpreter/IncrementalProcessingTest.cpp b/clang/unittests/Interpreter/IncrementalProcessingTest.cpp index 54159173d91e39..732753f11306e6 100644 --- a/clang/unittests/Interpreter/IncrementalProcessingTest.cpp +++ b/clang/unittests/Interpreter/IncrementalProcessingTest.cpp @@ -36,14 +36,6 @@ using namespace clang; namespace { -static bool HostSupportsJit() { - auto J = llvm::orc::LLJITBuilder().create(); - if (J) - return true; - LLVMConsumeError(llvm::wrap(J.takeError())); - return false; -} - // Incremental processing produces several modules, all using the same "main // file". Make sure CodeGen can cope with that, e.g. for static initializers. 
const char TestProgram1[] = "extern \"C\" int funcForProg1() { return 17; }\n" @@ -64,11 +56,22 @@ const Function *getGlobalInit(llvm::Module *M) { return nullptr; } +static bool HostSupportsJit() { + auto J = llvm::orc::LLJITBuilder().create(); + if (J) + return true; + LLVMConsumeError(llvm::wrap(J.takeError())); + return false; +} + #ifdef CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT TEST(IncrementalProcessing, DISABLED_EmitCXXGlobalInitFunc) { #else TEST(IncrementalProcessing, EmitCXXGlobalInitFunc) { #endif + if (!HostSupportsJit()) + GTEST_SKIP(); + std::vector ClangArgv = {"-Xclang", "-emit-llvm-only"}; auto CB = clang::IncrementalCompilerBuilder(); CB.SetCompilerArgs(ClangArgv); diff --git a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp index f564689fff7cf1..b290530444d2ab 100644 --- a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp +++ b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp @@ -1213,6 +1213,197 @@ static bool isRemark(const Record &Diag) { return ClsName == "CLASS_REMARK"; } +// Presumes the text has been split at the first whitespace or hyphen. +static bool isExemptAtStart(StringRef Text) { + // Fast path, the first character is lowercase or not alphanumeric. + if (Text.empty() || isLower(Text[0]) || !isAlnum(Text[0])) + return true; + + // If the text is all uppercase (or numbers, +, or _), then we assume it's an + // acronym and that's allowed. This covers cases like ISO, C23, C++14, and + // OBJECT_MODE. However, if there's only a single letter other than "C", we + // do not exempt it so that we catch a case like "A really bad idea" while + // still allowing a case like "C does not allow...". + if (llvm::all_of(Text, [](char C) { + return isUpper(C) || isDigit(C) || C == '+' || C == '_'; + })) + return Text.size() > 1 || Text[0] == 'C'; + + // Otherwise, there are a few other exemptions. 
+ return StringSwitch(Text) + .Case("AddressSanitizer", true) + .Case("CFString", true) + .Case("Clang", true) + .Case("Fuchsia", true) + .Case("GNUstep", true) + .Case("IBOutletCollection", true) + .Case("Microsoft", true) + .Case("Neon", true) + .StartsWith("NSInvocation", true) // NSInvocation, NSInvocation's + .Case("Objective", true) // Objective-C (hyphen is a word boundary) + .Case("OpenACC", true) + .Case("OpenCL", true) + .Case("OpenMP", true) + .Case("Pascal", true) + .Case("Swift", true) + .Case("Unicode", true) + .Case("Vulkan", true) + .Case("WebAssembly", true) + .Default(false); +} + +// Does not presume the text has been split at all. +static bool isExemptAtEnd(StringRef Text) { + // Rather than come up with a list of characters that are allowed, we go the + // other way and look only for characters that are not allowed. + switch (Text.back()) { + default: + return true; + case '?': + // Explicitly allowed to support "; did you mean?". + return true; + case '.': + case '!': + return false; + } +} + +static void verifyDiagnosticWording(const Record &Diag) { + StringRef FullDiagText = Diag.getValueAsString("Summary"); + + auto DiagnoseStart = [&](StringRef Text) { + // Verify that the text does not start with a capital letter, except for + // special cases that are exempt like ISO and C++. Find the first word + // by looking for a word breaking character. + char Separators[] = {' ', '-', ',', '}'}; + auto Iter = std::find_first_of( + Text.begin(), Text.end(), std::begin(Separators), std::end(Separators)); + + StringRef First = Text.substr(0, Iter - Text.begin()); + if (!isExemptAtStart(First)) { + PrintError(&Diag, + "Diagnostics should not start with a capital letter; '" + + First + "' is invalid"); + } + }; + + auto DiagnoseEnd = [&](StringRef Text) { + // Verify that the text does not end with punctuation like '.' or '!'. 
+ if (!isExemptAtEnd(Text)) { + PrintError(&Diag, "Diagnostics should not end with punctuation; '" + + Text.substr(Text.size() - 1, 1) + "' is invalid"); + } + }; + + // If the diagnostic starts with %select, look through it to see whether any + // of the options will cause a problem. + if (FullDiagText.starts_with("%select{")) { + // Do a balanced delimiter scan from the start of the text to find the + // closing '}', skipping intermediary {} pairs. + + size_t BraceCount = 1; + constexpr size_t PercentSelectBraceLen = sizeof("%select{") - 1; + auto Iter = FullDiagText.begin() + PercentSelectBraceLen; + for (auto End = FullDiagText.end(); Iter != End; ++Iter) { + char Ch = *Iter; + if (Ch == '{') + ++BraceCount; + else if (Ch == '}') + --BraceCount; + if (!BraceCount) + break; + } + // Defending against a malformed diagnostic string. + if (BraceCount != 0) + return; + + StringRef SelectText = + FullDiagText.substr(PercentSelectBraceLen, Iter - FullDiagText.begin() - + PercentSelectBraceLen); + SmallVector SelectPieces; + SelectText.split(SelectPieces, '|'); + + // Walk over all of the individual pieces of select text to see if any of + // them start with an invalid character. If any of the select pieces is + // empty, we need to look at the first word after the %select to see + // whether that is invalid or not. If all of the pieces are fine, then we + // don't need to check anything else about the start of the diagnostic. + bool CheckSecondWord = false; + for (StringRef Piece : SelectPieces) { + if (Piece.empty()) + CheckSecondWord = true; + else + DiagnoseStart(Piece); + } + + if (CheckSecondWord) { + // There was an empty select piece, so we need to check the second + // word. This catches situations like '%select{|fine}0 Not okay'. Add + // two to account for the closing curly brace and the number after it. 
+ StringRef AfterSelect = + FullDiagText.substr(Iter - FullDiagText.begin() + 2).ltrim(); + DiagnoseStart(AfterSelect); + } + } else { + // If the start of the diagnostic is not %select, we can check the first + // word and be done with it. + DiagnoseStart(FullDiagText); + } + + // If the last character in the diagnostic is a number preceded by a }, scan + // backwards to see if this is for a %select{...}0. If it is, we need to look + // at each piece to see whether it ends in punctuation or not. + bool StillNeedToDiagEnd = true; + if (isDigit(FullDiagText.back()) && *(FullDiagText.end() - 2) == '}') { + // Scan backwards to find the opening curly brace. + size_t BraceCount = 1; + auto Iter = FullDiagText.end() - sizeof("}0"); + for (auto End = FullDiagText.begin(); Iter != End; --Iter) { + char Ch = *Iter; + if (Ch == '}') + ++BraceCount; + else if (Ch == '{') + --BraceCount; + if (!BraceCount) + break; + } + // Defending against a malformed diagnostic string. + if (BraceCount != 0) + return; + + // Continue the backwards scan to find the word before the '{' to see if it + // is 'select'. + constexpr size_t SelectLen = sizeof("select") - 1; + bool IsSelect = + (FullDiagText.substr(Iter - SelectLen - FullDiagText.begin(), + SelectLen) == "select"); + if (IsSelect) { + // Gather the content between the {} for the select in question so we can + // split it into pieces. + StillNeedToDiagEnd = false; // No longer need to handle the end. + StringRef SelectText = + FullDiagText.substr(Iter - FullDiagText.begin() + /*{*/ 1, + FullDiagText.end() - Iter - /*pos before }0*/ 3); + SmallVector SelectPieces; + SelectText.split(SelectPieces, '|'); + for (StringRef Piece : SelectPieces) { + // Not worrying about a situation like: "this is bar. %select{foo|}0". + if (!Piece.empty()) + DiagnoseEnd(Piece); + } + } + } + + // If we didn't already cover the diagnostic because of a %select, handle it + // now. 
+ if (StillNeedToDiagEnd) + DiagnoseEnd(FullDiagText); + + // FIXME: This could also be improved by looking for instances of clang or + // gcc in the diagnostic and recommend Clang or GCC instead. However, this + // runs into odd situations like [[clang::warn_unused_result]], + // #pragma clang, or --unwindlib=libgcc. +} /// ClangDiagsDefsEmitter - The top-level class emits .def files containing /// declarations of Clang diagnostics. @@ -1273,6 +1464,9 @@ void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS, if (!Component.empty() && Component != R.getValueAsString("Component")) continue; + // Validate diagnostic wording for common issues. + verifyDiagnosticWording(R); + OS << "DIAG(" << R.getName() << ", "; OS << R.getValueAsDef("Class")->getName(); OS << ", (unsigned)diag::Severity::" diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index a2fc27de1901b4..9375e27d4f4d24 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -100,7 +100,17 @@ int msan_report_count = 0; // Array of stack origins. // FIXME: make it resizable. -static const uptr kNumStackOriginDescrs = 1024 * 1024; +// Although BSS memory doesn't cost anything until used, it is limited to 2GB +// in some configurations (e.g., "relocation R_X86_64_PC32 out of range: +// ... is not in [-2147483648, 2147483647]; references section '.bss'"). +// We use kNumStackOriginDescrs * (sizeof(char*) + sizeof(uptr)) == 64MB. 
+#ifdef SANITIZER_PPC +// soft_rss_limit test (release_origin.c) fails on PPC if kNumStackOriginDescrs +// is too high +static const uptr kNumStackOriginDescrs = 1 * 1024 * 1024; +#else +static const uptr kNumStackOriginDescrs = 4 * 1024 * 1024; +#endif // SANITIZER_PPC static const char *StackOriginDescr[kNumStackOriginDescrs]; static uptr StackOriginPC[kNumStackOriginDescrs]; static atomic_uint32_t NumStackOriginDescrs; diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h index 06a44f1885656f..510ff729989145 100644 --- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h +++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h @@ -101,6 +101,10 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern { mlir::Value box, mlir::ConversionPatternRewriter &rewriter) const; + mlir::Value getRankFromBox(mlir::Location loc, TypePair boxTy, + mlir::Value box, + mlir::ConversionPatternRewriter &rewriter) const; + // Get the element type given an LLVM type that is of the form // (array|struct|vector)+ and the provided indexes. 
mlir::Type getBoxEleTy(mlir::Type type, diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.h b/flang/include/flang/Optimizer/HLFIR/Passes.h index edefe36de00c16..83388d0527e192 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.h +++ b/flang/include/flang/Optimizer/HLFIR/Passes.h @@ -20,10 +20,6 @@ namespace hlfir { #define GEN_PASS_DECL -#include "flang/Optimizer/HLFIR/Passes.h.inc" - -std::unique_ptr createConvertHLFIRtoFIRPass(); - #define GEN_PASS_REGISTRATION #include "flang/Optimizer/HLFIR/Passes.h.inc" } // namespace hlfir diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td index 1dd2e3dc81911f..ed49f5093c9652 100644 --- a/flang/include/flang/Optimizer/HLFIR/Passes.td +++ b/flang/include/flang/Optimizer/HLFIR/Passes.td @@ -12,7 +12,6 @@ include "mlir/Pass/PassBase.td" def ConvertHLFIRtoFIR : Pass<"convert-hlfir-to-fir", "::mlir::ModuleOp"> { let summary = "Lower High-Level FIR to FIR"; - let constructor = "hlfir::createConvertHLFIRtoFIRPass()"; let dependentDialects = [ "mlir::func::FuncDialect", ]; diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index bb3c90ebc04d44..61ea7a7f9bbdd2 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -331,7 +331,7 @@ inline void createHLFIRToFIRPassPipeline( pm.addPass(hlfir::createLowerHLFIROrderedAssignments()); pm.addPass(hlfir::createLowerHLFIRIntrinsics()); pm.addPass(hlfir::createBufferizeHLFIR()); - pm.addPass(hlfir::createConvertHLFIRtoFIRPass()); + pm.addPass(hlfir::createConvertHLFIRtoFIR()); } /// Create a pass pipeline for handling certain OpenMP transformations needed diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 74e68725003cb9..664453ebaf2f74 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -391,9 +391,8 @@ struct BoxIsArrayOpConversion : public 
fir::FIROpConversion { mlir::Value a = adaptor.getOperands()[0]; auto loc = boxisarray.getLoc(); TypePair boxTyPair = getBoxTypePair(boxisarray.getVal().getType()); - auto rank = getValueFromBox(loc, boxTyPair, a, rewriter.getI32Type(), - rewriter, kRankPosInBox); - auto c0 = genConstantOffset(loc, rewriter, 0); + mlir::Value rank = getRankFromBox(loc, boxTyPair, a, rewriter); + mlir::Value c0 = genConstantIndex(loc, rank.getType(), rewriter, 0); rewriter.replaceOpWithNewOp( boxisarray, mlir::LLVM::ICmpPredicate::ne, rank, c0); return mlir::success(); @@ -430,8 +429,8 @@ struct BoxRankOpConversion : public fir::FIROpConversion { auto loc = boxrank.getLoc(); mlir::Type ty = convertType(boxrank.getType()); TypePair boxTyPair = getBoxTypePair(boxrank.getVal().getType()); - auto result = - getValueFromBox(loc, boxTyPair, a, ty, rewriter, kRankPosInBox); + mlir::Value rank = getRankFromBox(loc, boxTyPair, a, rewriter); + mlir::Value result = integerCast(loc, rewriter, ty, rank); rewriter.replaceOp(boxrank, result); return mlir::success(); } diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index 69e78167b07333..8c726d547491a7 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -179,6 +179,14 @@ mlir::Value ConvertFIRToLLVMPattern::getElementSizeFromBox( return getValueFromBox(loc, boxTy, box, resultTy, rewriter, kElemLenPosInBox); } +/// Read the rank from a fir.box. The returned value has the rank field's type from the box layout. +mlir::Value ConvertFIRToLLVMPattern::getRankFromBox( + mlir::Location loc, TypePair boxTy, mlir::Value box, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type resultTy = getBoxEleTy(boxTy.llvm, {kRankPosInBox}); + return getValueFromBox(loc, boxTy, box, resultTy, rewriter, kRankPosInBox); +} + // Get the element type given an LLVM type that is of the form // (array|struct|vector)+ and the provided indexes. 
mlir::Type ConvertFIRToLLVMPattern::getBoxEleTy( diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp index e56595d1c8e232..b8823bfa59f8f2 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -789,7 +789,3 @@ class ConvertHLFIRtoFIR }; } // namespace - -std::unique_ptr hlfir::createConvertHLFIRtoFIRPass() { - return std::make_unique(); -} diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index f34820dd10792a..0224ecfdde7c60 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -18,34 +18,34 @@ module cudadevice ! Synchronization Functions interface - attributes(device) subroutine syncthreads() + attributes(device) subroutine syncthreads() bind(c, name='__syncthreads') end subroutine end interface public :: syncthreads interface - attributes(device) integer function syncthreads_and(value) + attributes(device) integer function syncthreads_and(value) bind(c, name='__syncthreads_and') integer :: value end function end interface public :: syncthreads_and interface - attributes(device) integer function syncthreads_count(value) + attributes(device) integer function syncthreads_count(value) bind(c, name='__syncthreads_count') integer :: value end function end interface public :: syncthreads_count interface - attributes(device) integer function syncthreads_or(value) + attributes(device) integer function syncthreads_or(value) bind(c, name='__syncthreads_or') integer :: value end function end interface public :: syncthreads_or interface - attributes(device) subroutine syncwarp(mask) + attributes(device) subroutine syncwarp(mask) bind(c, name='__syncwarp') integer :: mask end subroutine end interface @@ -54,19 +54,19 @@ attributes(device) subroutine syncwarp(mask) ! 
Memory Fences interface - attributes(device) subroutine threadfence() + attributes(device) subroutine threadfence() bind(c, name='__threadfence') end subroutine end interface public :: threadfence interface - attributes(device) subroutine threadfence_block() + attributes(device) subroutine threadfence_block() bind(c, name='__threadfence_block') end subroutine end interface public :: threadfence_block interface - attributes(device) subroutine threadfence_system() + attributes(device) subroutine threadfence_system() bind(c, name='__threadfence_system') end subroutine end interface public :: threadfence_system diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 21323a5e657c94..70cb0443e9a645 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -941,7 +941,8 @@ func.func @extract_rank(%arg0: !fir.box>) -> i32 { // CHECK-LABEL: llvm.func @extract_rank( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) -> i32 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> -// CHECK: %[[RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i32 +// CHECK: %[[RAW_RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i8 +// CHECK: %[[RANK:.*]] = llvm.sext %[[RAW_RANK]] : i8 to i32 // CHECK: llvm.return %[[RANK]] : i32 // ----- @@ -1009,9 +1010,9 @@ func.func @box_isarray(%arg0: !fir.box>) -> i1 { // CHECK-LABEL: llvm.func @box_isarray( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) -> i1 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> -// CHECK: %[[RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i32 -// CHECK: %[[C0_ISARRAY:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[IS_ARRAY:.*]] = llvm.icmp "ne" %[[RANK]], %[[C0_ISARRAY]] : i32 +// CHECK: %[[RANK:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> i8 +// CHECK: 
%[[C0_ISARRAY:.*]] = llvm.mlir.constant(0 : i64) : i8 +// CHECK: %[[IS_ARRAY:.*]] = llvm.icmp "ne" %[[RANK]], %[[C0_ISARRAY]] : i8 // CHECK: llvm.return %[[IS_ARRAY]] : i1 // ----- diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir index 048f53f5c6e47a..f4f23d35cba257 100644 --- a/flang/test/Fir/tbaa.fir +++ b/flang/test/Fir/tbaa.fir @@ -248,8 +248,9 @@ func.func @tbaa(%arg0: !fir.box>) -> i32 { // CHECK-LABEL: llvm.func @tbaa( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> i32 { // CHECK: %[[VAL_1:.*]] = llvm.getelementptr %[[VAL_0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> -// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i32 -// CHECK: llvm.return %[[VAL_2]] : i32 +// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i8 +// CHECK: %[[VAL_3:.*]] = llvm.sext %[[VAL_2]] : i8 to i32 +// CHECK: llvm.return %[[VAL_3]] : i32 // CHECK: } // ----- @@ -267,9 +268,9 @@ func.func @tbaa(%arg0: !fir.box>) -> i1 { // CHECK-LABEL: llvm.func @tbaa( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr) -> i1 { // CHECK: %[[VAL_1:.*]] = llvm.getelementptr %[[VAL_0]][0, 3] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> -// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i32 -// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[VAL_4:.*]] = llvm.icmp "ne" %[[VAL_2]], %[[VAL_3]] : i32 +// CHECK: %[[VAL_2:.*]] = llvm.load %[[VAL_1]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i8 +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(0 : i64) : i8 +// CHECK: %[[VAL_4:.*]] = llvm.icmp "ne" %[[VAL_2]], %[[VAL_3]] : i8 // CHECK: llvm.return %[[VAL_4]] : i1 // CHECK: } diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf new file mode 100644 index 00000000000000..0c71ea6efcd632 --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -0,0 +1,36 @@ +! 
RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +! Test CUDA Fortran procedures available in cudadevice module + +attributes(global) subroutine devsub() + implicit none + integer :: ret + + call syncthreads() + call syncwarp(1) + call threadfence() + call threadfence_block() + call threadfence_system() + ret = syncthreads_and(1) + ret = syncthreads_count(1) + ret = syncthreads_or(1) +end + +! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} +! CHECK: fir.call @__syncthreads() +! CHECK: fir.call @__syncwarp(%{{.*}}) fastmath : (!fir.ref) -> () +! CHECK: fir.call @__threadfence() +! CHECK: fir.call @__threadfence_block() +! CHECK: fir.call @__threadfence_system() +! CHECK: %{{.*}} = fir.call @__syncthreads_and(%{{.*}}) fastmath : (!fir.ref) -> i32 +! CHECK: %{{.*}} = fir.call @__syncthreads_count(%{{.*}}) fastmath : (!fir.ref) -> i32 +! CHECK: %{{.*}} = fir.call @__syncthreads_or(%{{.*}}) fastmath : (!fir.ref) -> i32 + +! CHECK: func.func private @__syncthreads() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads"} +! CHECK: func.func private @__syncwarp(!fir.ref {cuf.data_attr = #cuf.cuda}) attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncwarp"} +! CHECK: func.func private @__threadfence() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence"} +! CHECK: func.func private @__threadfence_block() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_block"} +! CHECK: func.func private @__threadfence_system() attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__threadfence_system"} +! CHECK: func.func private @__syncthreads_and(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_and"} +! CHECK: func.func private @__syncthreads_count(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_count"} +! 
CHECK: func.func private @__syncthreads_or(!fir.ref {cuf.data_attr = #cuf.cuda}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncthreads_or"} diff --git a/flang/test/Lower/PowerPC/ppc-vec-load.f90 b/flang/test/Lower/PowerPC/ppc-vec-load.f90 index 4d51512df0f7b4..a81ed055ce08c8 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-load.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-load.f90 @@ -1,12 +1,13 @@ -! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-LE" %s -! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-BE" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-LE","LLVM" %s +! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr9 -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR_P9","LLVM" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefixes="LLVMIR","LLVMIR-BE","LLVM" %s ! REQUIRES: target=powerpc{{.*}} !---------------------- ! vec_ld !---------------------- -! CHECK-LABEL: @vec_ld_testi8 +! LLVM-LABEL: @vec_ld_testi8 subroutine vec_ld_testi8(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2, res @@ -19,7 +20,7 @@ subroutine vec_ld_testi8(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testi8 -! CHECK-LABEL: @vec_ld_testi16 +! LLVM-LABEL: @vec_ld_testi16 subroutine vec_ld_testi16(arg1, arg2, res) integer(2) :: arg1 vector(integer(2)) :: arg2, res @@ -32,7 +33,7 @@ subroutine vec_ld_testi16(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testi16 -! CHECK-LABEL: @vec_ld_testi32 +! 
LLVM-LABEL: @vec_ld_testi32 subroutine vec_ld_testi32(arg1, arg2, res) integer(4) :: arg1 vector(integer(4)) :: arg2, res @@ -44,7 +45,7 @@ subroutine vec_ld_testi32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testi32 -! CHECK-LABEL: @vec_ld_testf32 +! LLVM-LABEL: @vec_ld_testf32 subroutine vec_ld_testf32(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2, res @@ -58,7 +59,7 @@ subroutine vec_ld_testf32(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testf32 -! CHECK-LABEL: @vec_ld_testu32 +! LLVM-LABEL: @vec_ld_testu32 subroutine vec_ld_testu32(arg1, arg2, res) integer(1) :: arg1 vector(unsigned(4)) :: arg2, res @@ -70,7 +71,7 @@ subroutine vec_ld_testu32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ld_testu32 -! CHECK-LABEL: @vec_ld_testi32a +! LLVM-LABEL: @vec_ld_testi32a subroutine vec_ld_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(10) @@ -83,7 +84,7 @@ subroutine vec_ld_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ld_testi32a -! CHECK-LABEL: @vec_ld_testf32av +! LLVM-LABEL: @vec_ld_testf32av subroutine vec_ld_testf32av(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2(2, 4, 8) @@ -98,7 +99,7 @@ subroutine vec_ld_testf32av(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ld_testf32av -! CHECK-LABEL: @vec_ld_testi32s +! LLVM-LABEL: @vec_ld_testi32s subroutine vec_ld_testi32s(arg1, arg2, res) integer(4) :: arg1 real(4) :: arg2 @@ -116,7 +117,7 @@ end subroutine vec_ld_testi32s ! vec_lde !---------------------- -! CHECK-LABEL: @vec_lde_testi8s +! LLVM-LABEL: @vec_lde_testi8s subroutine vec_lde_testi8s(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2 @@ -129,7 +130,7 @@ subroutine vec_lde_testi8s(arg1, arg2, res) ! 
LLVMIR: store <16 x i8> %[[call]], ptr %2, align 16 end subroutine vec_lde_testi8s -! CHECK-LABEL: @vec_lde_testi16a +! LLVM-LABEL: @vec_lde_testi16a subroutine vec_lde_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -142,7 +143,7 @@ subroutine vec_lde_testi16a(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[call]], ptr %2, align 16 end subroutine vec_lde_testi16a -! CHECK-LABEL: @vec_lde_testi32a +! LLVM-LABEL: @vec_lde_testi32a subroutine vec_lde_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(4) @@ -155,7 +156,7 @@ subroutine vec_lde_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_lde_testi32a -! CHECK-LABEL: @vec_lde_testf32a +! LLVM-LABEL: @vec_lde_testf32a subroutine vec_lde_testf32a(arg1, arg2, res) integer(8) :: arg1 real(4) :: arg2(4) @@ -173,7 +174,7 @@ end subroutine vec_lde_testf32a ! vec_ldl !---------------------- -! CHECK-LABEL: @vec_ldl_testi8 +! LLVM-LABEL: @vec_ldl_testi8 subroutine vec_ldl_testi8(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2, res @@ -186,7 +187,7 @@ subroutine vec_ldl_testi8(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testi8 -! CHECK-LABEL: @vec_ldl_testi16 +! LLVM-LABEL: @vec_ldl_testi16 subroutine vec_ldl_testi16(arg1, arg2, res) integer(2) :: arg1 vector(integer(2)) :: arg2, res @@ -199,7 +200,7 @@ subroutine vec_ldl_testi16(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testi16 -! CHECK-LABEL: @vec_ldl_testi32 +! LLVM-LABEL: @vec_ldl_testi32 subroutine vec_ldl_testi32(arg1, arg2, res) integer(4) :: arg1 vector(integer(4)) :: arg2, res @@ -211,7 +212,7 @@ subroutine vec_ldl_testi32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testi32 -! CHECK-LABEL: @vec_ldl_testf32 +! 
LLVM-LABEL: @vec_ldl_testf32 subroutine vec_ldl_testf32(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2, res @@ -225,7 +226,7 @@ subroutine vec_ldl_testf32(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testf32 -! CHECK-LABEL: @vec_ldl_testu32 +! LLVM-LABEL: @vec_ldl_testu32 subroutine vec_ldl_testu32(arg1, arg2, res) integer(1) :: arg1 vector(unsigned(4)) :: arg2, res @@ -237,7 +238,7 @@ subroutine vec_ldl_testu32(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ldl_testu32 -! CHECK-LABEL: @vec_ldl_testi32a +! LLVM-LABEL: @vec_ldl_testi32a subroutine vec_ldl_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(10) @@ -250,7 +251,7 @@ subroutine vec_ldl_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16 end subroutine vec_ldl_testi32a -! CHECK-LABEL: @vec_ldl_testf32av +! LLVM-LABEL: @vec_ldl_testf32av subroutine vec_ldl_testf32av(arg1, arg2, res) integer(8) :: arg1 vector(real(4)) :: arg2(2, 4, 8) @@ -264,7 +265,7 @@ subroutine vec_ldl_testf32av(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_ldl_testf32av -! CHECK-LABEL: @vec_ldl_testi32s +! LLVM-LABEL: @vec_ldl_testi32s subroutine vec_ldl_testi32s(arg1, arg2, res) integer(4) :: arg1 real(4) :: arg2 @@ -282,7 +283,7 @@ end subroutine vec_ldl_testi32s ! vec_lvsl !---------------------- -! CHECK-LABEL: @vec_lvsl_testi8s +! LLVM-LABEL: @vec_lvsl_testi8s subroutine vec_lvsl_testi8s(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2 @@ -300,7 +301,7 @@ subroutine vec_lvsl_testi8s(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lvsl_testi8s -! CHECK-LABEL: @vec_lvsl_testi16a +! LLVM-LABEL: @vec_lvsl_testi16a subroutine vec_lvsl_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(4) @@ -318,7 +319,7 @@ subroutine vec_lvsl_testi16a(arg1, arg2, res) ! 
LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lvsl_testi16a -! CHECK-LABEL: @vec_lvsl_testi32a +! LLVM-LABEL: @vec_lvsl_testi32a subroutine vec_lvsl_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 3, 4) @@ -336,7 +337,7 @@ subroutine vec_lvsl_testi32a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lvsl_testi32a -! CHECK-LABEL: @vec_lvsl_testf32a +! LLVM-LABEL: @vec_lvsl_testf32a subroutine vec_lvsl_testf32a(arg1, arg2, res) integer(8) :: arg1 real(4) :: arg2(4) @@ -357,7 +358,7 @@ end subroutine vec_lvsl_testf32a ! vec_lvsr !---------------------- -! CHECK-LABEL: @vec_lvsr_testi8s +! LLVM-LABEL: @vec_lvsr_testi8s subroutine vec_lvsr_testi8s(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2 @@ -375,7 +376,7 @@ subroutine vec_lvsr_testi8s(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[addr]], ptr %2, align 16 end subroutine vec_lvsr_testi8s -! CHECK-LABEL: @vec_lvsr_testi16a +! LLVM-LABEL: @vec_lvsr_testi16a subroutine vec_lvsr_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(4) @@ -393,7 +394,7 @@ subroutine vec_lvsr_testi16a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[addr]], ptr %2, align 16 end subroutine vec_lvsr_testi16a -! CHECK-LABEL: @vec_lvsr_testi32a +! LLVM-LABEL: @vec_lvsr_testi32a subroutine vec_lvsr_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 3, 4) @@ -411,7 +412,7 @@ subroutine vec_lvsr_testi32a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[addr]], ptr %2, align 16 end subroutine vec_lvsr_testi32a -! CHECK-LABEL: @vec_lvsr_testf32a +! LLVM-LABEL: @vec_lvsr_testf32a subroutine vec_lvsr_testf32a(arg1, arg2, res) integer(8) :: arg1 real(4) :: arg2(4) @@ -432,7 +433,7 @@ end subroutine vec_lvsr_testf32a ! vec_lxv !---------------------- -! CHECK-LABEL: @vec_lxv_testi8a +! 
LLVM-LABEL: @vec_lxv_testi8a subroutine vec_lxv_testi8a(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2(4) @@ -445,7 +446,7 @@ subroutine vec_lxv_testi8a(arg1, arg2, res) ! LLVMIR_P9: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testi8a -! CHECK-LABEL: @vec_lxv_testi16a +! LLVM-LABEL: @vec_lxv_testi16a subroutine vec_lxv_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -458,7 +459,7 @@ subroutine vec_lxv_testi16a(arg1, arg2, res) ! LLVMIR_P9: store <8 x i16> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testi16a -! CHECK-LABEL: @vec_lxv_testi32a +! LLVM-LABEL: @vec_lxv_testi32a subroutine vec_lxv_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 4, 8) @@ -471,7 +472,7 @@ subroutine vec_lxv_testi32a(arg1, arg2, res) ! LLVMIR_P9: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testi32a -! CHECK-LABEL: @vec_lxv_testf32a +! LLVM-LABEL: @vec_lxv_testf32a subroutine vec_lxv_testf32a(arg1, arg2, res) integer(2) :: arg1 real(4) :: arg2(4) @@ -484,7 +485,7 @@ subroutine vec_lxv_testf32a(arg1, arg2, res) ! LLVMIR_P9: store <4 x float> %[[ld]], ptr %2, align 16 end subroutine vec_lxv_testf32a -! CHECK-LABEL: @vec_lxv_testf64a +! LLVM-LABEL: @vec_lxv_testf64a subroutine vec_lxv_testf64a(arg1, arg2, res) integer(8) :: arg1 real(8) :: arg2(4) @@ -501,7 +502,7 @@ end subroutine vec_lxv_testf64a ! vec_xld2 !---------------------- -! CHECK-LABEL: @vec_xld2_testi8a +! LLVM-LABEL: @vec_xld2_testi8a subroutine vec_xld2_testi8a(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2(4) @@ -515,7 +516,7 @@ subroutine vec_xld2_testi8a(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi8a -! CHECK-LABEL: @vec_xld2_testi16 +! LLVM-LABEL: @vec_xld2_testi16 subroutine vec_xld2_testi16(arg1, arg2, res) integer :: arg1 vector(integer(2)) :: arg2 @@ -529,7 +530,7 @@ subroutine vec_xld2_testi16(arg1, arg2, res) ! 
LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi16 -! CHECK-LABEL: @vec_xld2_testi32a +! LLVM-LABEL: @vec_xld2_testi32a subroutine vec_xld2_testi32a(arg1, arg2, res) integer(4) :: arg1 vector(integer(4)) :: arg2(41) @@ -543,7 +544,7 @@ subroutine vec_xld2_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi32a -! CHECK-LABEL: @vec_xld2_testi64a +! LLVM-LABEL: @vec_xld2_testi64a subroutine vec_xld2_testi64a(arg1, arg2, res) integer(8) :: arg1 vector(integer(8)) :: arg2(4) @@ -557,7 +558,7 @@ subroutine vec_xld2_testi64a(arg1, arg2, res) ! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testi64a -! CHECK-LABEL: @vec_xld2_testf32a +! LLVM-LABEL: @vec_xld2_testf32a subroutine vec_xld2_testf32a(arg1, arg2, res) integer(2) :: arg1 vector(real(4)) :: arg2(4) @@ -571,7 +572,7 @@ subroutine vec_xld2_testf32a(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_xld2_testf32a -! CHECK-LABEL: @vec_xld2_testf64a +! LLVM-LABEL: @vec_xld2_testf64a subroutine vec_xld2_testf64a(arg1, arg2, res) integer(8) :: arg1 vector(real(8)) :: arg2(4) @@ -588,7 +589,7 @@ end subroutine vec_xld2_testf64a ! vec_xl !---------------------- -! CHECK-LABEL: @vec_xl_testi8a +! LLVM-LABEL: @vec_xl_testi8a subroutine vec_xl_testi8a(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2(4) @@ -601,7 +602,7 @@ subroutine vec_xl_testi8a(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_xl_testi8a -! CHECK-LABEL: @vec_xl_testi16a +! LLVM-LABEL: @vec_xl_testi16a subroutine vec_xl_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -614,7 +615,7 @@ subroutine vec_xl_testi16a(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[ld]], ptr %2, align 16 end subroutine vec_xl_testi16a -! CHECK-LABEL: @vec_xl_testi32a +! 
LLVM-LABEL: @vec_xl_testi32a subroutine vec_xl_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 4, 8) @@ -627,7 +628,7 @@ subroutine vec_xl_testi32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_xl_testi32a -! CHECK-LABEL: @vec_xl_testi64a +! LLVM-LABEL: @vec_xl_testi64a subroutine vec_xl_testi64a(arg1, arg2, res) integer(8) :: arg1 integer(8) :: arg2(2, 4, 8) @@ -641,7 +642,7 @@ subroutine vec_xl_testi64a(arg1, arg2, res) ! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16 end subroutine vec_xl_testi64a -! CHECK-LABEL: @vec_xl_testf32a +! LLVM-LABEL: @vec_xl_testf32a subroutine vec_xl_testf32a(arg1, arg2, res) integer(2) :: arg1 real(4) :: arg2(4) @@ -655,7 +656,7 @@ subroutine vec_xl_testf32a(arg1, arg2, res) ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16 end subroutine vec_xl_testf32a -! CHECK-LABEL: @vec_xl_testf64a +! LLVM-LABEL: @vec_xl_testf64a subroutine vec_xl_testf64a(arg1, arg2, res) integer(8) :: arg1 real(8) :: arg2 @@ -672,7 +673,7 @@ end subroutine vec_xl_testf64a ! vec_xlds !---------------------- -! CHECK-LABEL: @vec_xlds_testi64a +! LLVM-LABEL: @vec_xlds_testi64a subroutine vec_xlds_testi64a(arg1, arg2, res) integer(8) :: arg1 vector(integer(8)) :: arg2(4) @@ -687,7 +688,7 @@ subroutine vec_xlds_testi64a(arg1, arg2, res) ! LLVMIR: store <2 x i64> %[[shfl]], ptr %2, align 16 end subroutine vec_xlds_testi64a -! CHECK-LABEL: @vec_xlds_testf64a +! LLVM-LABEL: @vec_xlds_testf64a subroutine vec_xlds_testf64a(arg1, arg2, res) integer(8) :: arg1 vector(real(8)) :: arg2(4) @@ -707,7 +708,7 @@ end subroutine vec_xlds_testf64a ! vec_xl_be !---------------------- -! CHECK-LABEL: @vec_xl_be_testi8a +! LLVM-LABEL: @vec_xl_be_testi8a subroutine vec_xl_be_testi8a(arg1, arg2, res) integer(1) :: arg1 integer(1) :: arg2(2, 4, 8) @@ -722,7 +723,7 @@ subroutine vec_xl_be_testi8a(arg1, arg2, res) ! LLVMIR-BE: store <16 x i8> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi8a -! 
CHECK-LABEL: @vec_xl_be_testi16a +! LLVM-LABEL: @vec_xl_be_testi16a subroutine vec_xl_be_testi16a(arg1, arg2, res) integer(2) :: arg1 integer(2) :: arg2(2, 4, 8) @@ -737,7 +738,7 @@ subroutine vec_xl_be_testi16a(arg1, arg2, res) ! LLVMIR-BE: store <8 x i16> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi16a -! CHECK-LABEL: @vec_xl_be_testi32a +! LLVM-LABEL: @vec_xl_be_testi32a subroutine vec_xl_be_testi32a(arg1, arg2, res) integer(4) :: arg1 integer(4) :: arg2(2, 4, 8) @@ -752,7 +753,7 @@ subroutine vec_xl_be_testi32a(arg1, arg2, res) ! LLVMIR-BE: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi32a -! CHECK-LABEL: @vec_xl_be_testi64a +! LLVM-LABEL: @vec_xl_be_testi64a subroutine vec_xl_be_testi64a(arg1, arg2, res) integer(8) :: arg1 integer(8) :: arg2(2, 4, 8) @@ -767,7 +768,7 @@ subroutine vec_xl_be_testi64a(arg1, arg2, res) ! LLVMIR-BE: store <2 x i64> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testi64a -! CHECK-LABEL: @vec_xl_be_testf32a +! LLVM-LABEL: @vec_xl_be_testf32a subroutine vec_xl_be_testf32a(arg1, arg2, res) integer(2) :: arg1 real(4) :: arg2(4) @@ -782,7 +783,7 @@ subroutine vec_xl_be_testf32a(arg1, arg2, res) ! LLVMIR-BE: store <4 x float> %[[ld]], ptr %2, align 16 end subroutine vec_xl_be_testf32a -! CHECK-LABEL: @vec_xl_be_testf64a +! LLVM-LABEL: @vec_xl_be_testf64a subroutine vec_xl_be_testf64a(arg1, arg2, res) integer(8) :: arg1 real(8) :: arg2(7) @@ -801,7 +802,7 @@ end subroutine vec_xl_be_testf64a ! vec_xlw4 !---------------------- -! CHECK-LABEL: @vec_xlw4_testi8a +! LLVM-LABEL: @vec_xlw4_testi8a subroutine vec_xlw4_testi8a(arg1, arg2, res) integer(1) :: arg1 vector(integer(1)) :: arg2(2, 4, 8) @@ -815,7 +816,7 @@ subroutine vec_xlw4_testi8a(arg1, arg2, res) ! LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16 end subroutine vec_xlw4_testi8a -! CHECK-LABEL: @vec_xlw4_testi16a +! 
LLVM-LABEL: @vec_xlw4_testi16a subroutine vec_xlw4_testi16a(arg1, arg2, res) integer(2) :: arg1 vector(integer(2)) :: arg2(2, 4, 8) @@ -829,7 +830,7 @@ subroutine vec_xlw4_testi16a(arg1, arg2, res) ! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16 end subroutine vec_xlw4_testi16a -! CHECK-LABEL: @vec_xlw4_testu32a +! LLVM-LABEL: @vec_xlw4_testu32a subroutine vec_xlw4_testu32a(arg1, arg2, res) integer(4) :: arg1 vector(unsigned(4)) :: arg2(2, 4, 8) @@ -842,7 +843,7 @@ subroutine vec_xlw4_testu32a(arg1, arg2, res) ! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16 end subroutine vec_xlw4_testu32a -! CHECK-LABEL: @vec_xlw4_testf32a +! LLVM-LABEL: @vec_xlw4_testf32a subroutine vec_xlw4_testf32a(arg1, arg2, res) integer(2) :: arg1 vector(real(4)) :: arg2(4) diff --git a/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 b/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 index bd83f28b4eeb52..6c4f202f89a456 100644 --- a/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 +++ b/flang/test/Lower/PowerPC/ppc-vec-shift-be-le.f90 @@ -1,13 +1,13 @@ -! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="CHECK" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64le-unknown-linux -o - | FileCheck --check-prefixes="LLVMIR","LLVM" %s ! -! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-LLVMIR" %s +! RUN: %flang_fc1 -flang-experimental-hlfir -emit-llvm %s -triple ppc64-unknown-aix -o - | FileCheck --check-prefixes="BE-LLVMIR","LLVM" %s ! REQUIRES: target=powerpc{{.*}} !---------------------- ! vec_sld !---------------------- -! CHECK-LABEL: vec_sld_test_i1i1 +! LLVM-LABEL: vec_sld_test_i1i1 subroutine vec_sld_test_i1i1(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -23,7 +23,7 @@ subroutine vec_sld_test_i1i1(arg1, arg2) ! 
BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i1 -! CHECK-LABEL: vec_sld_test_i1i2 +! LLVM-LABEL: vec_sld_test_i1i2 subroutine vec_sld_test_i1i2(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -39,7 +39,7 @@ subroutine vec_sld_test_i1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i2 -! CHECK-LABEL: vec_sld_test_i1i4 +! LLVM-LABEL: vec_sld_test_i1i4 subroutine vec_sld_test_i1i4(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -55,7 +55,7 @@ subroutine vec_sld_test_i1i4(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i4 -! CHECK-LABEL: vec_sld_test_i1i8 +! LLVM-LABEL: vec_sld_test_i1i8 subroutine vec_sld_test_i1i8(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -71,7 +71,7 @@ subroutine vec_sld_test_i1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i1i8 -! CHECK-LABEL: vec_sld_test_i2i1 +! LLVM-LABEL: vec_sld_test_i2i1 subroutine vec_sld_test_i2i1(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -93,7 +93,7 @@ subroutine vec_sld_test_i2i1(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i1 -! CHECK-LABEL: vec_sld_test_i2i2 +! LLVM-LABEL: vec_sld_test_i2i2 subroutine vec_sld_test_i2i2(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 8_2) @@ -115,7 +115,7 @@ subroutine vec_sld_test_i2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i2 -! CHECK-LABEL: vec_sld_test_i2i4 +! LLVM-LABEL: vec_sld_test_i2i4 subroutine vec_sld_test_i2i4(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -137,7 +137,7 @@ subroutine vec_sld_test_i2i4(arg1, arg2) ! 
BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i4 -! CHECK-LABEL: vec_sld_test_i2i8 +! LLVM-LABEL: vec_sld_test_i2i8 subroutine vec_sld_test_i2i8(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 11_8) @@ -159,7 +159,7 @@ subroutine vec_sld_test_i2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i2i8 -! CHECK-LABEL: vec_sld_test_i4i1 +! LLVM-LABEL: vec_sld_test_i4i1 subroutine vec_sld_test_i4i1(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -181,7 +181,7 @@ subroutine vec_sld_test_i4i1(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i1 -! CHECK-LABEL: vec_sld_test_i4i2 +! LLVM-LABEL: vec_sld_test_i4i2 subroutine vec_sld_test_i4i2(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -203,7 +203,7 @@ subroutine vec_sld_test_i4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i2 -! CHECK-LABEL: vec_sld_test_i4i4 +! LLVM-LABEL: vec_sld_test_i4i4 subroutine vec_sld_test_i4i4(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -225,7 +225,7 @@ subroutine vec_sld_test_i4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i4 -! CHECK-LABEL: vec_sld_test_i4i8 +! LLVM-LABEL: vec_sld_test_i4i8 subroutine vec_sld_test_i4i8(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -247,7 +247,7 @@ subroutine vec_sld_test_i4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_i4i8 -! CHECK-LABEL: vec_sld_test_u1i1 +! LLVM-LABEL: vec_sld_test_u1i1 subroutine vec_sld_test_u1i1(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -263,7 +263,7 @@ subroutine vec_sld_test_u1i1(arg1, arg2) ! 
BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i1 -! CHECK-LABEL: vec_sld_test_u1i2 +! LLVM-LABEL: vec_sld_test_u1i2 subroutine vec_sld_test_u1i2(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -279,7 +279,7 @@ subroutine vec_sld_test_u1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i2 -! CHECK-LABEL: vec_sld_test_u1i4 +! LLVM-LABEL: vec_sld_test_u1i4 subroutine vec_sld_test_u1i4(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -295,7 +295,7 @@ subroutine vec_sld_test_u1i4(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i4 -! CHECK-LABEL: vec_sld_test_u1i8 +! LLVM-LABEL: vec_sld_test_u1i8 subroutine vec_sld_test_u1i8(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -311,7 +311,7 @@ subroutine vec_sld_test_u1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u1i8 -! CHECK-LABEL: vec_sld_test_u2i1 +! LLVM-LABEL: vec_sld_test_u2i1 subroutine vec_sld_test_u2i1(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -333,7 +333,7 @@ subroutine vec_sld_test_u2i1(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i1 -! CHECK-LABEL: vec_sld_test_u2i2 +! LLVM-LABEL: vec_sld_test_u2i2 subroutine vec_sld_test_u2i2(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -355,7 +355,7 @@ subroutine vec_sld_test_u2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i2 -! CHECK-LABEL: vec_sld_test_u2i4 +! LLVM-LABEL: vec_sld_test_u2i4 subroutine vec_sld_test_u2i4(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -377,7 +377,7 @@ subroutine vec_sld_test_u2i4(arg1, arg2) ! 
BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i4 -! CHECK-LABEL: vec_sld_test_u2i8 +! LLVM-LABEL: vec_sld_test_u2i8 subroutine vec_sld_test_u2i8(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -399,7 +399,7 @@ subroutine vec_sld_test_u2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u2i8 -! CHECK-LABEL: vec_sld_test_u4i1 +! LLVM-LABEL: vec_sld_test_u4i1 subroutine vec_sld_test_u4i1(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -421,7 +421,7 @@ subroutine vec_sld_test_u4i1(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i1 -! CHECK-LABEL: vec_sld_test_u4i2 +! LLVM-LABEL: vec_sld_test_u4i2 subroutine vec_sld_test_u4i2(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -443,7 +443,7 @@ subroutine vec_sld_test_u4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i2 -! CHECK-LABEL: vec_sld_test_u4i4 +! LLVM-LABEL: vec_sld_test_u4i4 subroutine vec_sld_test_u4i4(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -465,7 +465,7 @@ subroutine vec_sld_test_u4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i4 -! CHECK-LABEL: vec_sld_test_u4i8 +! LLVM-LABEL: vec_sld_test_u4i8 subroutine vec_sld_test_u4i8(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_8) @@ -487,7 +487,7 @@ subroutine vec_sld_test_u4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_u4i8 -! CHECK-LABEL: vec_sld_test_r4i1 +! LLVM-LABEL: vec_sld_test_r4i1 subroutine vec_sld_test_r4i1(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_1) @@ -509,7 +509,7 @@ subroutine vec_sld_test_r4i1(arg1, arg2) ! 
BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_r4i1 -! CHECK-LABEL: vec_sld_test_r4i2 +! LLVM-LABEL: vec_sld_test_r4i2 subroutine vec_sld_test_r4i2(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_2) @@ -531,7 +531,7 @@ subroutine vec_sld_test_r4i2(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_r4i2 -! CHECK-LABEL: vec_sld_test_r4i4 +! LLVM-LABEL: vec_sld_test_r4i4 subroutine vec_sld_test_r4i4(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 3_4) @@ -553,7 +553,7 @@ subroutine vec_sld_test_r4i4(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sld_test_r4i4 -! CHECK-LABEL: vec_sld_test_r4i8 +! LLVM-LABEL: vec_sld_test_r4i8 subroutine vec_sld_test_r4i8(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sld(arg1, arg2, 1_8) @@ -578,7 +578,7 @@ end subroutine vec_sld_test_r4i8 !---------------------- ! vec_sldw !---------------------- -! CHECK-LABEL: vec_sldw_test_i1i1 +! LLVM-LABEL: vec_sldw_test_i1i1 subroutine vec_sldw_test_i1i1(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -594,7 +594,7 @@ subroutine vec_sldw_test_i1i1(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i1 -! CHECK-LABEL: vec_sldw_test_i1i2 +! LLVM-LABEL: vec_sldw_test_i1i2 subroutine vec_sldw_test_i1i2(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -610,7 +610,7 @@ subroutine vec_sldw_test_i1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i2 -! CHECK-LABEL: vec_sldw_test_i1i4 +! LLVM-LABEL: vec_sldw_test_i1i4 subroutine vec_sldw_test_i1i4(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -626,7 +626,7 @@ subroutine vec_sldw_test_i1i4(arg1, arg2) ! 
BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i4 -! CHECK-LABEL: vec_sldw_test_i1i8 +! LLVM-LABEL: vec_sldw_test_i1i8 subroutine vec_sldw_test_i1i8(arg1, arg2) vector(integer(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -642,7 +642,7 @@ subroutine vec_sldw_test_i1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i1i8 -! CHECK-LABEL: vec_sldw_test_i2i1 +! LLVM-LABEL: vec_sldw_test_i2i1 subroutine vec_sldw_test_i2i1(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -664,7 +664,7 @@ subroutine vec_sldw_test_i2i1(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i1 -! CHECK-LABEL: vec_sldw_test_i2i2 +! LLVM-LABEL: vec_sldw_test_i2i2 subroutine vec_sldw_test_i2i2(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -686,7 +686,7 @@ subroutine vec_sldw_test_i2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i2 -! CHECK-LABEL: vec_sldw_test_i2i4 +! LLVM-LABEL: vec_sldw_test_i2i4 subroutine vec_sldw_test_i2i4(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -708,7 +708,7 @@ subroutine vec_sldw_test_i2i4(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i4 -! CHECK-LABEL: vec_sldw_test_i2i8 +! LLVM-LABEL: vec_sldw_test_i2i8 subroutine vec_sldw_test_i2i8(arg1, arg2) vector(integer(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -730,7 +730,7 @@ subroutine vec_sldw_test_i2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i2i8 -! CHECK-LABEL: vec_sldw_test_i4i1 +! LLVM-LABEL: vec_sldw_test_i4i1 subroutine vec_sldw_test_i4i1(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -752,7 +752,7 @@ subroutine vec_sldw_test_i4i1(arg1, arg2) ! 
BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i1 -! CHECK-LABEL: vec_sldw_test_i4i2 +! LLVM-LABEL: vec_sldw_test_i4i2 subroutine vec_sldw_test_i4i2(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -774,7 +774,7 @@ subroutine vec_sldw_test_i4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i2 -! CHECK-LABEL: vec_sldw_test_i4i4 +! LLVM-LABEL: vec_sldw_test_i4i4 subroutine vec_sldw_test_i4i4(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -796,7 +796,7 @@ subroutine vec_sldw_test_i4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i4 -! CHECK-LABEL: vec_sldw_test_i4i8 +! LLVM-LABEL: vec_sldw_test_i4i8 subroutine vec_sldw_test_i4i8(arg1, arg2) vector(integer(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -818,7 +818,7 @@ subroutine vec_sldw_test_i4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i4i8 -! CHECK-LABEL: vec_sldw_test_i8i1 +! LLVM-LABEL: vec_sldw_test_i8i1 subroutine vec_sldw_test_i8i1(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -840,7 +840,7 @@ subroutine vec_sldw_test_i8i1(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i8i1 -! CHECK-LABEL: vec_sldw_test_i8i2 +! LLVM-LABEL: vec_sldw_test_i8i2 subroutine vec_sldw_test_i8i2(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -862,7 +862,7 @@ subroutine vec_sldw_test_i8i2(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i8i2 -! CHECK-LABEL: vec_sldw_test_i8i4 +! LLVM-LABEL: vec_sldw_test_i8i4 subroutine vec_sldw_test_i8i4(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -884,7 +884,7 @@ subroutine vec_sldw_test_i8i4(arg1, arg2) ! 
BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_i8i4 -! CHECK-LABEL: vec_sldw_test_i8i8 +! LLVM-LABEL: vec_sldw_test_i8i8 subroutine vec_sldw_test_i8i8(arg1, arg2) vector(integer(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -907,7 +907,7 @@ subroutine vec_sldw_test_i8i8(arg1, arg2) end subroutine vec_sldw_test_i8i8 -! CHECK-LABEL: vec_sldw_test_u1i1 +! LLVM-LABEL: vec_sldw_test_u1i1 subroutine vec_sldw_test_u1i1(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -923,7 +923,7 @@ subroutine vec_sldw_test_u1i1(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i1 -! CHECK-LABEL: vec_sldw_test_u1i2 +! LLVM-LABEL: vec_sldw_test_u1i2 subroutine vec_sldw_test_u1i2(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -939,7 +939,7 @@ subroutine vec_sldw_test_u1i2(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i2 -! CHECK-LABEL: vec_sldw_test_u1i4 +! LLVM-LABEL: vec_sldw_test_u1i4 subroutine vec_sldw_test_u1i4(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -955,7 +955,7 @@ subroutine vec_sldw_test_u1i4(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i4 -! CHECK-LABEL: vec_sldw_test_u1i8 +! LLVM-LABEL: vec_sldw_test_u1i8 subroutine vec_sldw_test_u1i8(arg1, arg2) vector(unsigned(1)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -971,7 +971,7 @@ subroutine vec_sldw_test_u1i8(arg1, arg2) ! BE-LLVMIR: store <16 x i8> %[[r]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u1i8 -! CHECK-LABEL: vec_sldw_test_u2i1 +! LLVM-LABEL: vec_sldw_test_u2i1 subroutine vec_sldw_test_u2i1(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -993,7 +993,7 @@ subroutine vec_sldw_test_u2i1(arg1, arg2) ! 
BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i1 -! CHECK-LABEL: vec_sldw_test_u2i2 +! LLVM-LABEL: vec_sldw_test_u2i2 subroutine vec_sldw_test_u2i2(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1015,7 +1015,7 @@ subroutine vec_sldw_test_u2i2(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i2 -! CHECK-LABEL: vec_sldw_test_u2i4 +! LLVM-LABEL: vec_sldw_test_u2i4 subroutine vec_sldw_test_u2i4(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1037,7 +1037,7 @@ subroutine vec_sldw_test_u2i4(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i4 -! CHECK-LABEL: vec_sldw_test_u2i8 +! LLVM-LABEL: vec_sldw_test_u2i8 subroutine vec_sldw_test_u2i8(arg1, arg2) vector(unsigned(2)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1059,7 +1059,7 @@ subroutine vec_sldw_test_u2i8(arg1, arg2) ! BE-LLVMIR: store <8 x i16> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u2i8 -! CHECK-LABEL: vec_sldw_test_u4i1 +! LLVM-LABEL: vec_sldw_test_u4i1 subroutine vec_sldw_test_u4i1(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1081,7 +1081,7 @@ subroutine vec_sldw_test_u4i1(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i1 -! CHECK-LABEL: vec_sldw_test_u4i2 +! LLVM-LABEL: vec_sldw_test_u4i2 subroutine vec_sldw_test_u4i2(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1103,7 +1103,7 @@ subroutine vec_sldw_test_u4i2(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i2 -! CHECK-LABEL: vec_sldw_test_u4i4 +! 
LLVM-LABEL: vec_sldw_test_u4i4 subroutine vec_sldw_test_u4i4(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1125,7 +1125,7 @@ subroutine vec_sldw_test_u4i4(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i4 -! CHECK-LABEL: vec_sldw_test_u4i8 +! LLVM-LABEL: vec_sldw_test_u4i8 subroutine vec_sldw_test_u4i8(arg1, arg2) vector(unsigned(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1147,7 +1147,7 @@ subroutine vec_sldw_test_u4i8(arg1, arg2) ! BE-LLVMIR: store <4 x i32> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u4i8 -! CHECK-LABEL: vec_sldw_test_u8i1 +! LLVM-LABEL: vec_sldw_test_u8i1 subroutine vec_sldw_test_u8i1(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1169,7 +1169,7 @@ subroutine vec_sldw_test_u8i1(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i1 -! CHECK-LABEL: vec_sldw_test_u8i2 +! LLVM-LABEL: vec_sldw_test_u8i2 subroutine vec_sldw_test_u8i2(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1191,7 +1191,7 @@ subroutine vec_sldw_test_u8i2(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i2 -! CHECK-LABEL: vec_sldw_test_u8i4 +! LLVM-LABEL: vec_sldw_test_u8i4 subroutine vec_sldw_test_u8i4(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1213,7 +1213,7 @@ subroutine vec_sldw_test_u8i4(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i4 -! CHECK-LABEL: vec_sldw_test_u8i8 +! LLVM-LABEL: vec_sldw_test_u8i8 subroutine vec_sldw_test_u8i8(arg1, arg2) vector(unsigned(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1235,7 +1235,7 @@ subroutine vec_sldw_test_u8i8(arg1, arg2) ! BE-LLVMIR: store <2 x i64> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_u8i8 -! 
CHECK-LABEL: vec_sldw_test_r4i1 +! LLVM-LABEL: vec_sldw_test_r4i1 subroutine vec_sldw_test_r4i1(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1257,7 +1257,7 @@ subroutine vec_sldw_test_r4i1(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i1 -! CHECK-LABEL: vec_sldw_test_r4i2 +! LLVM-LABEL: vec_sldw_test_r4i2 subroutine vec_sldw_test_r4i2(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1279,7 +1279,7 @@ subroutine vec_sldw_test_r4i2(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i2 -! CHECK-LABEL: vec_sldw_test_r4i4 +! LLVM-LABEL: vec_sldw_test_r4i4 subroutine vec_sldw_test_r4i4(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1301,7 +1301,7 @@ subroutine vec_sldw_test_r4i4(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i4 -! CHECK-LABEL: vec_sldw_test_r4i8 +! LLVM-LABEL: vec_sldw_test_r4i8 subroutine vec_sldw_test_r4i8(arg1, arg2) vector(real(4)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) @@ -1323,7 +1323,7 @@ subroutine vec_sldw_test_r4i8(arg1, arg2) ! BE-LLVMIR: store <4 x float> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r4i8 -! CHECK-LABEL: vec_sldw_test_r8i1 +! LLVM-LABEL: vec_sldw_test_r8i1 subroutine vec_sldw_test_r8i1(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_1) @@ -1345,7 +1345,7 @@ subroutine vec_sldw_test_r8i1(arg1, arg2) ! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r8i1 -! CHECK-LABEL: vec_sldw_test_r8i2 +! LLVM-LABEL: vec_sldw_test_r8i2 subroutine vec_sldw_test_r8i2(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_2) @@ -1367,7 +1367,7 @@ subroutine vec_sldw_test_r8i2(arg1, arg2) ! 
BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r8i2 -! CHECK-LABEL: vec_sldw_test_r8i4 +! LLVM-LABEL: vec_sldw_test_r8i4 subroutine vec_sldw_test_r8i4(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_4) @@ -1389,7 +1389,7 @@ subroutine vec_sldw_test_r8i4(arg1, arg2) ! BE-LLVMIR: store <2 x double> %[[br]], ptr %{{.*}}, align 16 end subroutine vec_sldw_test_r8i4 -! CHECK-LABEL: vec_sldw_test_r8i8 +! LLVM-LABEL: vec_sldw_test_r8i8 subroutine vec_sldw_test_r8i8(arg1, arg2) vector(real(8)) :: arg1, arg2, r r = vec_sldw(arg1, arg2, 3_8) diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 91b8cb71552a71..66b82c84dac499 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -87,4 +87,14 @@ add_proxy_header_library( libc.include.llvm-libc-macros.time_macros ) +add_proxy_header_library( + float_macros + HDRS + float_macros.h + DEPENDS + libc.include.llvm-libc-macros.float_macros + FULL_BUILD_DEPENDS + libc.include.float +) + add_subdirectory(types) diff --git a/libc/hdr/float_macros.h b/libc/hdr/float_macros.h new file mode 100644 index 00000000000000..a0ef5e29b98687 --- /dev/null +++ b/libc/hdr/float_macros.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from math.h ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_FLOAT_MACROS_H +#define LLVM_LIBC_HDR_FLOAT_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/float-macros.h" + +#else // Overlay mode + +#include + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_FLOAT_MACROS_H diff --git a/libc/include/llvm-libc-macros/float-macros.h b/libc/include/llvm-libc-macros/float-macros.h index 4fe8590c5f70c8..81c1df868bf6cd 100644 --- a/libc/include/llvm-libc-macros/float-macros.h +++ b/libc/include/llvm-libc-macros/float-macros.h @@ -9,21 +9,6 @@ #ifndef LLVM_LIBC_MACROS_FLOAT_MACROS_H #define LLVM_LIBC_MACROS_FLOAT_MACROS_H -// Suppress `#include_next is a language extension` warnings. -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wgnu-include-next" -#pragma clang diagnostic ignored "-Winclude-next-absolute-path" -#else // gcc -#pragma GCC system_header -#endif //__clang__ - -#include_next - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif //__clang__ - #ifndef FLT_RADIX #define FLT_RADIX __FLT_RADIX__ #endif // FLT_RADIX @@ -32,9 +17,13 @@ #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ #endif // FLT_EVAL_METHOD -#ifndef DECIMAL_DIG -#define DECIMAL_DIG __DECIMAL_DIG__ -#endif // DECIMAL_DIG +#ifndef FLT_ROUNDS +#if __has_builtin(__builtin_flt_rounds) +#define FLT_ROUNDS __builtin_flt_rounds() +#else +#define FLT_ROUNDS 1 +#endif +#endif // FLT_ROUNDS #ifndef FLT_DECIMAL_DIG #define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__ @@ -48,6 +37,10 @@ #define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__ #endif // LDBL_DECIMAL_DIG +#ifndef DECIMAL_DIG +#define DECIMAL_DIG __DECIMAL_DIG__ +#endif // DECIMAL_DIG + #ifndef FLT_DIG #define FLT_DIG __FLT_DIG__ #endif // FLT_DIG @@ -97,15 +90,15 @@ #endif // LDBL_MAX #ifndef FLT_TRUE_MIN -#define FLT_TRUE_MIN __FLT_TRUE_MIN__ +#define FLT_TRUE_MIN __FLT_DENORM_MIN__ 
#endif // FLT_TRUE_MIN #ifndef DBL_TRUE_MIN -#define DBL_TRUE_MIN __DBL_TRUE_MIN__ +#define DBL_TRUE_MIN __DBL_DENORM_MIN__ #endif // DBL_TRUE_MIN #ifndef LDBL_TRUE_MIN -#define LDBL_TRUE_MIN __LDBL_TRUE_MIN__ +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ #endif // LDBL_TRUE_MIN #ifndef FLT_EPSILON diff --git a/libc/src/__support/macros/properties/CMakeLists.txt b/libc/src/__support/macros/properties/CMakeLists.txt index bbc45650f3fca3..7718aeaa3de5af 100644 --- a/libc/src/__support/macros/properties/CMakeLists.txt +++ b/libc/src/__support/macros/properties/CMakeLists.txt @@ -33,6 +33,6 @@ add_header_library( .compiler .cpu_features .os - libc.include.llvm-libc-macros.float_macros + libc.hdr.float_macros libc.include.llvm-libc-types.float128 ) diff --git a/libc/src/__support/macros/properties/types.h b/libc/src/__support/macros/properties/types.h index d43cf99e6859be..781cf1b7a2b627 100644 --- a/libc/src/__support/macros/properties/types.h +++ b/libc/src/__support/macros/properties/types.h @@ -10,7 +10,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H #define LLVM_LIBC_SRC___SUPPORT_MACROS_PROPERTIES_TYPES_H -#include "include/llvm-libc-macros/float-macros.h" // LDBL_MANT_DIG +#include "hdr/float_macros.h" // LDBL_MANT_DIG #include "include/llvm-libc-types/float128.h" // float128 #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/compiler.h" diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index 39c4ad20201ca6..f6913ef0834289 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -75,4 +75,5 @@ add_object_library( libc.src.__support.OSUtil.osutil libc.src.__support.threads.linux.futex_word_type libc.src.__support.threads.mutex + libc.src.__support.CPP.mutex ) diff --git a/libc/src/__support/threads/linux/CndVar.cpp b/libc/src/__support/threads/linux/CndVar.cpp index 
daf56bca1ed21b..b3a0fdbda4e9ea 100644 --- a/libc/src/__support/threads/linux/CndVar.cpp +++ b/libc/src/__support/threads/linux/CndVar.cpp @@ -7,9 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/__support/threads/CndVar.h" +#include "src/__support/CPP/mutex.h" #include "src/__support/OSUtil/syscall.h" // syscall_impl #include "src/__support/threads/linux/futex_word.h" // FutexWordType -#include "src/__support/threads/mutex.h" // Mutex, MutexLock +#include "src/__support/threads/mutex.h" // Mutex #include // For syscall numbers. @@ -27,7 +28,7 @@ int CndVar::wait(Mutex *m) { CndWaiter waiter; { - MutexLock ml(&qmtx); + cpp::lock_guard ml(qmtx); CndWaiter *old_back = nullptr; if (waitq_front == nullptr) { waitq_front = waitq_back = &waiter; @@ -83,7 +84,7 @@ void CndVar::notify_one() { } void CndVar::broadcast() { - MutexLock ml(&qmtx); + cpp::lock_guard ml(qmtx); uint32_t dummy_futex_word; CndWaiter *waiter = waitq_front; waitq_front = waitq_back = nullptr; diff --git a/libc/src/__support/threads/mutex.h b/libc/src/__support/threads/mutex.h index 9dded2e3f952a1..392b38984dc0ae 100644 --- a/libc/src/__support/threads/mutex.h +++ b/libc/src/__support/threads/mutex.h @@ -43,18 +43,4 @@ #include "src/__support/threads/gpu/mutex.h" #endif // __linux__ -namespace LIBC_NAMESPACE { - -// An RAII class for easy locking and unlocking of mutexes. 
-class MutexLock { - Mutex *mutex; - -public: - explicit MutexLock(Mutex *m) : mutex(m) { mutex->lock(); } - - ~MutexLock() { mutex->unlock(); } -}; - -} // namespace LIBC_NAMESPACE - #endif // LLVM_LIBC_SRC___SUPPORT_THREADS_MUTEX_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index daaf505008ca11..269bc6be5d8343 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2933,6 +2933,7 @@ add_entrypoint_object( HDRS ../scalbn.h DEPENDS + libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 @@ -2945,6 +2946,7 @@ add_entrypoint_object( HDRS ../scalbnf.h DEPENDS + libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 @@ -2957,6 +2959,7 @@ add_entrypoint_object( HDRS ../scalbnl.h DEPENDS + libc.hdr.float_macros libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS -O3 @@ -2969,6 +2972,7 @@ add_entrypoint_object( HDRS ../scalbnf128.h DEPENDS + libc.hdr.float_macros libc.src.__support.macros.properties.types libc.src.__support.FPUtil.manipulation_functions COMPILE_OPTIONS diff --git a/libc/src/math/generic/scalbn.cpp b/libc/src/math/generic/scalbn.cpp index 3908f5892f144f..207cce1550bc01 100644 --- a/libc/src/math/generic/scalbn.cpp +++ b/libc/src/math/generic/scalbn.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbn.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, scalbn, (double x, int n)) { -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. 
-#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/scalbnf.cpp b/libc/src/math/generic/scalbnf.cpp index 4a4fa86dcfd895..e478088d3ce5a5 100644 --- a/libc/src/math/generic/scalbnf.cpp +++ b/libc/src/math/generic/scalbnf.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbnf.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, scalbnf, (float x, int n)) { -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. -#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/scalbnf128.cpp b/libc/src/math/generic/scalbnf128.cpp index be3d29ed27e985..5fd59611d53de7 100644 --- a/libc/src/math/generic/scalbnf128.cpp +++ b/libc/src/math/generic/scalbnf128.cpp @@ -7,21 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbnf128.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float128, scalbnf128, (float128 x, int n)) { -// TODO: should be switched to use `FLT_RADIX` in hdr/float_macros.h" instead -// see: https://github.com/llvm/llvm-project/issues/90496 -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. 
-#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/scalbnl.cpp b/libc/src/math/generic/scalbnl.cpp index 681338ec01f078..1225a7ebaf572d 100644 --- a/libc/src/math/generic/scalbnl.cpp +++ b/libc/src/math/generic/scalbnl.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "src/math/scalbnl.h" +#include "hdr/float_macros.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/__support/common.h" +#if FLT_RADIX != 2 +#error "FLT_RADIX != 2 is not supported." +#endif + namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(long double, scalbnl, (long double x, int n)) { -#if !defined(__FLT_RADIX__) -#error __FLT_RADIX__ undefined. -#elif __FLT_RADIX__ != 2 -#error __FLT_RADIX__!=2, unimplemented. -#else return fputil::ldexp(x, n); -#endif } } // namespace LIBC_NAMESPACE diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 17d2da907692e8..0297068785e8b8 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -326,8 +326,6 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_expected`` ``202211L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_format_path`` *unimplemented* - ---------------------------------------------------------- ----------------- ``__cpp_lib_format_ranges`` ``202207L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_formatters`` *unimplemented* @@ -386,8 +384,6 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_string_resize_and_overwrite`` ``202110L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_to_string`` *unimplemented* - ---------------------------------------------------------- ----------------- 
``__cpp_lib_to_underlying`` ``202102L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_tuple_like`` *unimplemented* @@ -412,6 +408,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_default_template_type_for_algorithm_values`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_format_path`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_freestanding_algorithm`` *unimplemented* ---------------------------------------------------------- ----------------- ``__cpp_lib_freestanding_array`` *unimplemented* @@ -466,6 +464,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_to_chars`` *unimplemented* ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_string`` *unimplemented* + ---------------------------------------------------------- ----------------- ``__cpp_lib_tuple_like`` *unimplemented* ========================================================== ================= diff --git a/libcxx/include/version b/libcxx/include/version index 69556d731f1cfc..140a9a0d870360 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -459,7 +459,6 @@ __cpp_lib_void_t 201411L # define __cpp_lib_constexpr_typeinfo 202106L # define __cpp_lib_containers_ranges 202202L # define __cpp_lib_expected 202211L -// # define __cpp_lib_format_path 202403L # define __cpp_lib_format_ranges 202207L // # define __cpp_lib_formatters 202302L # define __cpp_lib_forward_like 202207L @@ -490,7 +489,6 @@ __cpp_lib_void_t 201411L # define __cpp_lib_stdatomic_h 202011L # define __cpp_lib_string_contains 202011L # define __cpp_lib_string_resize_and_overwrite 202110L -// # define __cpp_lib_to_string 202306L # define __cpp_lib_to_underlying 202102L // # define __cpp_lib_tuple_like 202207L # define 
__cpp_lib_unreachable 202202L @@ -506,6 +504,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_copyable_function 202306L // # define __cpp_lib_debugging 202311L // # define __cpp_lib_default_template_type_for_algorithm_values 202403L +// # define __cpp_lib_format_path 202403L // # define __cpp_lib_freestanding_algorithm 202311L // # define __cpp_lib_freestanding_array 202311L // # define __cpp_lib_freestanding_cstring 202306L @@ -537,6 +536,7 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_text_encoding 202306L # undef __cpp_lib_to_chars // # define __cpp_lib_to_chars 202306L +// # define __cpp_lib_to_string 202306L # undef __cpp_lib_tuple_like // # define __cpp_lib_tuple_like 202311L #endif diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp index 3ee213358f3524..08c682964c3745 100644 --- a/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -73,7 +73,7 @@ L link link_to_link TEST_VALIDATE_EXCEPTION( std::runtime_error, [&]([[maybe_unused]] const std::runtime_error& e) { - std::string_view what{"tzdb: requested time zone not found"}; + [[maybe_unused]] std::string_view what{"tzdb: requested time zone not found"}; TEST_LIBCPP_REQUIRE( e.what() == what, TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp index 761691c2afdcb9..890ac23fff8327 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp @@ -24,6 +24,7 @@ // Proj1 
proj1 = {}, Proj2 proj2 = {}); // since C++23 #include +#include #include #include #include @@ -130,10 +131,10 @@ constexpr void test_iterators() { } { // range has zero length - int a[] = {}; - int p[] = {3, 4, 2}; - auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); - auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); + std::array a = {}; + int p[] = {3, 4, 2}; + auto whole = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data()))); + auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(std::end(p)))); { bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); assert(!ret); @@ -145,10 +146,10 @@ constexpr void test_iterators() { } { // subrange has zero length - int a[] = {3, 4, 2}; - int p[] = {}; - auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); - auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(p))); + int a[] = {3, 4, 2}; + std::array p = {}; + auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(std::end(a)))); + auto subrange = std::ranges::subrange(Iter2(p.data()), Sent2(Iter2(p.data()))); { bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); assert(ret); @@ -160,10 +161,10 @@ constexpr void test_iterators() { } { // range and subrange both have zero length - int a[] = {}; - int p[] = {}; - auto whole = std::ranges::subrange(Iter1(a), Sent1(Iter1(a))); - auto subrange = std::ranges::subrange(Iter2(p), Sent2(Iter2(p))); + std::array a = {}; + std::array p = {}; + auto whole = std::ranges::subrange(Iter1(a.data()), Sent1(Iter1(a.data()))); + auto subrange = std::ranges::subrange(Iter2(p.data()), Sent2(Iter2(p.data()))); { bool ret = std::ranges::contains_subrange(whole.begin(), whole.end(), subrange.begin(), subrange.end()); assert(ret); diff --git a/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp 
b/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp index 72b2f444c476c7..90aa5ea5b6df45 100644 --- a/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp +++ b/libcxx/test/std/atomics/atomics.ref/compare_exchange_strong.pass.cpp @@ -9,6 +9,9 @@ // XFAIL: !has-64-bit-atomics // XFAIL: !has-1024-bit-atomics +// MSVC warning C4310: cast truncates constant value +// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4310 + // bool compare_exchange_strong(T&, T, memory_order, memory_order) const noexcept; // bool compare_exchange_strong(T&, T, memory_order = memory_order::seq_cst) const noexcept; diff --git a/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp b/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp index 5219a8e3714f98..99c1385a2fe0b7 100644 --- a/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp +++ b/libcxx/test/std/atomics/atomics.ref/compare_exchange_weak.pass.cpp @@ -9,6 +9,9 @@ // XFAIL: !has-64-bit-atomics // XFAIL: !has-1024-bit-atomics +// MSVC warning C4310: cast truncates constant value +// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4310 + // bool compare_exchange_weak(T&, T, memory_order, memory_order) const noexcept; // bool compare_exchange_weak(T&, T, memory_order = memory_order::seq_cst) const noexcept; diff --git a/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp b/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp index e5310febf5c5eb..f246803ba25925 100644 --- a/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp +++ b/libcxx/test/std/atomics/atomics.ref/wait.pass.cpp @@ -11,6 +11,9 @@ // XFAIL: !has-64-bit-atomics // XFAIL: !has-1024-bit-atomics +// MSVC warning C4310: cast truncates constant value +// ADDITIONAL_COMPILE_FLAGS(cl-style-warnings): /wd4310 + // void wait(T, memory_order = memory_order::seq_cst) const noexcept; #include diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/destory_elements.pass.cpp 
b/libcxx/test/std/containers/sequences/vector/vector.modifiers/destroy_elements.pass.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/vector/vector.modifiers/destory_elements.pass.cpp rename to libcxx/test/std/containers/sequences/vector/vector.modifiers/destroy_elements.pass.cpp diff --git a/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp index 74a5094f61261d..bc76e23fea3c03 100644 --- a/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp +++ b/libcxx/test/std/containers/views/views.span/span.cons/initializer_list.pass.cpp @@ -93,9 +93,9 @@ constexpr bool test() { // Test P2447R4 "Annex C examples" -constexpr int three(std::span sp) { return sp.size(); } +constexpr int three(std::span sp) { return static_cast(sp.size()); } -constexpr int four(std::span sp) { return sp.size(); } +constexpr int four(std::span sp) { return static_cast(sp.size()); } bool test_P2447R4_annex_c_examples() { // 1. Overload resolution is affected diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp index d4bbde75ae8821..7283fdc769d86b 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp @@ -50,13 +50,16 @@ int main(int, char**) // responds with an empty message, which we probably want to // treat as a failure code otherwise, but we can detect that // with the preprocessor. 
+#if defined(_NEWLIB_VERSION) + const bool is_newlib = true; +#else + const bool is_newlib = false; +#endif + (void)is_newlib; LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0 // AIX || msg.rfind("No error information", 0) == 0 // Musl || msg.rfind("Unknown error", 0) == 0 // Glibc -#if defined(_NEWLIB_VERSION) - || msg.empty() -#endif - ); + || (is_newlib && msg.empty())); assert(errno == E2BIG); } diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp index eefbddd27a7f53..02a1baf5999831 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp @@ -56,13 +56,16 @@ int main(int, char**) { // responds with an empty message, which we probably want to // treat as a failure code otherwise, but we can detect that // with the preprocessor. 
+#if defined(_NEWLIB_VERSION) + const bool is_newlib = true; +#else + const bool is_newlib = false; +#endif + (void)is_newlib; LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0 // AIX || msg.rfind("No error information", 0) == 0 // Musl || msg.rfind("Unknown error", 0) == 0 // Glibc -#if defined(_NEWLIB_VERSION) - || msg.empty() -#endif - ); + || (is_newlib && msg.empty())); assert(errno == E2BIG); } diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp index 5edf22eaacf31f..d6bb56d9b78b79 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp @@ -37,7 +37,7 @@ constexpr bool test_non_convert_to_path() { static_assert(!std::is_constructible_v>); // Char* pointers - if constexpr (!std::is_same_v) + if constexpr (!std::is_same_v && !std::is_same_v) static_assert(!std::is_constructible_v); // Iterators diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp index 2f27fd8e6e93d3..792b65615679a7 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp @@ -38,7 +38,7 @@ constexpr bool test_non_convert_to_path() { static_assert(!std::is_constructible_v>); // Char* pointers - if constexpr (!std::is_same_v) + if constexpr (!std::is_same_v && !std::is_same_v) static_assert(!std::is_constructible_v); // Iterators diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp index e55adfd83fc3c7..602bdadd85813f 100644 --- 
a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp @@ -37,7 +37,7 @@ constexpr bool test_non_convert_to_path() { static_assert(!std::is_constructible_v>); // Char* pointers - if constexpr (!std::is_same_v) + if constexpr (!std::is_same_v && !std::is_same_v) static_assert(!std::is_constructible_v); // Iterators diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp index cb49086dd6802b..998b13ed494552 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/sized_sentinel.compile.pass.cpp @@ -21,6 +21,7 @@ struct unsized_it { using difference_type = std::ptrdiff_t; value_type& operator*() const; + unsized_it& operator++(); bool operator==(const unsized_it&) const; difference_type operator-(const unsized_it&) const { return 0; } }; diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp index 308cc2d43b0586..4aba33482f69c4 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp @@ -20,7 +20,7 @@ /* Constant Value __cpp_lib_char8_t 201907L [C++20] __cpp_lib_filesystem 201703L [C++17] - __cpp_lib_format_path 202403L [C++23] + __cpp_lib_format_path 202403L [C++26] */ #include @@ -37,7 +37,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # 
endif #elif TEST_STD_VER == 14 @@ -51,7 +51,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 17 @@ -74,7 +74,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 20 @@ -106,7 +106,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER == 23 @@ -137,17 +137,8 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_format_path -# error "__cpp_lib_format_path should be defined in c++23" -# endif -# if __cpp_lib_format_path != 202403L -# error "__cpp_lib_format_path should have the value 202403L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" 
-# endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++26" # endif #elif TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp index 16a9a0a28de635..af6386a40a458a 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp @@ -29,7 +29,7 @@ __cpp_lib_string_udls 201304L [C++14] __cpp_lib_string_view 201606L [C++17] 201803L [C++20] - __cpp_lib_to_string 202306L [C++23] + __cpp_lib_to_string 202306L [C++26] */ #include @@ -86,7 +86,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 14 @@ -143,7 +143,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 17 @@ -209,7 +209,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 20 @@ -293,7 +293,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER == 23 @@ -385,17 +385,8 @@ # error "__cpp_lib_string_view should have the value 201803L in c++23" # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_to_string -# error "__cpp_lib_to_string should be defined in c++23" -# endif -# if __cpp_lib_to_string != 202306L -# error "__cpp_lib_to_string 
should have the value 202306L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined because it is unimplemented in libc++!" -# endif +# ifdef __cpp_lib_to_string +# error "__cpp_lib_to_string should not be defined before c++26" # endif #elif TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 7829e06f90760b..c1e1f9f340af48 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -88,7 +88,7 @@ __cpp_lib_expected 202211L [C++23] __cpp_lib_filesystem 201703L [C++17] __cpp_lib_format 202106L [C++20] - __cpp_lib_format_path 202403L [C++23] + __cpp_lib_format_path 202403L [C++26] __cpp_lib_format_ranges 202207L [C++23] __cpp_lib_format_uchar 202311L [C++20] __cpp_lib_formatters 202302L [C++23] @@ -216,7 +216,7 @@ __cpp_lib_to_array 201907L [C++20] __cpp_lib_to_chars 201611L [C++17] 202306L [C++26] - __cpp_lib_to_string 202306L [C++23] + __cpp_lib_to_string 202306L [C++26] __cpp_lib_to_underlying 202102L [C++23] __cpp_lib_transformation_trait_aliases 201304L [C++14] __cpp_lib_transparent_operators 201210L [C++14] @@ -513,7 +513,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -1005,7 +1005,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -1348,7 +1348,7 @@ # endif # ifdef __cpp_lib_format_path -# error 
"__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -1891,7 +1891,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -2303,7 +2303,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -2972,7 +2972,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -3543,7 +3543,7 @@ # endif # ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined before c++23" +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifdef __cpp_lib_format_ranges @@ -4350,7 +4350,7 @@ # endif # ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined before c++23" +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifdef __cpp_lib_to_underlying @@ -4971,17 +4971,8 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_format_path -# error "__cpp_lib_format_path should be defined in c++23" -# endif -# if __cpp_lib_format_path != 202403L -# error "__cpp_lib_format_path should have the value 202403L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_format_path -# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" 
-# endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++26" # endif # ifndef __cpp_lib_format_ranges @@ -5943,17 +5934,8 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_to_string -# error "__cpp_lib_to_string should be defined in c++23" -# endif -# if __cpp_lib_to_string != 202306L -# error "__cpp_lib_to_string should have the value 202306L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_to_string -# error "__cpp_lib_to_string should not be defined because it is unimplemented in libc++!" -# endif +# ifdef __cpp_lib_to_string +# error "__cpp_lib_to_string should not be defined before c++26" # endif # ifndef __cpp_lib_to_underlying diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp index 212804356a056d..6a9ec1a2ffec24 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -57,22 +58,26 @@ T basic_gcd_(T m, T n) { template T basic_gcd(T m, T n) { using Tp = std::make_unsigned_t; - if (m < 0 && m != std::numeric_limits::min()) - m = -m; - if (n < 0 && n != std::numeric_limits::min()) - n = -n; + if constexpr (std::is_signed_v) { + if (m < 0 && m != std::numeric_limits::min()) + m = -m; + if (n < 0 && n != std::numeric_limits::min()) + n = -n; + } return basic_gcd_(static_cast(m), static_cast(n)); } template void do_fuzzy_tests() { std::mt19937 gen(1938); - std::uniform_int_distribution distrib; + using DistIntType = std::conditional_t; // See N4981 [rand.req.genl]/1.5 + constexpr Input max_input = std::numeric_limits::max(); + std::uniform_int_distribution distrib(0, max_input); constexpr int nb_rounds = 10000; for (int i = 0; i < nb_rounds; ++i) { - Input n = distrib(gen); - Input m = distrib(gen); + Input n = 
static_cast(distrib(gen)); + Input m = static_cast(distrib(gen)); assert(std::gcd(n, m) == basic_gcd(n, m)); } } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp index 2c43e121613c77..f31a679dd6214f 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp @@ -32,7 +32,7 @@ static void set_tz(std::string zone) { // Unlike POSIX it does not mention the string of putenv becomes part // of the environment. - int status = _putenv_s("TZ", zone.c_str(), 1); + int status = _putenv_s("TZ", zone.c_str()); assert(status == 0); } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp index 4d600fcdf40e3f..8dd895fd21814f 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp @@ -40,7 +40,7 @@ static void test_exception([[maybe_unused]] std::string_view zone) { TEST_VALIDATE_EXCEPTION( std::runtime_error, [&]([[maybe_unused]] const std::runtime_error& e) { - std::string_view what{"tzdb: requested time zone not found"}; + [[maybe_unused]] std::string_view what{"tzdb: requested time zone not found"}; TEST_LIBCPP_REQUIRE( e.what() == what, TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp index e6497e26323ce6..98509c298ebcb8 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp +++ 
b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp @@ -34,7 +34,7 @@ static void set_tz(std::string zone) { // Unlike POSIX it does not mention the string of putenv becomes part // of the environment. - int status = _putenv_s("TZ", zone.c_str(), 1); + int status = _putenv_s("TZ", zone.c_str()); assert(status == 0); } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp index f929dafcc96838..08ce48dfd0edb2 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -42,7 +42,7 @@ static void test_exception([[maybe_unused]] std::string_view zone) { TEST_VALIDATE_EXCEPTION( std::runtime_error, [&]([[maybe_unused]] const std::runtime_error& e) { - std::string_view what{"tzdb: requested time zone not found"}; + [[maybe_unused]] std::string_view what{"tzdb: requested time zone not found"}; TEST_LIBCPP_REQUIRE( e.what() == what, TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp index d7374351afa8bf..accb601dd00365 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp @@ -209,6 +209,7 @@ template static constexpr bool can_make_from_tuple = std::is_same_v(T{}, Tuple{})), uint8_t>; +#ifdef _LIBCPP_VERSION template auto test_make_from_tuple_impl(T&&, Tuple&& t) -> decltype(std::__make_from_tuple_impl( @@ -224,6 +225,7 @@ uint32_t test_make_from_tuple_impl(...) 
{ template static constexpr bool can_make_from_tuple_impl = std::is_same_v(T{}, Tuple{})), uint8_t>; +#endif // _LIBCPP_VERSION struct A { int a; @@ -263,23 +265,23 @@ static_assert(can_make_from_tuple>); // Test std::__make_from_tuple_impl constraints. // reinterpret_cast -static_assert(!can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); // const_cast -static_assert(!can_make_from_tuple_impl>); -static_assert(!can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); // static_cast -static_assert(!can_make_from_tuple_impl>); -static_assert(!can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); -static_assert(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(!can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); +LIBCPP_STATIC_ASSERT(can_make_from_tuple_impl>); } // namespace LWG3528 diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp index db05691c55818c..039a2373348c4e 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp @@ -516,7 +516,7 @@ constexpr void test_swap_sfinae() { } } 
-_LIBCPP_CONSTEXPR_SINCE_CXX20 void test_swap_noexcept() { +TEST_CONSTEXPR_CXX20 void test_swap_noexcept() { { using V = std::variant; static_assert(std::is_swappable_v && has_swap_member(), ""); diff --git a/libcxx/test/support/msvc_stdlib_force_include.h b/libcxx/test/support/msvc_stdlib_force_include.h index 6c26085e72c45f..785670224c3b18 100644 --- a/libcxx/test/support/msvc_stdlib_force_include.h +++ b/libcxx/test/support/msvc_stdlib_force_include.h @@ -67,7 +67,6 @@ const AssertionDialogAvoider assertion_dialog_avoider{}; // Silence compiler warnings. # pragma warning(disable : 4180) // qualifier applied to function type has no meaning; ignored # pragma warning(disable : 4324) // structure was padded due to alignment specifier -# pragma warning(disable : 4521) // multiple copy constructors specified # pragma warning(disable : 4702) // unreachable code # pragma warning(disable : 28251) // Inconsistent annotation for 'new': this instance has no annotations. #endif // !defined(__clang__) @@ -91,7 +90,7 @@ const AssertionDialogAvoider assertion_dialog_avoider{}; #include #if _HAS_CXX23 -# define TEST_STD_VER 99 +# define TEST_STD_VER 23 #elif _HAS_CXX20 # define TEST_STD_VER 20 #elif _HAS_CXX17 diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b04cb4f5115547..1e79f6c140758c 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -515,7 +515,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_format_path", - "values": {"c++23": 202403}, # P2845R8: Formatting of std::filesystem::path + "values": {"c++26": 202403}, # P2845R8: Formatting of std::filesystem::path "headers": ["filesystem"], "unimplemented": True, }, @@ -1270,7 +1270,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_to_string", - "values": {"c++23": 202306}, # P2587R3 to_string or not to_string + "values": {"c++26": 202306}, # P2587R3 
to_string or not to_string "headers": ["string"], "unimplemented": True, }, diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 687f9499009d5e..3e0efe540e1bf1 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -231,36 +231,71 @@ static void writePltHeaderLong(uint8_t *buf) { // The default PLT header requires the .got.plt to be within 128 Mb of the // .plt in the positive direction. void ARM::writePltHeader(uint8_t *buf) const { - // Use a similar sequence to that in writePlt(), the difference is the calling - // conventions mean we use lr instead of ip. The PLT entry is responsible for - // saving lr on the stack, the dynamic loader is responsible for reloading - // it. - const uint32_t pltData[] = { - 0xe52de004, // L1: str lr, [sp,#-4]! - 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) - 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) - 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) - }; - - uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; - if (!llvm::isUInt<27>(offset)) { - // We cannot encode the Offset, use the long form. - writePltHeaderLong(buf); - return; + if (config->armThumbPLTs) { + // The instruction sequence for thumb: + // + // 0: b500 push {lr} + // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe + // 6: 44fe add lr, pc + // 8: f85e ff08 ldr pc, [lr, #8]! + // e: .word .got.plt - .plt - 16 + // + // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from + // `pc` in the add instruction and 8 bytes for the `lr` adjustment. + // + uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16; + assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); + write16(buf + 0, 0xb500); + // Split into two halves to support endianness correctly. + write16(buf + 2, 0xf8df); + write16(buf + 4, 0xe008); + write16(buf + 6, 0x44fe); + // Split into two halves to support endianness correctly. 
+ write16(buf + 8, 0xf85e); + write16(buf + 10, 0xff08); + write32(buf + 12, offset); + + memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary + memcpy(buf + 20, trapInstr.data(), 4); + memcpy(buf + 24, trapInstr.data(), 4); + memcpy(buf + 28, trapInstr.data(), 4); + } else { + // Use a similar sequence to that in writePlt(), the difference is the + // calling conventions mean we use lr instead of ip. The PLT entry is + // responsible for saving lr on the stack, the dynamic loader is responsible + // for reloading it. + const uint32_t pltData[] = { + 0xe52de004, // L1: str lr, [sp,#-4]! + 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) + 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) + 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) + }; + + uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; + if (!llvm::isUInt<27>(offset)) { + // We cannot encode the Offset, use the long form. + writePltHeaderLong(buf); + return; + } + write32(buf + 0, pltData[0]); + write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff)); + write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff)); + write32(buf + 12, pltData[3] | (offset & 0xfff)); + memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary + memcpy(buf + 20, trapInstr.data(), 4); + memcpy(buf + 24, trapInstr.data(), 4); + memcpy(buf + 28, trapInstr.data(), 4); } - write32(buf + 0, pltData[0]); - write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff)); - write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff)); - write32(buf + 12, pltData[3] | (offset & 0xfff)); - memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary - memcpy(buf + 20, trapInstr.data(), 4); - memcpy(buf + 24, trapInstr.data(), 4); - memcpy(buf + 28, trapInstr.data(), 4); } void ARM::addPltHeaderSymbols(InputSection &isec) const { - addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec); + if (config->armThumbPLTs) { + addSyntheticLocal("$t", 
STT_NOTYPE, 0, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec); + } else { + addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec); + } } // Long form PLT entries that do not have any restrictions on the displacement @@ -279,32 +314,65 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, // .plt in the positive direction. void ARM::writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const { - // The PLT entry is similar to the example given in Appendix A of ELF for - // the Arm Architecture. Instead of using the Group Relocations to find the - // optimal rotation for the 8-bit immediate used in the add instructions we - // hard code the most compact rotations for simplicity. This saves a load - // instruction over the long plt sequences. - const uint32_t pltData[] = { - 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 - 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 - 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 - }; - uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; - if (!llvm::isUInt<27>(offset)) { - // We cannot encode the Offset, use the long form. - writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); - return; + if (!config->armThumbPLTs) { + uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; + + // The PLT entry is similar to the example given in Appendix A of ELF for + // the Arm Architecture. Instead of using the Group Relocations to find the + // optimal rotation for the 8-bit immediate used in the add instructions we + // hard code the most compact rotations for simplicity. This saves a load + // instruction over the long plt sequences. 
+ const uint32_t pltData[] = { + 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 + 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 + 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 + }; + if (!llvm::isUInt<27>(offset)) { + // We cannot encode the Offset, use the long form. + writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); + return; + } + write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); + write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff)); + write32(buf + 8, pltData[2] | (offset & 0xfff)); + memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary + } else { + uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12; + assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); + + // A PLT entry will be: + // + // movw ip, # + // movt ip, # + // add ip, pc + // L1: ldr.w pc, [ip] + // b L1 + // + // where ip = r12 = 0xc + + // movw ip, # + write16(buf + 2, 0x0c00); // use `ip` + relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset); + + // movt ip, # + write16(buf + 6, 0x0c00); // use `ip` + relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset); + + write16(buf + 8, 0x44fc); // add ip, pc + write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half) + write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half) + write16(buf + 14, 0xe7fc); // Branch to previous instruction } - write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); - write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff)); - write32(buf + 8, pltData[2] | (offset & 0xfff)); - memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary } void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { - addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec); + if (config->armThumbPLTs) { + addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec); + } else { + addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, off + 
12, 0, isec); + } } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, @@ -325,6 +393,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + assert(!config->armThumbPLTs && + "If the source is ARM, we should not need Thumb PLTs"); if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; [[fallthrough]]; @@ -335,9 +405,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, } case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: - // Source is Thumb, all PLT entries are ARM so interworking is required. + // Source is Thumb, when all PLT entries are ARM interworking is required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) + if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0)) return true; [[fallthrough]]; case R_ARM_THM_CALL: { @@ -547,7 +617,6 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // STT_FUNC we choose whether to write a BL or BLX depending on the // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is // not of type STT_FUNC then we must preserve the original instruction. - // PLT entries are always ARM state so we know we don't need to interwork. assert(rel.sym); // R_ARM_CALL is always reached via relocate(). bool bit0Thumb = val & 1; bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000; @@ -606,12 +675,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // PLT entries are always ARM state so we know we need to interwork. assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). 
bool bit0Thumb = val & 1; + bool useThumb = bit0Thumb || config->armThumbPLTs; bool isBlx = (read16(loc + 2) & 0x1000) == 0; // lld 10.0 and before always used bit0Thumb when deciding to write a BLX - // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. - if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) + // even when type not STT_FUNC. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb) stateChangeWarning(loc, rel.type, *rel.sym); - if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { + if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) { // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As // the BLX instruction may only be two byte aligned. This must be done // before overflow check. diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index f0dfe7f377de0e..883c4a2f84294c 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -217,6 +217,7 @@ struct Config { bool allowMultipleDefinition; bool fatLTOObjects; bool androidPackDynRelocs = false; + bool armThumbPLTs = false; bool armHasBlx = false; bool armHasMovtMovw = false; bool armJ1J2BranchEncoding = false; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 1f496026d3ae20..d760dddcf5ec5c 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -194,6 +194,18 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && profile == ARMBuildAttrs::MicroControllerProfile) config->armCMSESupport = true; + + // The thumb PLT entries require Thumb2 which can be used on multiple archs. + // For now, let's limit it to ones where ARM isn't available and we know have + // Thumb2. 
+ std::optional armISA = + attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use); + std::optional thumb = + attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); + bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed; + bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32; + if (noArmISA && hasThumb2) + config->armThumbPLTs = true; } InputFile::InputFile(Kind k, MemoryBufferRef m) diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 9d1612beae872e..635ded554497ba 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -379,12 +379,21 @@ class ObjcCategoryMerger { InfoWriteSection catPtrListInfo; }; - // Information about a pointer list in the original categories (method lists, - // protocol lists, etc) + // Information about a pointer list in the original categories or class(method + // lists, protocol lists, etc) struct PointerListInfo { + PointerListInfo() = default; + PointerListInfo(const PointerListInfo &) = default; PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) : categoryPrefix(_categoryPrefix), pointersPerStruct(_pointersPerStruct) {} + + inline bool operator==(const PointerListInfo &cmp) { + return pointersPerStruct == cmp.pointersPerStruct && + structSize == cmp.structSize && structCount == cmp.structCount && + allPtrs == cmp.allPtrs; + } + const char *categoryPrefix; uint32_t pointersPerStruct = 0; @@ -395,9 +404,9 @@ class ObjcCategoryMerger { std::vector allPtrs; }; - // Full information about all the categories that extend a class. This will - // include all the additional methods, protocols, and properties that are - // contained in all the categories that extend a particular class. + // Full information describing an ObjC class . This will include all the + // additional methods, protocols, and properties that are contained in the + // class and all the categories that extend a particular class. 
struct ClassExtensionInfo { ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; @@ -449,6 +458,9 @@ class ObjcCategoryMerger { void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, PointerListInfo &ptrList); + PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, + uint32_t secOffset); + void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, PointerListInfo &ptrList); @@ -456,9 +468,9 @@ class ObjcCategoryMerger { const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList); - void emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, - const ClassExtensionInfo &extInfo, - const PointerListInfo &ptrList); + Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, + const ClassExtensionInfo &extInfo, + const PointerListInfo &ptrList); Defined *emitCategory(const ClassExtensionInfo &extInfo); Defined *emitCatListEntrySec(const std::string &forCategoryName, @@ -474,6 +486,10 @@ class ObjcCategoryMerger { uint32_t offset); Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, uint32_t offset); + Defined *getClassRo(const Defined *classSym, bool getMetaRo); + void mergeCategoriesIntoBaseClass(const Defined *baseClass, + std::vector &categories); + void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, uint32_t offset); @@ -552,6 +568,29 @@ ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, return dyn_cast_or_null(sym); } +// Get the class's ro_data symbol. If getMetaRo is true, then we will return +// the meta-class's ro_data symbol. Otherwise, we will return the class +// (instance) ro_data symbol. 
+Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, + bool getMetaRo) { + ConcatInputSection *isec = dyn_cast(classSym->isec()); + if (!isec) + return nullptr; + + if (!getMetaRo) + return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + + classSym->value); + + Defined *metaClass = tryGetDefinedAtIsecOffset( + isec, classLayout.metaClassOffset + classSym->value); + if (!metaClass) + return nullptr; + + return tryGetDefinedAtIsecOffset( + dyn_cast(metaClass->isec()), + classLayout.roDataOffset); +} + // Given an ConcatInputSection or CStringInputSection and an offset, if there is // a symbol(Defined) at that offset, then erase the symbol (mark it not live) void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( @@ -663,6 +702,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, "Protocol list end offset does not match expected size"); } +// Parse a protocol list and return the PointerListInfo for it +ObjcCategoryMerger::PointerListInfo +ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, + uint32_t secOffset) { + PointerListInfo ptrList; + parseProtocolListInfo(isec, secOffset, ptrList); + return ptrList; +} + // Parse a pointer list that might be linked to ConcatInputSection at a given // offset. This can be used for instance methods, class methods, instance props // and class props since they have the same format. @@ -769,11 +817,11 @@ void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, // Generate a protocol list (including header) and link it into the parent at // the specified offset. 
-void ObjcCategoryMerger::emitAndLinkProtocolList( +Defined *ObjcCategoryMerger::emitAndLinkProtocolList( Defined *parentSym, uint32_t linkAtOffset, const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { if (ptrList.allPtrs.empty()) - return; + return nullptr; assert(ptrList.allPtrs.size() == ptrList.structCount); @@ -820,6 +868,8 @@ void ObjcCategoryMerger::emitAndLinkProtocolList( infoCategoryWriter.catPtrListInfo.relocTemplate); offset += target->wordSize; } + + return ptrListSym; } // Generate a pointer list (including header) and link it into the parent at the @@ -1265,10 +1315,15 @@ void ObjcCategoryMerger::removeRefsToErasedIsecs() { void ObjcCategoryMerger::doMerge() { collectAndValidateCategoriesData(); - for (auto &entry : categoryMap) - if (entry.second.size() > 1) + for (auto &[baseClass, catInfos] : categoryMap) { + if (auto *baseClassDef = dyn_cast(baseClass)) { + // Merge all categories into the base class + mergeCategoriesIntoBaseClass(baseClassDef, catInfos); + } else if (catInfos.size() > 1) { // Merge all categories into a new, single category - mergeCategoriesIntoSingleCategory(entry.second); + mergeCategoriesIntoSingleCategory(catInfos); + } + } // Erase all categories that were merged eraseMergedCategories(); @@ -1302,3 +1357,101 @@ void objc::mergeCategories() { } void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } + +void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( + const Defined *baseClass, std::vector &categories) { + assert(categories.size() >= 1 && "Expected at least one category to merge"); + + // Collect all the info from the categories + ClassExtensionInfo extInfo(catLayout); + for (auto &catInfo : categories) { + parseCatInfoToExtInfo(catInfo, extInfo); + } + + // Get metadata for the base class + Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); + ConcatInputSection *metaIsec = dyn_cast(metaRo->isec()); + Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); + ConcatInputSection 
*classIsec = dyn_cast(classRo->isec()); + + // Now collect the info from the base class from the various lists in the + // class metadata + + // Protocol lists are a special case - the same protocol list is in classRo + // and metaRo, so we only need to parse it once + parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, + extInfo.protocols); + + // Check that the classRo and metaRo protocol lists are identical + assert( + parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset) == + parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset) && + "Category merger expects classRo and metaRo to have the same protocol " + "list"); + + parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, + extInfo.classMethods); + parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, + extInfo.instanceMethods); + + parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, + extInfo.classProps); + parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, + extInfo.instanceProps); + + // Erase the old lists - these will be generated and replaced + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); + eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); + eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); + + // Emit the newly merged lists - first into the meta RO then into the class RO + // First we emit and link the protocol list into the meta RO. 
Then we link it + // in the classRo as well (they're supposed to be identical) + if (Defined *protoListSym = + emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, + extInfo, extInfo.protocols)) { + createSymbolReference(classRo, protoListSym, + roClassLayout.baseProtocolsOffset, + infoCategoryWriter.catBodyInfo.relocTemplate); + } + + emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, + extInfo.classMethods); + emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, + extInfo.instanceMethods); + + emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, + extInfo.classProps); + + emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, + extInfo.instanceProps); + + // Mark all the categories as merged - this will be used to erase them later + for (auto &catInfo : categories) + catInfo.wasMerged = true; +} + +// Erase the symbol at a given offset in an InputSection +void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, + uint32_t offset) { + Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); + if (!sym) + return; + + // Remove the symbol from isec->symbols + assert(isa(sym) && "Can only erase a Defined"); + llvm::erase(isec->symbols, sym); + + // Remove the relocs that refer to this symbol + auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; + llvm::erase_if(isec->relocs, removeAtOff); + + // Now, if the symbol fully occupies a ConcatInputSection, we can also erase + // the whole ConcatInputSection + if (ConcatInputSection *cisec = dyn_cast(sym->isec())) + if (cisec->data.size() == sym->size) + eraseISec(cisec); +} diff --git a/lld/test/ELF/armv8-thumb-plt-reloc.s b/lld/test/ELF/armv8-thumb-plt-reloc.s new file mode 100644 index 00000000000000..47cd5c1b741ee0 --- /dev/null +++ b/lld/test/ELF/armv8-thumb-plt-reloc.s @@ -0,0 +1,126 @@ +// REQUIRES: arm +// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumb 
--mcpu=cortex-m33 %p/Inputs/arm-plt-reloc.s -o %t1 +// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumb --mcpu=cortex-m33 %s -o %t2 +// RUN: ld.lld %t1 %t2 -o %t +// RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s +// RUN: ld.lld -shared %t1 %t2 -o %t.so +// RUN: llvm-objdump --no-print-imm-hex -d %t.so | FileCheck --check-prefix=DSO %s +// RUN: llvm-readelf -S -r %t.so | FileCheck -check-prefix=DSOREL %s + +// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumbeb --mcpu=cortex-m33 %p/Inputs/arm-plt-reloc.s -o %t1.be +// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumbeb --mcpu=cortex-m33 %s -o %t2.be +// RUN: ld.lld %t1.be %t2.be -o %t.be +// RUN: llvm-objdump --no-print-imm-hex -d %t.be | FileCheck %s +// RUN: ld.lld -shared %t1.be %t2.be -o %t.so.be +// RUN: llvm-objdump --no-print-imm-hex -d %t.so.be | FileCheck --check-prefix=DSO %s +// RUN: llvm-readelf -S -r %t.so.be | FileCheck -check-prefix=DSOREL %s + +// RUN: ld.lld --be8 %t1.be %t2.be -o %t.be +// RUN: llvm-objdump --no-print-imm-hex -d %t.be | FileCheck %s +// RUN: ld.lld --be8 -shared %t1.be %t2.be -o %t.so.be +// RUN: llvm-objdump --no-print-imm-hex -d %t.so.be | FileCheck --check-prefix=DSO %s +// RUN: llvm-readelf -S -r %t.so.be | FileCheck -check-prefix=DSOREL %s + +/// Test PLT entry generation + .text + .align 2 + .globl _start + .type _start,%function +_start: + bl func1 + bl func2 + bl func3 + b.w func1 + b.w func2 + b.w func3 + beq.w func1 + beq.w func2 + beq.w func3 + +/// Executable, expect no PLT +// CHECK: Disassembly of section .text: +// CHECK-EMPTY: +// CHECK-NEXT: : +// CHECK-NEXT: bx lr +// CHECK: : +// CHECK-NEXT: bx lr +// CHECK: : +// CHECK-NEXT: bx lr +// CHECK-NEXT: d4d4 +// CHECK: <_start>: +// CHECK-NEXT: bl {{.*}} +// CHECK-NEXT: bl {{.*}} +// CHECK-NEXT: bl {{.*}} +// CHECK-NEXT: b.w {{.*}} +// CHECK-NEXT: b.w {{.*}} +// CHECK-NEXT: b.w {{.*}} +// CHECK-NEXT: beq.w {{.*}} +// CHECK-NEXT: beq.w {{.*}} +// CHECK-NEXT: 
beq.w {{.*}} + +// DSO: Disassembly of section .text: +// DSO-EMPTY: +// DSO-NEXT: : +// DSO-NEXT: bx lr +// DSO: : +// DSO-NEXT: bx lr +// DSO: : +// DSO-NEXT: bx lr +// DSO-NEXT: d4d4 +// DSO: <_start>: +/// 0x10260 = PLT func1 +// DSO-NEXT: bl 0x10260 +/// 0x10270 = PLT func2 +// DSO-NEXT: bl 0x10270 +/// 0x10280 = PLT func3 +// DSO-NEXT: bl 0x10280 +/// 0x10260 = PLT func1 +// DSO-NEXT: b.w 0x10260 +/// 0x10270 = PLT func2 +// DSO-NEXT: b.w 0x10270 +/// 0x10280 = PLT func3 +// DSO-NEXT: b.w 0x10280 +/// 0x10260 = PLT func1 +// DSO-NEXT: beq.w 0x10260 +/// 0x10270 = PLT func2 +// DSO-NEXT: beq.w 0x10270 +/// 0x10280 = PLT func3 +// DSO-NEXT: beq.w 0x10280 +// DSO: Disassembly of section .plt: +// DSO-EMPTY: +// DSO-NEXT: 10240 <.plt>: +// DSO-NEXT: push {lr} +// DSO-NEXT: ldr.w lr, [pc, #8] +// DSO-NEXT: add lr, pc +// DSO-NEXT: ldr pc, [lr, #8]! +/// 0x20098 = .got.plt (0x302D8) - pc (0x10238 = .plt + 8) - 8 +// DSO-NEXT: .word 0x00020098 +// DSO-NEXT: .word 0xd4d4d4d4 +// DSO-NEXT: .word 0xd4d4d4d4 +// DSO-NEXT: .word 0xd4d4d4d4 +// DSO-NEXT: .word 0xd4d4d4d4 + +/// 136 + 2 << 16 + 0x1026c = 0x302f4 = got entry 1 +// DSO-NEXT: 10260: f240 0c88 movw r12, #136 +// DSO-NEXT: f2c0 0c02 movt r12, #2 +// DSO-NEXT: 44fc add r12, pc +// DSO-NEXT: f8dc f000 ldr.w pc, [r12] +// DSO-NEXT: e7fc b 0x1026a +/// 124 + 2 << 16 + 0x1027c = 0x302f8 = got entry 2 +// DSO-NEXT: 10270: f240 0c7c movw r12, #124 +// DSO-NEXT: f2c0 0c02 movt r12, #2 +// DSO-NEXT: 44fc add r12, pc +// DSO-NEXT: f8dc f000 ldr.w pc, [r12] +// DSO-NEXT: e7fc b 0x1027a +/// 112 + 2 << 16 + 0x1028c = 0x302fc = got entry 3 +// DSO-NEXT: 10280: f240 0c70 movw r12, #112 +// DSO-NEXT: f2c0 0c02 movt r12, #2 +// DSO-NEXT: 44fc add r12, pc +// DSO-NEXT: f8dc f000 ldr.w pc, [r12] +// DSO-NEXT: e7fc b 0x1028a + +// DSOREL: .got.plt PROGBITS 000302e8 {{.*}} 000018 00 WA 0 0 4 +// DSOREL: Relocation section '.rel.plt' +// DSOREL: 000302f4 {{.*}} R_ARM_JUMP_SLOT {{.*}} func1 +// DSOREL: 000302f8 {{.*}} 
R_ARM_JUMP_SLOT {{.*}} func2 +// DSOREL: 000302fc {{.*}} R_ARM_JUMP_SLOT {{.*}} func3 diff --git a/lld/test/MachO/objc-category-merging-complete-test.s b/lld/test/MachO/objc-category-merging-complete-test.s index 74400177b550dc..cf3e19e2f9c8b4 100644 --- a/lld/test/MachO/objc-category-merging-complete-test.s +++ b/lld/test/MachO/objc-category-merging-complete-test.s @@ -1,6 +1,7 @@ # REQUIRES: aarch64 # RUN: rm -rf %t; split-file %s %t && cd %t +############ Test merging multiple categories into a single category ############ ## Create a dylib to link against(a64_file1.dylib) and merge categories in the main binary (file2_merge_a64.exe) # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_file1.o a64_file1.s # RUN: %lld -arch arm64 a64_file1.o -o a64_file1.dylib -dylib @@ -12,6 +13,10 @@ # RUN: llvm-objdump --objc-meta-data --macho a64_file2_no_merge.exe | FileCheck %s --check-prefixes=NO_MERGE_CATS # RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge.exe | FileCheck %s --check-prefixes=MERGE_CATS +############ Test merging multiple categories into the base class ############ +# RUN: %lld -arch arm64 -o a64_file2_merge_into_class.exe -objc_category_merging a64_file1.o a64_file2.o +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge_into_class.exe | FileCheck %s --check-prefixes=MERGE_CATS_CLS + MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category02|Category03) MERGE_CATS-NEXT: name {{.*}} Category02|Category03 @@ -101,6 +106,211 @@ NO_MERGE_CATS-NEXT: 24 NO_MERGE_CATS-NEXT: 2 +MERGE_CATS_CLS: _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass 0x0 +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x2 RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 0 +MERGE_CATS_CLS-NEXT: instanceSize 4 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 
+MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 8 +MERGE_CATS_CLS-NEXT: name {{.*}} class02InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) class02InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) myProtocol02Method] +MERGE_CATS_CLS-NEXT: name {{.*}} class03InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) class03InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) myProtocol03Method] +MERGE_CATS_CLS-NEXT: name {{.*}} baseInstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass baseInstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass myProtocol01Method] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass MyProtocol01Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass setMyProtocol01Prop:] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 
+MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} 
+MERGE_CATS_CLS-NEXT: ivars {{.*}} +MERGE_CATS_CLS-NEXT: entsize 32 +MERGE_CATS_CLS-NEXT: count 1 +MERGE_CATS_CLS-NEXT: offset {{.*}} 0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: type {{.*}} i +MERGE_CATS_CLS-NEXT: alignment 2 +MERGE_CATS_CLS-NEXT: size 4 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties {{.*}} +MERGE_CATS_CLS-NEXT: entsize 16 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,N,VMyProtocol01Prop +MERGE_CATS_CLS-NEXT: Meta Class +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass {{.*}} _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x3 RO_META RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 40 +MERGE_CATS_CLS-NEXT: instanceSize 40 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 5 +MERGE_CATS_CLS-NEXT: name {{.*}} class02ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) class02ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) MyProtocol02Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} class03ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) class03ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types 
{{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) MyProtocol03Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} baseClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass baseClassMethod] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: 
protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars 0x0 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties 0x0 +MERGE_CATS_CLS: __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + + #--- a64_file1.s ## @protocol MyProtocol01 diff --git a/lld/test/MachO/objc-category-merging-extern-class-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s similarity index 59% rename from lld/test/MachO/objc-category-merging-extern-class-minimal.s rename to lld/test/MachO/objc-category-merging-minimal.s index 5dd8924df5ad68..fcd90f178b150e 100644 --- a/lld/test/MachO/objc-category-merging-extern-class-minimal.s +++ b/lld/test/MachO/objc-category-merging-minimal.s @@ -1,7 +1,8 @@ # REQUIRES: aarch64 # RUN: rm -rf %t; split-file %s %t && cd %t -## Create a dylib with a fake base class to link against +############ Test merging multiple categories into a single category ############ +## Create a dylib with a fake base class to link against in when merging between categories # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_fakedylib.o a64_fakedylib.s # RUN: %lld -arch arm64 a64_fakedylib.o -o a64_fakedylib.dylib -dylib @@ -14,6 +15,15 @@ # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s 
--check-prefixes=NO_MERGE_CATS # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_merge.dylib | FileCheck %s --check-prefixes=MERGE_CATS +############ Test merging multiple categories into the base class ############ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o + +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE + + #### Check merge categories enabled ### # Check that the original categories are not there MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 @@ -44,6 +54,28 @@ NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 +#### Check merge categories into base class is disabled #### +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +#### Check merge categories into base class is enabled and categories are merged into base class #### +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass +YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass +YES_MERGE_INTO_BASE: baseMethods +YES_MERGE_INTO_BASE-NEXT: entsize 24 +YES_MERGE_INTO_BASE-NEXT: count 3 +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) 
cat01_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod] #--- a64_fakedylib.s @@ -156,3 +188,94 @@ L_OBJC_IMAGE_INFO: .addrsig .addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01 + +#--- merge_base_class_minimal.s +; clang -c merge_base_class_minimal.mm -O3 -target arm64-apple-macos -arch arm64 -S -o merge_base_class_minimal.s +; ================== Generated from ObjC: ================== +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @implementation MyBaseClass +; - (void)baseInstanceMethod {} +; @end +; ================== Generated from ObjC ================== + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 11, 0 + .p2align 2 +"-[MyBaseClass baseInstanceMethod]": + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: + .asciz "MyBaseClass" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 3 + .long 40 + .long 40 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: + .asciz "baseInstanceMethod" + 
.section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 + .long 1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .p2align 3, 0x0 +__OBJC_CLASS_RO_$_MyBaseClass: + .long 2 + .long 0 + .long 0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 +.subsections_via_symbols diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp index cdd2c42f939efe..c6a1592012e64c 100644 --- a/lld/wasm/WriterUtils.cpp +++ b/lld/wasm/WriterUtils.cpp @@ -35,6 +35,8 @@ std::string toString(ValType type) { return "funcref"; case ValType::EXTERNREF: return "externref"; + case ValType::EXNREF: + return "exnref"; case ValType::OTHERREF: return "otherref"; } diff --git a/lldb/bindings/headers.swig b/lldb/bindings/headers.swig index ffdc3c31ec883a..c91504604b6ac6 100644 --- a/lldb/bindings/headers.swig +++ b/lldb/bindings/headers.swig @@ -8,6 +8,8 @@ %{ #include "lldb/lldb-public.h" #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBAttachInfo.h" #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" diff --git a/lldb/bindings/interface/SBAddressRangeDocstrings.i b/lldb/bindings/interface/SBAddressRangeDocstrings.i new file mode 100644 index 00000000000000..650195704d73e6 --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeDocstrings.i @@ -0,0 +1,3 @@ +%feature("docstring", +"API clients can get address range information." 
+) lldb::SBAddressRange; diff --git a/lldb/bindings/interface/SBAddressRangeExtensions.i b/lldb/bindings/interface/SBAddressRangeExtensions.i new file mode 100644 index 00000000000000..31bcfcb64590bc --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeExtensions.i @@ -0,0 +1,11 @@ +%extend lldb::SBAddressRange { +#ifdef SWIGPYTHON + %pythoncode%{ + def __repr__(self): + import lldb + stream = lldb.SBStream() + self.GetDescription(stream, lldb.target if lldb.target else lldb.SBTarget()) + return stream.GetData() + %} +#endif +} diff --git a/lldb/bindings/interface/SBAddressRangeListDocstrings.i b/lldb/bindings/interface/SBAddressRangeListDocstrings.i new file mode 100644 index 00000000000000..e4b96b9ca59312 --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeListDocstrings.i @@ -0,0 +1,3 @@ +%feature("docstring", +"Represents a list of :py:class:`SBAddressRange`." +) lldb::SBAddressRangeList; diff --git a/lldb/bindings/interface/SBAddressRangeListExtensions.i b/lldb/bindings/interface/SBAddressRangeListExtensions.i new file mode 100644 index 00000000000000..e281a84d73d27d --- /dev/null +++ b/lldb/bindings/interface/SBAddressRangeListExtensions.i @@ -0,0 +1,29 @@ +%extend lldb::SBAddressRangeList { +#ifdef SWIGPYTHON + %pythoncode%{ + def __len__(self): + '''Return the number of address ranges in a lldb.SBAddressRangeList object.''' + return self.GetSize() + + def __iter__(self): + '''Iterate over all the address ranges in a lldb.SBAddressRangeList object.''' + return lldb_iter(self, 'GetSize', 'GetAddressRangeAtIndex') + + def __getitem__(self, idx): + '''Get the address range at a given index in an lldb.SBAddressRangeList object.''' + if not isinstance(idx, int): + raise TypeError("unsupported index type: %s" % type(idx)) + count = len(self) + if not (-count <= idx < count): + raise IndexError("list index out of range") + idx %= count + return self.GetAddressRangeAtIndex(idx) + + def __repr__(self): + import lldb + stream = lldb.SBStream() + 
self.GetDescription(stream, lldb.target if lldb.target else lldb.SBTarget()) + return stream.GetData() + %} +#endif +} diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig index 2a29a8dd7ef0b4..0953f4c72a9101 100644 --- a/lldb/bindings/interfaces.swig +++ b/lldb/bindings/interfaces.swig @@ -12,6 +12,8 @@ /* Docstrings for SB classes and methods */ %include "./interface/SBAddressDocstrings.i" +%include "./interface/SBAddressRangeDocstrings.i" +%include "./interface/SBAddressRangeListDocstrings.i" %include "./interface/SBAttachInfoDocstrings.i" %include "./interface/SBBlockDocstrings.i" %include "./interface/SBBreakpointDocstrings.i" @@ -86,6 +88,8 @@ /* API headers */ %include "lldb/API/SBAddress.h" +%include "lldb/API/SBAddressRange.h" +%include "lldb/API/SBAddressRangeList.h" %include "lldb/API/SBAttachInfo.h" %include "lldb/API/SBBlock.h" %include "lldb/API/SBBreakpoint.h" @@ -163,6 +167,8 @@ /* Extensions for SB classes */ %include "./interface/SBAddressExtensions.i" +%include "./interface/SBAddressRangeExtensions.i" +%include "./interface/SBAddressRangeListExtensions.i" %include "./interface/SBBlockExtensions.i" %include "./interface/SBBreakpointExtensions.i" %include "./interface/SBBreakpointListExtensions.i" diff --git a/lldb/include/lldb/API/LLDB.h b/lldb/include/lldb/API/LLDB.h index b256544326a224..d8cc9f5067fe94 100644 --- a/lldb/include/lldb/API/LLDB.h +++ b/lldb/include/lldb/API/LLDB.h @@ -10,6 +10,8 @@ #define LLDB_API_LLDB_H #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBAttachInfo.h" #include "lldb/API/SBBlock.h" #include "lldb/API/SBBreakpoint.h" diff --git a/lldb/include/lldb/API/SBAddress.h b/lldb/include/lldb/API/SBAddress.h index 5e5f355ccc390c..430dad4862dbff 100644 --- a/lldb/include/lldb/API/SBAddress.h +++ b/lldb/include/lldb/API/SBAddress.h @@ -86,6 +86,7 @@ class LLDB_API SBAddress { lldb::SBLineEntry GetLineEntry(); protected: + 
friend class SBAddressRange; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointLocation; diff --git a/lldb/include/lldb/API/SBAddressRange.h b/lldb/include/lldb/API/SBAddressRange.h new file mode 100644 index 00000000000000..152bd82426af1c --- /dev/null +++ b/lldb/include/lldb/API/SBAddressRange.h @@ -0,0 +1,66 @@ +//===-- SBAddressRange.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBADDRESSRANGE_H +#define LLDB_API_SBADDRESSRANGE_H + +#include "lldb/API/SBDefines.h" + +namespace lldb { + +class LLDB_API SBAddressRange { +public: + SBAddressRange(); + + SBAddressRange(const lldb::SBAddressRange &rhs); + + SBAddressRange(lldb::SBAddress addr, lldb::addr_t byte_size); + + ~SBAddressRange(); + + const lldb::SBAddressRange &operator=(const lldb::SBAddressRange &rhs); + + void Clear(); + + /// Check the address range refers to a valid base address and has a byte + /// size greater than zero. + /// + /// \return + /// True if the address range is valid, false otherwise. + bool IsValid() const; + + /// Get the base address of the range. + /// + /// \return + /// Base address object. + lldb::SBAddress GetBaseAddress() const; + + /// Get the byte size of this range. + /// + /// \return + /// The size in bytes of this address range. 
+ lldb::addr_t GetByteSize() const; + + bool operator==(const SBAddressRange &rhs); + + bool operator!=(const SBAddressRange &rhs); + + bool GetDescription(lldb::SBStream &description, const SBTarget target); + +private: + friend class SBAddressRangeList; + friend class SBBlock; + friend class SBFunction; + friend class SBProcess; + + AddressRangeUP m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBADDRESSRANGE_H diff --git a/lldb/include/lldb/API/SBAddressRangeList.h b/lldb/include/lldb/API/SBAddressRangeList.h new file mode 100644 index 00000000000000..a123287ef1b4fa --- /dev/null +++ b/lldb/include/lldb/API/SBAddressRangeList.h @@ -0,0 +1,54 @@ +//===-- SBAddressRangeList.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_API_SBADDRESSRANGELIST_H +#define LLDB_API_SBADDRESSRANGELIST_H + +#include + +#include "lldb/API/SBDefines.h" + +namespace lldb_private { +class AddressRangeListImpl; +} + +namespace lldb { + +class LLDB_API SBAddressRangeList { +public: + SBAddressRangeList(); + + SBAddressRangeList(const lldb::SBAddressRangeList &rhs); + + ~SBAddressRangeList(); + + const lldb::SBAddressRangeList & + operator=(const lldb::SBAddressRangeList &rhs); + + uint32_t GetSize() const; + + void Clear(); + + SBAddressRange GetAddressRangeAtIndex(uint64_t idx); + + void Append(const lldb::SBAddressRange &addr_range); + + void Append(const lldb::SBAddressRangeList &addr_range_list); + + bool GetDescription(lldb::SBStream &description, const SBTarget &target); + +private: + friend class SBBlock; + friend class SBProcess; + + std::unique_ptr m_opaque_up; +}; + +} // namespace lldb + +#endif // LLDB_API_SBADDRESSRANGELIST_H diff --git 
a/lldb/include/lldb/API/SBBlock.h b/lldb/include/lldb/API/SBBlock.h index 2570099f7652f3..de4bb22be26925 100644 --- a/lldb/include/lldb/API/SBBlock.h +++ b/lldb/include/lldb/API/SBBlock.h @@ -9,6 +9,8 @@ #ifndef LLDB_API_SBBLOCK_H #define LLDB_API_SBBLOCK_H +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBFrame.h" #include "lldb/API/SBTarget.h" @@ -52,6 +54,8 @@ class LLDB_API SBBlock { lldb::SBAddress GetRangeEndAddress(uint32_t idx); + lldb::SBAddressRangeList GetRanges(); + uint32_t GetRangeIndexForBlockAddress(lldb::SBAddress block_addr); lldb::SBValueList GetVariables(lldb::SBFrame &frame, bool arguments, diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h index 1181920677b46f..87c0a1c3661ca3 100644 --- a/lldb/include/lldb/API/SBDefines.h +++ b/lldb/include/lldb/API/SBDefines.h @@ -43,6 +43,8 @@ namespace lldb { class LLDB_API SBAddress; +class LLDB_API SBAddressRange; +class LLDB_API SBAddressRangeList; class LLDB_API SBAttachInfo; class LLDB_API SBBlock; class LLDB_API SBBreakpoint; diff --git a/lldb/include/lldb/API/SBFunction.h b/lldb/include/lldb/API/SBFunction.h index 71b372a818e4b5..df607fdc7ebf59 100644 --- a/lldb/include/lldb/API/SBFunction.h +++ b/lldb/include/lldb/API/SBFunction.h @@ -10,6 +10,7 @@ #define LLDB_API_SBFUNCTION_H #include "lldb/API/SBAddress.h" +#include "lldb/API/SBAddressRangeList.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBInstructionList.h" @@ -44,6 +45,8 @@ class LLDB_API SBFunction { lldb::SBAddress GetEndAddress(); + lldb::SBAddressRangeList GetRanges(); + const char *GetArgumentName(uint32_t arg_idx); uint32_t GetPrologueByteSize(); diff --git a/lldb/include/lldb/API/SBStream.h b/lldb/include/lldb/API/SBStream.h index 0e33f05b69916f..71caf41fd75491 100644 --- a/lldb/include/lldb/API/SBStream.h +++ b/lldb/include/lldb/API/SBStream.h @@ -62,6 +62,8 @@ class LLDB_API SBStream { protected: friend class 
SBAddress; + friend class SBAddressRange; + friend class SBAddressRangeList; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointLocation; diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index feeaa1cb71132b..35c2ed9c20a238 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -943,6 +943,7 @@ class LLDB_API SBTarget { protected: friend class SBAddress; + friend class SBAddressRange; friend class SBBlock; friend class SBBreakpoint; friend class SBBreakpointList; diff --git a/lldb/include/lldb/Core/AddressRange.h b/lldb/include/lldb/Core/AddressRange.h index 4a33c2d7958765..68a3ad0edd2d79 100644 --- a/lldb/include/lldb/Core/AddressRange.h +++ b/lldb/include/lldb/Core/AddressRange.h @@ -86,6 +86,8 @@ class AddressRange { /// (LLDB_INVALID_ADDRESS) and a zero byte size. void Clear(); + bool IsValid() const; + /// Check if a section offset address is contained in this range. /// /// \param[in] so_addr @@ -236,12 +238,24 @@ class AddressRange { /// The new size in bytes of this address range. void SetByteSize(lldb::addr_t byte_size) { m_byte_size = byte_size; } + bool GetDescription(Stream *s, Target *target) const; + + bool operator==(const AddressRange &rhs); + + bool operator!=(const AddressRange &rhs); + protected: // Member variables Address m_base_addr; ///< The section offset base address of this range. lldb::addr_t m_byte_size = 0; ///< The size in bytes of this address range. }; +// Forward-declarable wrapper. 
+class AddressRanges : public std::vector { +public: + using std::vector::vector; +}; + } // namespace lldb_private #endif // LLDB_CORE_ADDRESSRANGE_H diff --git a/lldb/include/lldb/Core/AddressRangeListImpl.h b/lldb/include/lldb/Core/AddressRangeListImpl.h new file mode 100644 index 00000000000000..46ebfe73d4d92d --- /dev/null +++ b/lldb/include/lldb/Core/AddressRangeListImpl.h @@ -0,0 +1,51 @@ +//===-- AddressRangeListImpl.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_CORE_ADDRESSRANGELISTIMPL_H +#define LLDB_CORE_ADDRESSRANGELISTIMPL_H + +#include "lldb/Core/AddressRange.h" +#include + +namespace lldb { +class SBBlock; +} + +namespace lldb_private { + +class AddressRangeListImpl { +public: + AddressRangeListImpl(); + + AddressRangeListImpl(const AddressRangeListImpl &rhs) = default; + + AddressRangeListImpl &operator=(const AddressRangeListImpl &rhs); + + size_t GetSize() const; + + void Reserve(size_t capacity); + + void Append(const AddressRange &sb_region); + + void Append(const AddressRangeListImpl &list); + + void Clear(); + + lldb_private::AddressRange GetAddressRangeAtIndex(size_t index); + +private: + friend class lldb::SBBlock; + + AddressRanges &ref(); + + AddressRanges m_ranges; +}; + +} // namespace lldb_private + +#endif // LLDB_CORE_ADDRESSRANGELISTIMPL_H diff --git a/lldb/include/lldb/Symbol/Block.h b/lldb/include/lldb/Symbol/Block.h index 02fd2add531033..c9c4d5ad767d7e 100644 --- a/lldb/include/lldb/Symbol/Block.h +++ b/lldb/include/lldb/Symbol/Block.h @@ -355,6 +355,8 @@ class Block : public UserID, public SymbolContextScope { // be able to get at any of the address ranges in a block.
bool GetRangeAtIndex(uint32_t range_idx, AddressRange &range); + AddressRanges GetRanges(); + bool GetStartAddress(Address &addr); void SetDidParseVariables(bool b, bool set_children); diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h index 10ba921b9dac8c..6d880b4da03c99 100644 --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -19,6 +19,8 @@ class ASTResultSynthesizer; class ASTStructExtractor; class Address; class AddressRange; +class AddressRanges; +class AddressRangeList; class AddressResolver; class ArchSpec; class Architecture; @@ -308,6 +310,7 @@ template class StreamBuffer; namespace lldb { typedef std::shared_ptr ABISP; +typedef std::unique_ptr AddressRangeUP; typedef std::shared_ptr BatonSP; typedef std::shared_ptr BlockSP; typedef std::shared_ptr BreakpointSP; diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt index e8228afe103f9c..63971016093151 100644 --- a/lldb/source/API/CMakeLists.txt +++ b/lldb/source/API/CMakeLists.txt @@ -42,6 +42,8 @@ set_target_properties(lldb-sbapi-dwarf-enums PROPERTIES FOLDER "LLDB/Tablegennin add_lldb_library(liblldb SHARED ${option_framework} SBAddress.cpp + SBAddressRange.cpp + SBAddressRangeList.cpp SBAttachInfo.cpp SBBlock.cpp SBBreakpoint.cpp diff --git a/lldb/source/API/SBAddressRange.cpp b/lldb/source/API/SBAddressRange.cpp new file mode 100644 index 00000000000000..9b1affdade439c --- /dev/null +++ b/lldb/source/API/SBAddressRange.cpp @@ -0,0 +1,103 @@ +//===-- SBAddressRange.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBAddressRange.h" +#include "Utils.h" +#include "lldb/API/SBAddress.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBTarget.h" +#include "lldb/Core/AddressRange.h" +#include "lldb/Core/Section.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/Stream.h" +#include +#include + +using namespace lldb; +using namespace lldb_private; + +SBAddressRange::SBAddressRange() + : m_opaque_up(std::make_unique()) { + LLDB_INSTRUMENT_VA(this); +} + +SBAddressRange::SBAddressRange(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + m_opaque_up = clone(rhs.m_opaque_up); +} + +SBAddressRange::SBAddressRange(lldb::SBAddress addr, lldb::addr_t byte_size) + : m_opaque_up(std::make_unique(addr.ref(), byte_size)) { + LLDB_INSTRUMENT_VA(this, addr, byte_size); +} + +SBAddressRange::~SBAddressRange() = default; + +const SBAddressRange &SBAddressRange::operator=(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + m_opaque_up = clone(rhs.m_opaque_up); + return *this; +} + +bool SBAddressRange::operator==(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (!IsValid() || !rhs.IsValid()) + return false; + return m_opaque_up->operator==(*(rhs.m_opaque_up)); +} + +bool SBAddressRange::operator!=(const SBAddressRange &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + return !(*this == rhs); +} + +void SBAddressRange::Clear() { + LLDB_INSTRUMENT_VA(this); + + m_opaque_up.reset(); +} + +bool SBAddressRange::IsValid() const { + LLDB_INSTRUMENT_VA(this); + + return m_opaque_up && m_opaque_up->IsValid(); +} + +lldb::SBAddress SBAddressRange::GetBaseAddress() const { + LLDB_INSTRUMENT_VA(this); + + if (!IsValid()) + return lldb::SBAddress(); + return lldb::SBAddress(m_opaque_up->GetBaseAddress()); +} + +lldb::addr_t SBAddressRange::GetByteSize() const { 
+ LLDB_INSTRUMENT_VA(this); + + if (!IsValid()) + return 0; + return m_opaque_up->GetByteSize(); +} + +bool SBAddressRange::GetDescription(SBStream &description, + const SBTarget target) { + LLDB_INSTRUMENT_VA(this, description, target); + + Stream &stream = description.ref(); + if (!IsValid()) { + stream << ""; + return true; + } + m_opaque_up->GetDescription(&stream, target.GetSP().get()); + return true; +} diff --git a/lldb/source/API/SBAddressRangeList.cpp b/lldb/source/API/SBAddressRangeList.cpp new file mode 100644 index 00000000000000..20660b3ff20882 --- /dev/null +++ b/lldb/source/API/SBAddressRangeList.cpp @@ -0,0 +1,94 @@ +//===-- SBAddressRangeList.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/API/SBAddressRangeList.h" +#include "Utils.h" +#include "lldb/API/SBAddressRange.h" +#include "lldb/API/SBStream.h" +#include "lldb/API/SBTarget.h" +#include "lldb/Core/AddressRangeListImpl.h" +#include "lldb/Utility/Instrumentation.h" +#include "lldb/Utility/Stream.h" + +#include + +using namespace lldb; +using namespace lldb_private; + +SBAddressRangeList::SBAddressRangeList() + : m_opaque_up(std::make_unique()) { + LLDB_INSTRUMENT_VA(this); +} + +SBAddressRangeList::SBAddressRangeList(const SBAddressRangeList &rhs) + : m_opaque_up(std::make_unique(*rhs.m_opaque_up)) { + LLDB_INSTRUMENT_VA(this, rhs); +} + +SBAddressRangeList::~SBAddressRangeList() = default; + +const SBAddressRangeList & +SBAddressRangeList::operator=(const SBAddressRangeList &rhs) { + LLDB_INSTRUMENT_VA(this, rhs); + + if (this != &rhs) + *m_opaque_up = *rhs.m_opaque_up; + return *this; +} + +uint32_t SBAddressRangeList::GetSize() const { + LLDB_INSTRUMENT_VA(this); + + return 
m_opaque_up->GetSize(); +} + +SBAddressRange SBAddressRangeList::GetAddressRangeAtIndex(uint64_t idx) { + LLDB_INSTRUMENT_VA(this, idx); + + SBAddressRange sb_addr_range; + (*sb_addr_range.m_opaque_up) = m_opaque_up->GetAddressRangeAtIndex(idx); + return sb_addr_range; +} + +void SBAddressRangeList::Clear() { + LLDB_INSTRUMENT_VA(this); + + m_opaque_up->Clear(); +} + +void SBAddressRangeList::Append(const SBAddressRange &sb_addr_range) { + LLDB_INSTRUMENT_VA(this, sb_addr_range); + + m_opaque_up->Append(*sb_addr_range.m_opaque_up); +} + +void SBAddressRangeList::Append(const SBAddressRangeList &sb_addr_range_list) { + LLDB_INSTRUMENT_VA(this, sb_addr_range_list); + + m_opaque_up->Append(*sb_addr_range_list.m_opaque_up); +} + +bool SBAddressRangeList::GetDescription(SBStream &description, + const SBTarget &target) { + LLDB_INSTRUMENT_VA(this, description, target); + + const uint32_t num_ranges = GetSize(); + bool is_first = true; + Stream &stream = description.ref(); + stream << "["; + for (uint32_t i = 0; i < num_ranges; ++i) { + if (is_first) { + is_first = false; + } else { + stream.Printf(", "); + } + GetAddressRangeAtIndex(i).GetDescription(description, target); + } + stream << "]"; + return true; +} diff --git a/lldb/source/API/SBBlock.cpp b/lldb/source/API/SBBlock.cpp index 7d7565340836b1..2577b14920f065 100644 --- a/lldb/source/API/SBBlock.cpp +++ b/lldb/source/API/SBBlock.cpp @@ -13,6 +13,7 @@ #include "lldb/API/SBStream.h" #include "lldb/API/SBValue.h" #include "lldb/Core/AddressRange.h" +#include "lldb/Core/AddressRangeListImpl.h" #include "lldb/Core/ValueObjectVariable.h" #include "lldb/Symbol/Block.h" #include "lldb/Symbol/Function.h" @@ -219,6 +220,15 @@ lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) { return sb_addr; } +lldb::SBAddressRangeList SBBlock::GetRanges() { + LLDB_INSTRUMENT_VA(this); + + lldb::SBAddressRangeList sb_ranges; + if (m_opaque_ptr) + sb_ranges.m_opaque_up->ref() = m_opaque_ptr->GetRanges(); + return sb_ranges; +} + 
uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) { LLDB_INSTRUMENT_VA(this, block_addr); diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index a01c7f79bbd31f..6a97352fc2c2fd 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFunction.h" +#include "lldb/API/SBAddressRange.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Disassembler.h" @@ -160,6 +161,19 @@ SBAddress SBFunction::GetEndAddress() { return addr; } +lldb::SBAddressRangeList SBFunction::GetRanges() { + LLDB_INSTRUMENT_VA(this); + + lldb::SBAddressRangeList ranges; + if (m_opaque_ptr) { + lldb::SBAddressRange range; + (*range.m_opaque_up) = m_opaque_ptr->GetAddressRange(); + ranges.Append(std::move(range)); + } + + return ranges; +} + const char *SBFunction::GetArgumentName(uint32_t arg_idx) { LLDB_INSTRUMENT_VA(this, arg_idx); diff --git a/lldb/source/Core/AddressRange.cpp b/lldb/source/Core/AddressRange.cpp index 1830f2ccd47fec..6cef7e149cd20b 100644 --- a/lldb/source/Core/AddressRange.cpp +++ b/lldb/source/Core/AddressRange.cpp @@ -14,6 +14,7 @@ #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-defines.h" +#include "lldb/lldb-types.h" #include "llvm/Support/Compiler.h" @@ -145,6 +146,10 @@ void AddressRange::Clear() { m_byte_size = 0; } +bool AddressRange::IsValid() const { + return m_base_addr.IsValid() && (m_byte_size > 0); +} + bool AddressRange::Dump(Stream *s, Target *target, Address::DumpStyle style, Address::DumpStyle fallback_style) const { addr_t vmaddr = LLDB_INVALID_ADDRESS; @@ -203,3 +208,41 @@ void AddressRange::DumpDebug(Stream *s) const { static_cast(m_base_addr.GetSection().get()), m_base_addr.GetOffset(), GetByteSize()); } + +bool AddressRange::GetDescription(Stream *s, Target *target) const { + addr_t start_addr 
= m_base_addr.GetLoadAddress(target); + if (start_addr != LLDB_INVALID_ADDRESS) { + // We have a valid target and the address was resolved, or we have a base + // address with no section. Just print out a raw address range: [, + // ) + s->Printf("[0x%" PRIx64 "-0x%" PRIx64 ")", start_addr, + start_addr + GetByteSize()); + return true; + } + + // Either no target or the address wasn't resolved, print as + // [-) + const char *file_name = ""; + const auto section_sp = m_base_addr.GetSection(); + if (section_sp) { + if (const auto object_file = section_sp->GetObjectFile()) + file_name = object_file->GetFileSpec().GetFilename().AsCString(); + } + start_addr = m_base_addr.GetFileAddress(); + const addr_t end_addr = (start_addr == LLDB_INVALID_ADDRESS) + ? LLDB_INVALID_ADDRESS + : start_addr + GetByteSize(); + s->Printf("%s[0x%" PRIx64 "-0x%" PRIx64 ")", file_name, start_addr, end_addr); + return true; +} + +bool AddressRange::operator==(const AddressRange &rhs) { + if (!IsValid() || !rhs.IsValid()) + return false; + return m_base_addr == rhs.GetBaseAddress() && + m_byte_size == rhs.GetByteSize(); +} + +bool AddressRange::operator!=(const AddressRange &rhs) { + return !(*this == rhs); +} diff --git a/lldb/source/Core/AddressRangeListImpl.cpp b/lldb/source/Core/AddressRangeListImpl.cpp new file mode 100644 index 00000000000000..d405cf0fa3ec35 --- /dev/null +++ b/lldb/source/Core/AddressRangeListImpl.cpp @@ -0,0 +1,50 @@ +//===-- AddressRangeListImpl.cpp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Core/AddressRangeListImpl.h" + +using namespace lldb; +using namespace lldb_private; + +AddressRangeListImpl::AddressRangeListImpl() : m_ranges() {} + +AddressRangeListImpl & +AddressRangeListImpl::operator=(const AddressRangeListImpl &rhs) { + if (this == &rhs) + return *this; + m_ranges = rhs.m_ranges; + return *this; +} + +size_t AddressRangeListImpl::GetSize() const { return m_ranges.size(); } + +void AddressRangeListImpl::Reserve(size_t capacity) { + m_ranges.reserve(capacity); +} + +void AddressRangeListImpl::Append(const AddressRange &sb_region) { + m_ranges.emplace_back(sb_region); +} + +void AddressRangeListImpl::Append(const AddressRangeListImpl &list) { + Reserve(GetSize() + list.GetSize()); + + for (const auto &range : list.m_ranges) + Append(range); +} + +void AddressRangeListImpl::Clear() { m_ranges.clear(); } + +lldb_private::AddressRange +AddressRangeListImpl::GetAddressRangeAtIndex(size_t index) { + if (index >= GetSize()) + return AddressRange(); + return m_ranges[index]; +} + +AddressRanges &AddressRangeListImpl::ref() { return m_ranges; } diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index f24dbbd45a8e8c..dbc620b91b1ed1 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -20,6 +20,7 @@ endif() add_lldb_library(lldbCore Address.cpp AddressRange.cpp + AddressRangeListImpl.cpp AddressResolver.cpp AddressResolverFileLine.cpp Communication.cpp diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp index ca582cb1d5a46f..ddaa7a8a597b4f 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp +++ 
b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp @@ -13,6 +13,8 @@ #include "lldb/Symbol/CompilerType.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/StringLexer.h" #include "clang/Basic/TargetInfo.h" @@ -234,12 +236,15 @@ clang::QualType AppleObjCTypeEncodingParser::BuildObjCObjectPointerType( auto types = decl_vendor->FindTypes(ConstString(name), /*max_matches*/ 1); - // The user can forward-declare something that has no definition. The runtime - // doesn't prohibit this at all. This is a rare and very weird case. We keep - // this assert in debug builds so we catch other weird cases. - lldbassert(!types.empty()); - if (types.empty()) + if (types.empty()) { + // The user can forward-declare something that has no definition. The + // runtime doesn't prohibit this at all. This is a rare and very weird + // case. We assert in debug builds so we catch other weird cases.
+ assert(false && "forward declaration without definition"); + LLDB_LOG(GetLog(LLDBLog::Types), + "forward declaration without definition: {0}", name); return ast_ctx.getObjCIdType(); + } return ClangUtil::GetQualType(types.front().GetPointerType()); } else { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 66db396279e063..e144cf0f9bd94e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -60,6 +60,8 @@ class DWARFASTParser { virtual ConstString GetDIEClassTemplateParams(const DWARFDIE &die) = 0; + virtual lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) = 0; + static std::optional ParseChildArrayInfo(const DWARFDIE &parent_die, const ExecutionContext *exe_ctx = nullptr); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index f8101aba5c6277..e0b1b430b266f3 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -154,6 +154,26 @@ static bool TagIsRecordType(dw_tag_t tag) { } } +static bool IsForwardDeclaration(const DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs, + LanguageType cu_language) { + if (attrs.is_forward_declaration) + return true; + + // Work around an issue with clang at the moment where forward + // declarations for objective C classes are emitted as: + // DW_TAG_structure_type [2] + // DW_AT_name( "ForwardObjcClass" ) + // DW_AT_byte_size( 0x00 ) + // DW_AT_decl_file( "..." ) + // DW_AT_decl_line( 1 ) + // + // Note that there is no DW_AT_declaration and there are no children, + // and the byte size is zero. 
+ return attrs.byte_size && *attrs.byte_size == 0 && attrs.name && + !die.HasChildren() && cu_language == eLanguageTypeObjC; +} + TypeSP DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc, const DWARFDIE &die, Log *log) { @@ -249,11 +269,9 @@ static void ForcefullyCompleteType(CompilerType type) { /// This function serves a similar purpose as RequireCompleteType above, but it /// avoids completing the type if it is not immediately necessary. It only /// ensures we _can_ complete the type later. -static void PrepareContextToReceiveMembers(TypeSystemClang &ast, - ClangASTImporter &ast_importer, - clang::DeclContext *decl_ctx, - DWARFDIE die, - const char *type_name_cstr) { +void DWARFASTParserClang::PrepareContextToReceiveMembers( + clang::DeclContext *decl_ctx, const DWARFDIE &decl_ctx_die, + const DWARFDIE &die, const char *type_name_cstr) { auto *tag_decl_ctx = clang::dyn_cast(decl_ctx); if (!tag_decl_ctx) return; // Non-tag context are always ready. @@ -268,7 +286,8 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, // gmodules case), we can complete the type by doing a full import. // If this type was not imported from an external AST, there's nothing to do. - CompilerType type = ast.GetTypeForDecl(tag_decl_ctx); + CompilerType type = m_ast.GetTypeForDecl(tag_decl_ctx); + ClangASTImporter &ast_importer = GetClangASTImporter(); if (type && ast_importer.CanImport(type)) { auto qual_type = ClangUtil::GetQualType(type); if (ast_importer.RequireCompleteType(qual_type)) @@ -279,6 +298,13 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, type_name_cstr ? type_name_cstr : "", die.GetOffset()); } + // By searching for the definition DIE of the decl_ctx type, we will either: + // 1. Find the definition DIE and start its definition with + // TypeSystemClang::StartTagDeclarationDefinition. + // 2. Be unable to find it, in which case we need to forcefully complete it.
+ FindDefinitionTypeForDIE(decl_ctx_die); + if (tag_decl_ctx->isCompleteDefinition() || tag_decl_ctx->isBeingDefined()) + return; // We don't have a type definition and/or the import failed. We must // forcefully complete the type to avoid crashes. ForcefullyCompleteType(type); @@ -620,10 +646,11 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, if (tag == DW_TAG_typedef) { // DeclContext will be populated when the clang type is materialized in // Type::ResolveCompilerType. - PrepareContextToReceiveMembers( - m_ast, GetClangASTImporter(), - GetClangDeclContextContainingDIE(die, nullptr), die, - attrs.name.GetCString()); + DWARFDIE decl_ctx_die; + clang::DeclContext *decl_ctx = + GetClangDeclContextContainingDIE(die, &decl_ctx_die); + PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die, + attrs.name.GetCString()); if (attrs.type.IsValid()) { // Try to parse a typedef from the (DWARF embedded in the) Clang @@ -1103,32 +1130,6 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, // struct and see if this is actually a C++ method Type *class_type = dwarf->ResolveType(decl_ctx_die); if (class_type) { - if (class_type->GetID() != decl_ctx_die.GetID() || - IsClangModuleFwdDecl(decl_ctx_die)) { - - // We uniqued the parent class of this function to another - // class so we now need to associate all dies under - // "decl_ctx_die" to DIEs in the DIE for "class_type"... - DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID()); - - if (class_type_die) { - std::vector failures; - - CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die, - class_type, failures); - - // FIXME do something with these failures that's - // smarter than just dropping them on the ground. - // Unfortunately classes don't like having stuff added - // to them after their definitions are complete... 
- - Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; - if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { - return type_ptr->shared_from_this(); - } - } - } - if (attrs.specification.IsValid()) { // We have a specification which we are going to base our // function prototype off of, so we need this type to be @@ -1263,6 +1264,39 @@ DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, } } } + // By here, we should have already completed the c++ class_type + // because if either specification or abstract_origin is present, we + // call GetClangDeclContextForDIE to resolve the DW_TAG_subprogram + // refered by this one until we reached the DW_TAG_subprogram without + // specification or abstract_origin (the else branch above). Then the + // above GetFullCompilerType() will complete the class_type if it's + // not completed yet. After that, we will have the mapping from DIEs + // in class_type_die to DeclContexts in m_die_to_decl_ctx. + if (class_type->GetID() != decl_ctx_die.GetID() || + IsClangModuleFwdDecl(decl_ctx_die)) { + + // We uniqued the parent class of this function to another + // class so we now need to associate all dies under + // "decl_ctx_die" to DIEs in the DIE for "class_type"... + DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID()); + + if (class_type_die) { + std::vector failures; + + CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die, + class_type, failures); + + // FIXME do something with these failures that's + // smarter than just dropping them on the ground. + // Unfortunately classes don't like having stuff added + // to them after their definitions are complete... 
+ + Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; + if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { + return type_ptr->shared_from_this(); + } + } + } } } } @@ -1635,6 +1669,93 @@ DWARFASTParserClang::GetCPlusPlusQualifiedName(const DWARFDIE &die) { return qualified_name; } +lldb_private::Type * +DWARFASTParserClang::FindDefinitionTypeForDIE(const DWARFDIE &die) { + SymbolFileDWARF *dwarf = die.GetDWARF(); + ParsedDWARFTypeAttributes attrs(die); + bool is_forward_declaration = IsForwardDeclaration( + die, attrs, SymbolFileDWARF::GetLanguage(*die.GetCU())); + if (!is_forward_declaration) + return dwarf->GetDIEToType()[die.GetDIE()]; + + const dw_tag_t tag = die.Tag(); + TypeSP type_sp; + Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a " + "forward declaration DIE, trying to find definition DIE", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString()); + } + // We haven't parse definition die for this type, starting to search for it. + // After we found the definition die, the GetDeclarationDIEToDefinitionDIE() + // map will have the new mapping from this declaration die to definition die. 
+ if (attrs.class_language == eLanguageTypeObjC || + attrs.class_language == eLanguageTypeObjC_plus_plus) { + if (!attrs.is_complete_objc_class && + die.Supports_DW_AT_APPLE_objc_complete_type()) { + // We have a valid eSymbolTypeObjCClass class symbol whose name + // matches the current objective C class that we are trying to find + // and this DIE isn't the complete definition (we checked + // is_complete_objc_class above and know it is false), so the real + // definition is in here somewhere + type_sp = + dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true); + + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = + dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, + // see if we have a declaration anywhere else... + type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE( + die, attrs.name, true); + } + } + + if (type_sp && log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an " + "incomplete objc type, complete type is {5:x8}", + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(), + type_sp->GetID()); + } + } + } + + type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die); + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, see + // if we have a declaration anywhere else... 
+ type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die); + } + if (type_sp && log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a " + "forward declaration, complete type is {4:x8}", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString(), type_sp->GetID()); + } + } + + if (!type_sp && log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF({0:p}) - {1:x16}: {2} type \"{3}\" is a " + "forward declaration, unable to find definition DIE for it", + static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), + attrs.name.GetCString()); + } + return type_sp.get(); +} + TypeSP DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, const DWARFDIE &die, @@ -1646,14 +1767,10 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU()); Log *log = GetLog(DWARFLog::TypeCompletion | DWARFLog::Lookups); - // UniqueDWARFASTType is large, so don't create a local variables on the - // stack, put it on the heap. This function is often called recursively and - // clang isn't good at sharing the stack space for variables in different - // blocks. 
- auto unique_ast_entry_up = std::make_unique(); - ConstString unique_typename(attrs.name); Declaration unique_decl(attrs.decl); + uint64_t byte_size = attrs.byte_size.value_or(0); + attrs.is_forward_declaration = IsForwardDeclaration(die, attrs, cu_language); if (attrs.name) { if (Language::LanguageIsCPlusPlus(cu_language)) { @@ -1666,14 +1783,42 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, unique_decl.Clear(); } - if (dwarf->GetUniqueDWARFASTTypeMap().Find( - unique_typename, die, unique_decl, attrs.byte_size.value_or(-1), - *unique_ast_entry_up)) { - type_sp = unique_ast_entry_up->m_type_sp; + if (UniqueDWARFASTType *unique_ast_entry_type = + dwarf->GetUniqueDWARFASTTypeMap().Find( + unique_typename, die, unique_decl, byte_size, + attrs.is_forward_declaration)) { + type_sp = unique_ast_entry_type->m_type_sp; if (type_sp) { dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); LinkDeclContextToDIE( - GetCachedClangDeclContextForDIE(unique_ast_entry_up->m_die), die); + GetCachedClangDeclContextForDIE(unique_ast_entry_type->m_die), die); + if (!attrs.is_forward_declaration) { + // If the DIE being parsed in this function is a definition and the + // entry in the map is a declaration, then we need to update the entry + // to point to the definition DIE. + if (unique_ast_entry_type->m_is_forward_declaration) { + unique_ast_entry_type->m_die = die; + unique_ast_entry_type->m_byte_size = byte_size; + unique_ast_entry_type->m_declaration = unique_decl; + unique_ast_entry_type->m_is_forward_declaration = false; + // Need to update Type ID to refer to the definition DIE. because + // it's used in ParseSubroutine to determine if we need to copy cxx + // method types from a declaration DIE to this definition DIE. 
+ type_sp->SetID(die.GetID()); + clang_type = type_sp->GetForwardCompilerType(); + if (attrs.class_language != eLanguageTypeObjC && + attrs.class_language != eLanguageTypeObjC_plus_plus) + TypeSystemClang::StartTagDeclarationDefinition(clang_type); + + CompilerType compiler_type_no_qualifiers = + ClangUtil::RemoveFastQualifiers(clang_type); + auto result = dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace( + compiler_type_no_qualifiers.GetOpaqueQualType(), + *die.GetDIERef()); + if (!result.second) + result.first->second = *die.GetDIERef(); + } + } return type_sp; } } @@ -1695,125 +1840,21 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, default_accessibility = eAccessPrivate; } - if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name && - !die.HasChildren() && cu_language == eLanguageTypeObjC) { - // Work around an issue with clang at the moment where forward - // declarations for objective C classes are emitted as: - // DW_TAG_structure_type [2] - // DW_AT_name( "ForwardObjcClass" ) - // DW_AT_byte_size( 0x00 ) - // DW_AT_decl_file( "..." ) - // DW_AT_decl_line( 1 ) - // - // Note that there is no DW_AT_declaration and there are no children, - // and the byte size is zero. 
- attrs.is_forward_declaration = true; - } - - if (attrs.class_language == eLanguageTypeObjC || - attrs.class_language == eLanguageTypeObjC_plus_plus) { - if (!attrs.is_complete_objc_class && - die.Supports_DW_AT_APPLE_objc_complete_type()) { - // We have a valid eSymbolTypeObjCClass class symbol whose name - // matches the current objective C class that we are trying to find - // and this DIE isn't the complete definition (we checked - // is_complete_objc_class above and know it is false), so the real - // definition is in here somewhere - type_sp = - dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = - dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, - // see if we have a declaration anywhere else... - type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE( - die, attrs.name, true); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is an " - "incomplete objc type, complete type is {5:x8}", - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this - // die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - return type_sp; - } - } - } - if (attrs.is_forward_declaration) { - // We have a forward declaration to a type and we need to try and - // find a full declaration. We look in the current type index just in - // case we have a forward declaration followed by an actual - // declarations in the DWARF. If this fails, we need to look - // elsewhere... 
- if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a " - "forward declaration, trying to find complete type", - static_cast(this), die.GetOffset(), DW_TAG_value_to_name(tag), - tag, attrs.name.GetCString()); - } - // See if the type comes from a Clang module and if so, track down // that type. type_sp = ParseTypeFromClangModule(sc, die, log); if (type_sp) return type_sp; - - // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die, - // type_name_const_str); - type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, see - // if we have a declaration anywhere else... - type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(die); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF({0:p}) - {1:x16}: {2} ({3}) type \"{4}\" is a " - "forward declaration, complete type is {5:x8}", - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), tag, attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - clang::DeclContext *defn_decl_ctx = - GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); - if (defn_decl_ctx) - LinkDeclContextToDIE(defn_decl_ctx, die); - return type_sp; - } } + assert(tag_decl_kind != -1); UNUSED_IF_ASSERT_DISABLED(tag_decl_kind); - bool clang_type_was_created = false; - clang::DeclContext *decl_ctx = GetClangDeclContextContainingDIE(die, nullptr); + DWARFDIE decl_ctx_die; + clang::DeclContext *decl_ctx = + GetClangDeclContextContainingDIE(die, &decl_ctx_die); - PrepareContextToReceiveMembers(m_ast, 
GetClangASTImporter(), decl_ctx, die, + PrepareContextToReceiveMembers(decl_ctx, decl_ctx_die, die, attrs.name.GetCString()); if (attrs.accessibility == eAccessNone && decl_ctx) { @@ -1852,20 +1893,17 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, tag_decl_kind, template_param_infos); clang_type = m_ast.CreateClassTemplateSpecializationType(class_specialization_decl); - clang_type_was_created = true; m_ast.SetMetadata(class_template_decl, metadata); m_ast.SetMetadata(class_specialization_decl, metadata); } - if (!clang_type_was_created) { - clang_type_was_created = true; + if (!clang_type) { clang_type = m_ast.CreateRecordType( decl_ctx, GetOwningClangModule(die), attrs.accessibility, attrs.name.GetCString(), tag_decl_kind, attrs.class_language, &metadata, attrs.exports_symbols); } - // Store a forward declaration to this class type in case any // parameters in any class methods need it for the clang types for // function prototypes. @@ -1876,13 +1914,19 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, Type::ResolveState::Forward, TypePayloadClang(OptionalClangModuleID(), attrs.is_complete_objc_class)); + // UniqueDWARFASTType is large, so don't create a local variables on the + // stack, put it on the heap. This function is often called recursively and + // clang isn't good at sharing the stack space for variables in different + // blocks. 
+ auto unique_ast_entry_up = std::make_unique(); // Add our type to the unique type map so we don't end up creating many // copies of the same type over and over in the ASTContext for our // module unique_ast_entry_up->m_type_sp = type_sp; unique_ast_entry_up->m_die = die; unique_ast_entry_up->m_declaration = unique_decl; - unique_ast_entry_up->m_byte_size = attrs.byte_size.value_or(0); + unique_ast_entry_up->m_byte_size = byte_size; + unique_ast_entry_up->m_is_forward_declaration = attrs.is_forward_declaration; dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename, *unique_ast_entry_up); @@ -1923,7 +1967,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, GetClangASTImporter().SetRecordLayout(record_decl, layout); } } - } else if (clang_type_was_created) { + } else { // Start the definition if the class is not objective C since the // underlying decls respond to isCompleteDefinition(). Objective // C decls don't respond to isCompleteDefinition() so we can't @@ -1935,26 +1979,21 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, if (attrs.class_language != eLanguageTypeObjC && attrs.class_language != eLanguageTypeObjC_plus_plus) TypeSystemClang::StartTagDeclarationDefinition(clang_type); - - // Leave this as a forward declaration until we need to know the - // details of the type. lldb_private::Type will automatically call - // the SymbolFile virtual function - // "SymbolFileDWARF::CompleteType(Type *)" When the definition - // needs to be defined. - assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count( - ClangUtil::RemoveFastQualifiers(clang_type) - .GetOpaqueQualType()) && - "Type already in the forward declaration map!"); - // Can't assume m_ast.GetSymbolFile() is actually a - // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple - // binaries. 
- dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace( - ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(), - *die.GetDIERef()); - m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true); } } + // If this is a declaration DIE, leave this as a forward declaration until we + // need to know the details of the type. lldb_private::Type will automatically + // call the SymbolFile virtual function "SymbolFileDWARF::CompleteType(Type + // *)" When the definition needs to be defined. + assert(!dwarf->GetForwardDeclCompilerTypeToDIE().count( + ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()) && + "Type already in the forward declaration map!"); + dwarf->GetForwardDeclCompilerTypeToDIE().try_emplace( + ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType(), + *die.GetDIERef()); + m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true); + // If we made a clang type, set the trivial abi if applicable: We only // do this for pass by value - which implies the Trivial ABI. There // isn't a way to assert that something that would normally be pass by diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 8d4af203bb2871..853b8ccc30369f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -42,40 +42,40 @@ struct ParsedDWARFTypeAttributes; class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { public: + typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE; + DWARFASTParserClang(lldb_private::TypeSystemClang &ast); ~DWARFASTParserClang() override; // DWARFASTParser interface. 
- lldb::TypeSP - ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - bool *type_is_new_ptr) override; + lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + bool *type_is_new_ptr) override; - lldb_private::ConstString ConstructDemangledNameFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::ConstString + ConstructDemangledNameFromDWARF(const DWARFDIE &die) override; lldb_private::Function * ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, - const lldb_private::plugin::dwarf::DWARFDIE &die, + const DWARFDIE &die, const lldb_private::AddressRange &func_range) override; bool - CompleteTypeFromDWARF(const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Type *type, + CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &compiler_type) override; - lldb_private::CompilerDecl GetDeclForUIDFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::CompilerDecl + GetDeclForUIDFromDWARF(const DWARFDIE &die) override; void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; - lldb_private::CompilerDeclContext GetDeclContextForUIDFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::CompilerDeclContext + GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; - lldb_private::CompilerDeclContext GetDeclContextContainingUIDFromDWARF( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::CompilerDeclContext + GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; lldb_private::ClangASTImporter &GetClangASTImporter(); @@ -105,8 +105,13 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \return A string, including surrounding '<>', of the template parameters. 
/// If the DIE's name already has '<>', returns an empty ConstString because /// it's assumed that the caller is using the DIE name anyway. - lldb_private::ConstString GetDIEClassTemplateParams( - const lldb_private::plugin::dwarf::DWARFDIE &die) override; + lldb_private::ConstString + GetDIEClassTemplateParams(const DWARFDIE &die) override; + + // Searching for definition DIE for the given DIE and return the type + // associated with the definition DIE, or nullptr if definition DIE is not + // found. + lldb_private::Type *FindDefinitionTypeForDIE(const DWARFDIE &die) override; protected: /// Protected typedefs and members. @@ -118,8 +123,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, clang::DeclContext *> DIEToDeclContextMap; - typedef std::multimap + typedef std::multimap DeclContextToDIEMap; typedef llvm::DenseMap< const lldb_private::plugin::dwarf::DWARFDebugInfoEntry *, @@ -137,14 +141,11 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { std::unique_ptr m_clang_ast_importer_up; /// @} - clang::DeclContext * - GetDeclContextForBlock(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetDeclContextForBlock(const DWARFDIE &die); - clang::BlockDecl * - ResolveBlockDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::BlockDecl *ResolveBlockDIE(const DWARFDIE &die); - clang::NamespaceDecl * - ResolveNamespaceDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::NamespaceDecl *ResolveNamespaceDIE(const DWARFDIE &die); /// Returns the namespace decl that a DW_TAG_imported_declaration imports. /// @@ -155,96 +156,86 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// 'die' imports. If the imported entity is not a namespace /// or another import declaration, returns nullptr. If an error /// occurs, returns nullptr. 
- clang::NamespaceDecl *ResolveImportedDeclarationDIE( - const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::NamespaceDecl *ResolveImportedDeclarationDIE(const DWARFDIE &die); - bool ParseTemplateDIE(const lldb_private::plugin::dwarf::DWARFDIE &die, + bool ParseTemplateDIE(const DWARFDIE &die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); bool ParseTemplateParameterInfos( - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + const DWARFDIE &parent_die, lldb_private::TypeSystemClang::TemplateParameterInfos &template_param_infos); - std::string - GetCPlusPlusQualifiedName(const lldb_private::plugin::dwarf::DWARFDIE &die); + std::string GetCPlusPlusQualifiedName(const DWARFDIE &die); bool ParseChildMembers( - const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::CompilerType &class_compiler_type, + const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type, std::vector> &base_classes, - std::vector &member_function_dies, - std::vector &contained_type_dies, + std::vector &member_function_dies, + std::vector &contained_type_dies, DelayedPropertyList &delayed_properties, const lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); size_t ParseChildParameters(clang::DeclContext *containing_decl_ctx, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, - bool skip_artificial, bool &is_static, bool &is_variadic, + const DWARFDIE &parent_die, bool skip_artificial, + bool &is_static, bool &is_variadic, bool &has_template_params, std::vector &function_args, std::vector &function_param_decls, unsigned &type_quals); - size_t ParseChildEnumerators( - lldb_private::CompilerType &compiler_type, bool is_signed, - uint32_t enumerator_byte_size, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die); + size_t ParseChildEnumerators(lldb_private::CompilerType &compiler_type, + bool is_signed, uint32_t enumerator_byte_size, + const DWARFDIE &parent_die); /// Parse a 
structure, class, or union type DIE. - lldb::TypeSP - ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseStructureLikeDIE(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); - clang::Decl * - GetClangDeclForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::Decl *GetClangDeclForDIE(const DWARFDIE &die); - clang::DeclContext * - GetClangDeclContextForDIE(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetClangDeclContextForDIE(const DWARFDIE &die); - clang::DeclContext *GetClangDeclContextContainingDIE( - const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::plugin::dwarf::DWARFDIE *decl_ctx_die); - lldb_private::OptionalClangModuleID - GetOwningClangModule(const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetClangDeclContextContainingDIE(const DWARFDIE &die, + DWARFDIE *decl_ctx_die); + lldb_private::OptionalClangModuleID GetOwningClangModule(const DWARFDIE &die); - bool CopyUniqueClassMethodTypes( - const lldb_private::plugin::dwarf::DWARFDIE &src_class_die, - const lldb_private::plugin::dwarf::DWARFDIE &dst_class_die, - lldb_private::Type *class_type, - std::vector &failures); + bool CopyUniqueClassMethodTypes(const DWARFDIE &src_class_die, + const DWARFDIE &dst_class_die, + lldb_private::Type *class_type, + std::vector &failures); - clang::DeclContext *GetCachedClangDeclContextForDIE( - const lldb_private::plugin::dwarf::DWARFDIE &die); + clang::DeclContext *GetCachedClangDeclContextForDIE(const DWARFDIE &die); - void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, - const lldb_private::plugin::dwarf::DWARFDIE &die); + void LinkDeclContextToDIE(clang::DeclContext *decl_ctx, const DWARFDIE &die); - void LinkDeclToDIE(clang::Decl *decl, - const lldb_private::plugin::dwarf::DWARFDIE &die); + void LinkDeclToDIE(clang::Decl 
*decl, const DWARFDIE &die); /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file. /// /// Returns \p type_sp. - lldb::TypeSP UpdateSymbolContextScopeForType( - const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, lldb::TypeSP type_sp); + lldb::TypeSP + UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, lldb::TypeSP type_sp); /// Follow Clang Module Skeleton CU references to find a type definition. - lldb::TypeSP - ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Log *log); + lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + lldb_private::Log *log); // Return true if this type is a declaration to a type in an external // module. - lldb::ModuleSP - GetModuleForType(const lldb_private::plugin::dwarf::DWARFDIE &die); + lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + + void PrepareContextToReceiveMembers(clang::DeclContext *decl_ctx, + const DWARFDIE &decl_ctx_die, + const DWARFDIE &die, + const char *type_name_cstr); static bool classof(const DWARFASTParser *Parser) { return Parser->GetKind() == Kind::DWARFASTParserClang; @@ -274,10 +265,8 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// Parsed form of all attributes that are relevant for parsing type members. struct MemberAttributes { - explicit MemberAttributes( - const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, - lldb::ModuleSP module_sp); + explicit MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die, + lldb::ModuleSP module_sp); const char *name = nullptr; /// Indicates how many bits into the word (according to the host endianness) /// the low-order bit of the field starts. Can be negative. 
@@ -324,15 +313,12 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// created property. /// \param delayed_properties The list of delayed properties that the result /// will be appended to. - void - ParseObjCProperty(const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, - const lldb_private::CompilerType &class_clang_type, - DelayedPropertyList &delayed_properties); + void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die, + const lldb_private::CompilerType &class_clang_type, + DelayedPropertyList &delayed_properties); void - ParseSingleMember(const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType &class_clang_type, lldb::AccessType default_accessibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info, @@ -350,31 +336,25 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param[in] class_clang_type The parent RecordType of the static /// member this function will create. 
void CreateStaticMemberVariable( - const lldb_private::plugin::dwarf::DWARFDIE &die, - const MemberAttributes &attrs, + const DWARFDIE &die, const MemberAttributes &attrs, const lldb_private::CompilerType &class_clang_type); - bool CompleteRecordType(const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Type *type, + bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &clang_type); - bool CompleteEnumType(const lldb_private::plugin::dwarf::DWARFDIE &die, - lldb_private::Type *type, + bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &clang_type); - lldb::TypeSP - ParseTypeModifier(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, - const lldb_private::plugin::dwarf::DWARFDIE &die, - ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseSubroutine(const lldb_private::plugin::dwarf::DWARFDIE &die, + const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseSubroutine(const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP ParseArrayType(const lldb_private::plugin::dwarf::DWARFDIE &die, + lldb::TypeSP ParseArrayType(const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); - lldb::TypeSP - ParsePointerToMemberType(const lldb_private::plugin::dwarf::DWARFDIE &die, - const ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs); /// Parses a DW_TAG_inheritance DIE into a base/super class. 
/// @@ -391,8 +371,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param layout_info The layout information that will be updated for C++ /// base classes with the base offset. void ParseInheritance( - const lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + const DWARFDIE &die, const DWARFDIE &parent_die, const lldb_private::CompilerType class_clang_type, const lldb::AccessType default_accessibility, const lldb::ModuleSP &module_sp, @@ -409,8 +388,7 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// \param layout_info The layout information that will be updated for // base classes with the base offset void - ParseRustVariantPart(lldb_private::plugin::dwarf::DWARFDIE &die, - const lldb_private::plugin::dwarf::DWARFDIE &parent_die, + ParseRustVariantPart(DWARFDIE &die, const DWARFDIE &parent_die, lldb_private::CompilerType &class_clang_type, const lldb::AccessType default_accesibility, lldb_private::ClangASTImporter::LayoutInfo &layout_info); @@ -420,8 +398,9 @@ class DWARFASTParserClang : public lldb_private::plugin::dwarf::DWARFASTParser { /// Some attributes are relevant for all kinds of types (declaration), while /// others are only meaningful to a specific type (is_virtual) struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes( - const lldb_private::plugin::dwarf::DWARFDIE &die); + typedef lldb_private::plugin::dwarf::DWARFDIE DWARFDIE; + + explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); lldb::AccessType accessibility = lldb::eAccessNone; bool is_artificial = false; @@ -438,7 +417,7 @@ struct ParsedDWARFTypeAttributes { const char *mangled_name = nullptr; lldb_private::ConstString name; lldb_private::Declaration decl; - lldb_private::plugin::dwarf::DWARFDIE object_pointer; + DWARFDIE object_pointer; lldb_private::plugin::dwarf::DWARFFormValue abstract_origin; lldb_private::plugin::dwarf::DWARFFormValue 
containing_type; lldb_private::plugin::dwarf::DWARFFormValue signature; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index 79400e36e04f3f..c98e5481609dea 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -87,6 +87,10 @@ bool DebugNamesDWARFIndex::ProcessEntry( DWARFDIE die = dwarf.GetDIE(*ref); if (!die) return true; + // Clang erroneously emits index entries for declaration DIEs in case when the + // definition is in a type unit (llvm.org/pr77696). Weed those out. + if (die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0)) + return true; return callback(die); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index f6f152726bf74e..bc489e5b8ad465 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -481,6 +481,13 @@ static ConstString GetDWARFMachOSegmentName() { return g_dwarf_section_name; } +llvm::DenseMap & +SymbolFileDWARF::GetForwardDeclCompilerTypeToDIE() { + if (SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile()) + return debug_map_symfile->GetForwardDeclCompilerTypeToDIE(); + return m_forward_decl_compiler_type_to_die; +} + UniqueDWARFASTTypeMap &SymbolFileDWARF::GetUniqueDWARFASTTypeMap() { SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile(); if (debug_map_symfile) @@ -1632,27 +1639,33 @@ bool SymbolFileDWARF::CompleteType(CompilerType &compiler_type) { return true; } - DWARFDIE dwarf_die = GetDIE(die_it->getSecond()); - if (dwarf_die) { - // Once we start resolving this type, remove it from the forward - // declaration map in case anyone child members or other types require this - // type to get resolved. 
The type will get resolved when all of the calls - // to SymbolFileDWARF::ResolveClangOpaqueTypeDefinition are done. - GetForwardDeclCompilerTypeToDIE().erase(die_it); - - Type *type = GetDIEToType().lookup(dwarf_die.GetDIE()); + // Once we start resolving this type, remove it from the forward + // declaration map in case anyone's child members or other types require this + // type to get resolved. + DWARFDIE dwarf_die = GetDIE(die_it->second); + GetForwardDeclCompilerTypeToDIE().erase(die_it); + Type *type = nullptr; + if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU())) + type = dwarf_ast->FindDefinitionTypeForDIE(dwarf_die); + if (!type) + return false; - Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion); - if (log) - GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( - log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...", - dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()), - dwarf_die.Tag(), type->GetName().AsCString()); - assert(compiler_type); - if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU())) - return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type); + die_it = GetForwardDeclCompilerTypeToDIE().find( + compiler_type_no_qualifiers.GetOpaqueQualType()); + if (die_it != GetForwardDeclCompilerTypeToDIE().end()) { + dwarf_die = GetDIE(die_it->getSecond()); + GetForwardDeclCompilerTypeToDIE().erase(die_it); } - return false; + + if (Log *log = GetLog(DWARFLog::DebugInfo | DWARFLog::TypeCompletion)) + GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( + log, "{0:x8}: {1} ({2}) '{3}' resolving forward declaration...", + dwarf_die.GetID(), DW_TAG_value_to_name(dwarf_die.Tag()), + dwarf_die.Tag(), type->GetName().AsCString()); + assert(compiler_type); + if (DWARFASTParser *dwarf_ast = GetDWARFParser(*dwarf_die.GetCU())) + return dwarf_ast->CompleteTypeFromDWARF(dwarf_die, type, compiler_type); + return true; } Type *SymbolFileDWARF::ResolveType(const DWARFDIE &die, diff 
--git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 7282c08c6857c9..35893f2072dd64 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -335,12 +335,8 @@ class SymbolFileDWARF : public SymbolFileCommon { virtual DIEToTypePtr &GetDIEToType() { return m_die_to_type; } - typedef llvm::DenseMap - CompilerTypeToDIE; - - virtual CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() { - return m_forward_decl_compiler_type_to_die; - } + virtual llvm::DenseMap & + GetForwardDeclCompilerTypeToDIE(); typedef llvm::DenseMap DIEToVariableSP; @@ -533,9 +529,14 @@ class SymbolFileDWARF : public SymbolFileCommon { NameToOffsetMap m_function_scope_qualified_name_map; std::unique_ptr m_ranges; UniqueDWARFASTTypeMap m_unique_ast_type_map; + // A map from DIE to lldb_private::Type. For record type, the key might be + // either declaration DIE or definition DIE. DIEToTypePtr m_die_to_type; DIEToVariableSP m_die_to_variable_sp; - CompilerTypeToDIE m_forward_decl_compiler_type_to_die; + // A map from CompilerType to the struct/class/union/enum DIE (might be a + // declaration or a definition) that is used to construct it. 
+ llvm::DenseMap + m_forward_decl_compiler_type_to_die; llvm::DenseMap> m_type_unit_support_files; std::vector m_lldb_cu_to_dwarf_unit; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index de22dd676eef0a..d7d571919bc7d6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -284,6 +284,11 @@ class SymbolFileDWARFDebugMap : public SymbolFileCommon { lldb::TypeSP FindCompleteObjCDefinitionTypeForDIE( const DWARFDIE &die, ConstString type_name, bool must_be_implementation); + llvm::DenseMap & + GetForwardDeclCompilerTypeToDIE() { + return m_forward_decl_compiler_type_to_die; + } + UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() { return m_unique_ast_type_map; } @@ -321,6 +326,10 @@ class SymbolFileDWARFDebugMap : public SymbolFileCommon { std::vector m_func_indexes; // Sorted by address std::vector m_glob_indexes; std::map>, OSOInfoSP> m_oso_map; + // A map from CompilerType to the struct/class/union/enum DIE (might be a + // declaration or a definition) that is used to construct it. 
+ llvm::DenseMap + m_forward_decl_compiler_type_to_die; UniqueDWARFASTTypeMap m_unique_ast_type_map; LazyBool m_supports_DW_AT_APPLE_objc_complete_type; DebugMap m_debug_map; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp index 85e1afd0d89761..8fd369c65f86b6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp @@ -110,7 +110,7 @@ SymbolFileDWARF::DIEToVariableSP &SymbolFileDWARFDwo::GetDIEToVariable() { return GetBaseSymbolFile().GetDIEToVariable(); } -SymbolFileDWARF::CompilerTypeToDIE & +llvm::DenseMap & SymbolFileDWARFDwo::GetForwardDeclCompilerTypeToDIE() { return GetBaseSymbolFile().GetForwardDeclCompilerTypeToDIE(); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h index 1500540424b524..2f0ac415e90d40 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h @@ -72,7 +72,8 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF { DIEToVariableSP &GetDIEToVariable() override; - CompilerTypeToDIE &GetForwardDeclCompilerTypeToDIE() override; + llvm::DenseMap & + GetForwardDeclCompilerTypeToDIE() override; UniqueDWARFASTTypeMap &GetUniqueDWARFASTTypeMap() override; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp index 223518f0ae8241..4762356034cab7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp @@ -13,66 +13,67 @@ using namespace lldb_private::dwarf; using namespace lldb_private::plugin::dwarf; -bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die, - const lldb_private::Declaration &decl, - const int32_t byte_size, - UniqueDWARFASTType &entry) const { - for 
(const UniqueDWARFASTType &udt : m_collection) { +UniqueDWARFASTType *UniqueDWARFASTTypeList::Find( + const DWARFDIE &die, const lldb_private::Declaration &decl, + const int32_t byte_size, bool is_forward_declaration) { + for (UniqueDWARFASTType &udt : m_collection) { // Make sure the tags match if (udt.m_die.Tag() == die.Tag()) { - // Validate byte sizes of both types only if both are valid. - if (udt.m_byte_size < 0 || byte_size < 0 || - udt.m_byte_size == byte_size) { - // Make sure the file and line match - if (udt.m_declaration == decl) { - // The type has the same name, and was defined on the same file and - // line. Now verify all of the parent DIEs match. - DWARFDIE parent_arg_die = die.GetParent(); - DWARFDIE parent_pos_die = udt.m_die.GetParent(); - bool match = true; - bool done = false; - while (!done && match && parent_arg_die && parent_pos_die) { - const dw_tag_t parent_arg_tag = parent_arg_die.Tag(); - const dw_tag_t parent_pos_tag = parent_pos_die.Tag(); - if (parent_arg_tag == parent_pos_tag) { - switch (parent_arg_tag) { - case DW_TAG_class_type: - case DW_TAG_structure_type: - case DW_TAG_union_type: - case DW_TAG_namespace: { - const char *parent_arg_die_name = parent_arg_die.GetName(); - if (parent_arg_die_name == - nullptr) // Anonymous (i.e. no-name) struct - { - match = false; - } else { - const char *parent_pos_die_name = parent_pos_die.GetName(); - if (parent_pos_die_name == nullptr || - ((parent_arg_die_name != parent_pos_die_name) && - strcmp(parent_arg_die_name, parent_pos_die_name))) - match = false; - } - } break; - - case DW_TAG_compile_unit: - case DW_TAG_partial_unit: - done = true; - break; - default: - break; - } + // If they are not both definition DIEs or both declaration DIEs, then + // don't check for byte size and declaration location, because declaration + // DIEs usually don't have those info. + bool matching_size_declaration = + udt.m_is_forward_declaration != is_forward_declaration + ? 
true + : (udt.m_byte_size < 0 || byte_size < 0 || + udt.m_byte_size == byte_size) && + udt.m_declaration == decl; + if (!matching_size_declaration) + continue; + // The type has the same name, and was defined on the same file and + // line. Now verify all of the parent DIEs match. + DWARFDIE parent_arg_die = die.GetParent(); + DWARFDIE parent_pos_die = udt.m_die.GetParent(); + bool match = true; + bool done = false; + while (!done && match && parent_arg_die && parent_pos_die) { + const dw_tag_t parent_arg_tag = parent_arg_die.Tag(); + const dw_tag_t parent_pos_tag = parent_pos_die.Tag(); + if (parent_arg_tag == parent_pos_tag) { + switch (parent_arg_tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_namespace: { + const char *parent_arg_die_name = parent_arg_die.GetName(); + if (parent_arg_die_name == nullptr) { + // Anonymous (i.e. no-name) struct + match = false; + } else { + const char *parent_pos_die_name = parent_pos_die.GetName(); + if (parent_pos_die_name == nullptr || + ((parent_arg_die_name != parent_pos_die_name) && + strcmp(parent_arg_die_name, parent_pos_die_name))) + match = false; } - parent_arg_die = parent_arg_die.GetParent(); - parent_pos_die = parent_pos_die.GetParent(); - } + } break; - if (match) { - entry = udt; - return true; + case DW_TAG_compile_unit: + case DW_TAG_partial_unit: + done = true; + break; + default: + break; } } + parent_arg_die = parent_arg_die.GetParent(); + parent_pos_die = parent_pos_die.GetParent(); + } + + if (match) { + return &udt; } } } - return false; + return nullptr; } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h index bf3cbae55e5c7b..29e5c02dcbe176 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.h @@ -23,31 +23,19 @@ class UniqueDWARFASTType { // Constructors and Destructors UniqueDWARFASTType() 
: m_type_sp(), m_die(), m_declaration() {} - UniqueDWARFASTType(lldb::TypeSP &type_sp, const DWARFDIE &die, - const Declaration &decl, int32_t byte_size) - : m_type_sp(type_sp), m_die(die), m_declaration(decl), - m_byte_size(byte_size) {} - UniqueDWARFASTType(const UniqueDWARFASTType &rhs) : m_type_sp(rhs.m_type_sp), m_die(rhs.m_die), - m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size) {} + m_declaration(rhs.m_declaration), m_byte_size(rhs.m_byte_size), + m_is_forward_declaration(rhs.m_is_forward_declaration) {} ~UniqueDWARFASTType() = default; - UniqueDWARFASTType &operator=(const UniqueDWARFASTType &rhs) { - if (this != &rhs) { - m_type_sp = rhs.m_type_sp; - m_die = rhs.m_die; - m_declaration = rhs.m_declaration; - m_byte_size = rhs.m_byte_size; - } - return *this; - } - lldb::TypeSP m_type_sp; DWARFDIE m_die; Declaration m_declaration; int32_t m_byte_size = -1; + // True if the m_die is a forward declaration DIE. + bool m_is_forward_declaration = true; }; class UniqueDWARFASTTypeList { @@ -62,8 +50,9 @@ class UniqueDWARFASTTypeList { m_collection.push_back(entry); } - bool Find(const DWARFDIE &die, const Declaration &decl, - const int32_t byte_size, UniqueDWARFASTType &entry) const; + UniqueDWARFASTType *Find(const DWARFDIE &die, const Declaration &decl, + const int32_t byte_size, + bool is_forward_declaration); protected: typedef std::vector collection; @@ -80,14 +69,15 @@ class UniqueDWARFASTTypeMap { m_collection[name.GetCString()].Append(entry); } - bool Find(ConstString name, const DWARFDIE &die, const Declaration &decl, - const int32_t byte_size, UniqueDWARFASTType &entry) const { + UniqueDWARFASTType *Find(ConstString name, const DWARFDIE &die, + const Declaration &decl, const int32_t byte_size, + bool is_forward_declaration) { const char *unique_name_cstr = name.GetCString(); - collection::const_iterator pos = m_collection.find(unique_name_cstr); + collection::iterator pos = m_collection.find(unique_name_cstr); if (pos != m_collection.end()) 
{ - return pos->second.Find(die, decl, byte_size, entry); + return pos->second.Find(die, decl, byte_size, is_forward_declaration); } - return false; + return nullptr; } protected: diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp index fab3ca989c0ec6..17c5f6118603f4 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/UdtRecordCompleter.cpp @@ -47,15 +47,18 @@ UdtRecordCompleter::UdtRecordCompleter( CVType cvt = m_index.tpi().getType(m_id.index); switch (cvt.kind()) { case LF_ENUM: + m_cvr.er.Options = ClassOptions::None; llvm::cantFail(TypeDeserializer::deserializeAs(cvt, m_cvr.er)); break; case LF_UNION: + m_cvr.ur.Options = ClassOptions::None; llvm::cantFail(TypeDeserializer::deserializeAs(cvt, m_cvr.ur)); m_layout.bit_size = m_cvr.ur.getSize() * 8; m_record.record.kind = Member::Union; break; case LF_CLASS: case LF_STRUCTURE: + m_cvr.cr.Options = ClassOptions::None; llvm::cantFail(TypeDeserializer::deserializeAs(cvt, m_cvr.cr)); m_layout.bit_size = m_cvr.cr.getSize() * 8; m_record.record.kind = Member::Struct; diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index 6eeabe0ff5e4d0..f7d9c0d2d33065 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -314,6 +314,22 @@ bool Block::GetRangeAtIndex(uint32_t range_idx, AddressRange &range) { return false; } +AddressRanges Block::GetRanges() { + AddressRanges ranges; + Function *function = CalculateSymbolContextFunction(); + if (!function) + return ranges; + for (size_t i = 0, e = m_ranges.GetSize(); i < e; ++i) { + ranges.emplace_back(); + auto &range = ranges.back(); + const Range &vm_range = m_ranges.GetEntryRef(i); + range.GetBaseAddress() = function->GetAddressRange().GetBaseAddress(); + range.GetBaseAddress().Slide(vm_range.GetRangeBase()); + range.SetByteSize(vm_range.GetByteSize()); + } + return 
ranges; +} + bool Block::GetStartAddress(Address &addr) { if (m_ranges.IsEmpty()) return false; diff --git a/lldb/test/API/python_api/address_range/Makefile b/lldb/test/API/python_api/address_range/Makefile new file mode 100644 index 00000000000000..99998b20bcb050 --- /dev/null +++ b/lldb/test/API/python_api/address_range/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/python_api/address_range/TestAddressRange.py b/lldb/test/API/python_api/address_range/TestAddressRange.py new file mode 100644 index 00000000000000..8c27558af4752d --- /dev/null +++ b/lldb/test/API/python_api/address_range/TestAddressRange.py @@ -0,0 +1,256 @@ +""" +Test SBAddressRange APIs. +""" + +import lldb +from lldbsuite.test.lldbtest import * + + +class AddressRangeTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def setUp(self): + TestBase.setUp(self) + + self.build() + exe = self.getBuildArtifact("a.out") + + self.dbg.SetAsync(True) + + self.target = self.dbg.CreateTarget(exe) + self.assertTrue(self.target, VALID_TARGET) + self.launch_info = self.target.GetLaunchInfo() + self.launch_info.SetWorkingDirectory(self.get_process_working_directory()) + + self.bp1 = self.target.BreakpointCreateByName("main", "a.out") + self.bp2 = self.target.BreakpointCreateByName("foo", "a.out") + self.bp3 = self.target.BreakpointCreateByName("bar", "a.out") + + self.assertTrue(self.bp1.IsValid()) + self.assertTrue(self.bp2.IsValid()) + self.assertTrue(self.bp3.IsValid()) + + self.addr1 = self.bp1.GetLocationAtIndex(0).GetAddress() + self.addr2 = self.bp2.GetLocationAtIndex(0).GetAddress() + self.addr3 = self.bp3.GetLocationAtIndex(0).GetAddress() + + self.assertTrue(self.addr1.IsValid()) + self.assertTrue(self.addr2.IsValid()) + self.assertTrue(self.addr3.IsValid()) + + def test_address_range_default(self): + """Testing default constructor.""" + empty_range = lldb.SBAddressRange() + self.assertEqual(empty_range.IsValid(), False) + + def 
test_address_range_construction(self): + """Make sure the construction and getters work.""" + range = lldb.SBAddressRange(self.addr1, 8) + self.assertEqual(range.IsValid(), True) + self.assertEqual(range.GetBaseAddress(), self.addr1) + self.assertEqual(range.GetByteSize(), 8) + + def test_address_range_clear(self): + """Make sure the clear method works.""" + range = lldb.SBAddressRange(self.addr1, 8) + self.assertEqual(range.IsValid(), True) + self.assertEqual(range.GetBaseAddress(), self.addr1) + self.assertEqual(range.GetByteSize(), 8) + + range.Clear() + self.assertEqual(range.IsValid(), False) + + def test_function(self): + """Make sure the range works in SBFunction APIs.""" + + # Setup breakpoints in main + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + ranges = func.GetRanges() + self.assertEqual(ranges.GetSize(), 1) + + range = ranges.GetAddressRangeAtIndex(0) + self.assertEqual( + range.GetByteSize(), + func.GetEndAddress().GetOffset() - func.GetStartAddress().GetOffset(), + ) + self.assertEqual( + range.GetBaseAddress().GetOffset(), + func.GetStartAddress().GetOffset(), + ) + + def test_block(self): + """Make sure the range works in SBBlock APIs.""" + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + block = loc_addr.GetBlock() + + ranges = block.GetRanges() + self.assertEqual(ranges.GetSize(), 1) + + range = ranges.GetAddressRangeAtIndex(0) + self.assertEqual( + range.GetByteSize(), + block.GetRangeEndAddress(0).GetOffset() + - block.GetRangeStartAddress(0).GetOffset(), + ) + self.assertEqual( + range.GetBaseAddress().GetOffset(), + block.GetRangeStartAddress(0).GetOffset(), + ) + + def test_address_range_list(self): + """Make sure the SBAddressRangeList works by adding and getting ranges.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + + range_list = lldb.SBAddressRangeList() + 
self.assertEqual(range_list.GetSize(), 0) + + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + self.assertRaises(IndexError, lambda: range_list[3]) + + range1_copy = range_list.GetAddressRangeAtIndex(0) + self.assertEqual(range1.GetByteSize(), range1_copy.GetByteSize()) + self.assertEqual( + range1.GetBaseAddress().GetOffset(), + range1_copy.GetBaseAddress().GetOffset(), + ) + + range2_copy = range_list.GetAddressRangeAtIndex(1) + self.assertEqual(range2.GetByteSize(), range2_copy.GetByteSize()) + self.assertEqual( + range2.GetBaseAddress().GetOffset(), + range2_copy.GetBaseAddress().GetOffset(), + ) + + range3_copy = range_list.GetAddressRangeAtIndex(2) + self.assertEqual(range3.GetByteSize(), range3_copy.GetByteSize()) + self.assertEqual( + range3.GetBaseAddress().GetOffset(), + range3_copy.GetBaseAddress().GetOffset(), + ) + + range_list.Clear() + self.assertEqual(range_list.GetSize(), 0) + + def test_address_range_list_len(self): + """Make sure the len() operator works.""" + range = lldb.SBAddressRange(self.addr1, 8) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(len(range_list), 0) + + range_list.Append(range) + self.assertEqual(len(range_list), 1) + + def test_address_range_list_iterator(self): + """Make sure the SBAddressRangeList iterator works.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + + range_list = lldb.SBAddressRangeList() + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + + # Test the iterator + for range in range_list: + self.assertTrue(range.IsValid()) + + def test_address_range_print_invalid(self): + """Make sure the SBAddressRange can be printed when invalid.""" + range = lldb.SBAddressRange() + self.assertEqual(str(range), "") + + def test_address_range_print_resolved(self): + 
"""Make sure the SBAddressRange can be printed when resolved.""" + lldb.target = self.target + error = lldb.SBError() + process = self.target.Launch(self.launch_info, error) + self.assertTrue(error.Success(), "Make sure process launched successfully") + self.assertTrue(process, PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + range = func.GetRanges().GetAddressRangeAtIndex(0) + range_str = str(range) + # [0x1000-0x2000] // Resolved with target or addresses without sections + self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + process.Kill() + + def test_address_range_print_no_section_resolved(self): + """Make sure the SBAddressRange can be printed with no secion.""" + lldb.target = self.target + error = lldb.SBError() + process = self.target.Launch(self.launch_info, error) + self.assertTrue(error.Success(), "Make sure process launched successfully") + self.assertTrue(process, PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, PROCESS_STOPPED) + + loc = self.bp1.GetLocationAtIndex(0) + loc_addr = loc.GetAddress() + func = loc_addr.GetFunction() + range = func.GetRanges().GetAddressRangeAtIndex(0) + + addr = lldb.SBAddress() + addr.SetAddress(lldb.SBSection(), range.GetBaseAddress().GetOffset()) + self.assertFalse(addr.GetSection().IsValid()) + range = lldb.SBAddressRange(addr, range.GetByteSize()) + + range_str = str(range) + # [0x1000-0x2000] // Resolved with target or addresses without sections + self.assertRegex(range_str, "^\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + process.Kill() + + def test_address_range_print_not_resolved(self): + """Make sure the SBAddressRange can be printed when not resolved.""" + range = lldb.SBAddressRange(self.addr1, 8) + range_str = str(range) + # a.out[0x1000-0x2000] // Without target + self.assertRegex(range_str, "^a.out\[0x[0-9a-f]+\-0x[0-9a-f]+\)$") + + 
def test_address_range_list_print(self): + """Make sure the SBAddressRangeList can be printed.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range3 = lldb.SBAddressRange(self.addr3, 32) + self.dbg.SetAsync(True) + + range_list = lldb.SBAddressRangeList() + self.assertEqual(range_list.GetSize(), 0) + + range_list.Append(range1) + range_list.Append(range2) + range_list.Append(range3) + self.assertEqual(range_list.GetSize(), 3) + + range_list_str = str(range_list) + self.assertTrue(range_list_str.startswith("[")) + self.assertGreater(range_list_str.count(","), 1) + self.assertTrue(range_list_str.endswith("]")) + + def test_address_range_list_indexing(self): + """Make sure the SBAddressRangeList can be indexed.""" + range1 = lldb.SBAddressRange(self.addr1, 8) + range2 = lldb.SBAddressRange(self.addr2, 16) + range_list = lldb.SBAddressRangeList() + range_list.Append(range1) + range_list.Append(range2) + + self.assertEqual(range_list.GetSize(), 2) + self.assertRaises(IndexError, lambda: range_list[2]) + self.assertRaises(TypeError, lambda: range_list["0"]) + self.assertEqual(range_list[0], range1) + self.assertEqual(range_list[1], range2) + self.assertEqual(range_list[-1], range2) + self.assertEqual(range_list[-2], range1) diff --git a/lldb/test/API/python_api/address_range/main.cpp b/lldb/test/API/python_api/address_range/main.cpp new file mode 100644 index 00000000000000..b6eaec4a23699b --- /dev/null +++ b/lldb/test/API/python_api/address_range/main.cpp @@ -0,0 +1,8 @@ +void foo() {} +void bar() {} + +int main() { + foo(); + bar(); + return 0; +} diff --git a/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test b/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test new file mode 100644 index 00000000000000..d253981b498c81 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/delayed-definition-die-searching.test @@ -0,0 +1,36 @@ +# Test definition DIE searching is delayed until 
complete type is required. + +# UNSUPPORTED: system-windows + +# RUN: split-file %s %t +# RUN: %clangxx_host %t/main.cpp %t/t1_def.cpp -gdwarf -o %t.out +# RUN: %lldb -b %t.out -s %t/lldb.cmd | FileCheck %s + +# CHECK: (lldb) p v1 +# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't2' +# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1' +# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't2' resolving forward declaration... +# CHECK: (t2) {} +# CHECK: (lldb) p v2 +# CHECK: DWARFASTParserClang::ParseTypeFromDWARF{{.*}}DW_TAG_structure_type (DW_TAG_structure_type) name = 't1' +# CHECK: DW_TAG_structure_type (DW_TAG_structure_type) 't1' resolving forward declaration... + +#--- lldb.cmd +log enable dwarf comp +p v1 +p v2 + +#--- main.cpp +template +struct t2 { +}; +struct t1; +t2 v1; // this CU doesn't have definition DIE for t1, but only declaration DIE for it. +int main() { +} + +#--- t1_def.cpp +struct t1 { // this CU contains definition DIE for t1. + int x; +}; +t1 v2; diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 614dd98b013b35..7b64c477d13c7f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4754,6 +4754,40 @@ reference to the CFI jump table in the ``LowerTypeTests`` pass. These constants may be useful in low-level programs, such as operating system kernels, which need to refer to the actual function body. +.. _ptrauth_constant: + +Pointer Authentication Constants +-------------------------------- + +``ptrauth (ptr CST, i32 KEY[, i64 DISC[, ptr ADDRDISC]?]?)`` + +A '``ptrauth``' constant represents a pointer with a cryptographic +authentication signature embedded into some bits, as described in the +`Pointer Authentication `__ document. + +A '``ptrauth``' constant is simply a constant equivalent to the +``llvm.ptrauth.sign`` intrinsic, potentially fed by a discriminator +``llvm.ptrauth.blend`` if needed. 
+ +Its type is the same as the first argument. An integer constant discriminator +and an address discriminator may be optionally specified. Otherwise, they have +values ``i64 0`` and ``ptr null``. + +If the address discriminator is ``null`` then the expression is equivalent to + +.. code-block:: llvm + + %tmp = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr CST to i64), i32 KEY, i64 DISC) + %val = inttoptr i64 %tmp to ptr + +Otherwise, the expression is equivalent to: + +.. code-block:: llvm + + %tmp1 = call i64 @llvm.ptrauth.blend(i64 ptrtoint (ptr ADDRDISC to i64), i64 DISC) + %tmp2 = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr CST to i64), i32 KEY, i64 %tmp1) + %val = inttoptr i64 %tmp2 to ptr + .. _constantexprs: Constant Expressions diff --git a/llvm/docs/PointerAuth.md b/llvm/docs/PointerAuth.md index a8d2b4d8f5f0bd..cf2cc6305f130f 100644 --- a/llvm/docs/PointerAuth.md +++ b/llvm/docs/PointerAuth.md @@ -16,6 +16,7 @@ For more details, see the clang documentation page for At the IR level, it is represented using: * a [set of intrinsics](#intrinsics) (to sign/authenticate pointers) +* a [signed pointer constant](#constant) (to sign globals) * a [call operand bundle](#operand-bundle) (to authenticate called pointers) The current implementation leverages the @@ -225,6 +226,27 @@ with a pointer address discriminator, in a way that is specified by the target implementation. +### Constant + +[Intrinsics](#intrinsics) can be used to produce signed pointers dynamically, +in code, but not for signed pointers referenced by constants, in, e.g., global +initializers. + +The latter are represented using a +[``ptrauth`` constant](https://llvm.org/docs/LangRef.html#ptrauth-constant), +which describes an authenticated relocation producing a signed pointer. 
+ +```llvm +ptrauth (ptr CST, i32 KEY, i64 DISC, ptr ADDRDISC) +``` + +is equivalent to: + +```llvm + %disc = call i64 @llvm.ptrauth.blend(i64 ptrtoint(ptr ADDRDISC to i64), i64 DISC) + %signedval = call i64 @llvm.ptrauth.sign(ptr CST, i32 KEY, i64 %disc) +``` + ### Operand Bundle Function pointers used as indirect call targets can be signed when materialized, diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 5828cc156cc785..72f3d945424963 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -912,6 +912,13 @@ class ScalarEvolution { return getBackedgeTakenCount(L, SymbolicMaximum); } + /// Similar to getSymbolicMaxBackedgeTakenCount, except it will add a set of + /// SCEV predicates to Predicates that are required to be true in order for + /// the answer to be correct. Predicates can be checked with run-time + /// checks and can be used to perform loop versioning. + const SCEV *getPredicatedSymbolicMaxBackedgeTakenCount( + const Loop *L, SmallVector &Predicates); + /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); @@ -1549,7 +1556,9 @@ class ScalarEvolution { ScalarEvolution *SE) const; /// Get the symbolic max backedge taken count for the loop. - const SCEV *getSymbolicMax(const Loop *L, ScalarEvolution *SE); + const SCEV * + getSymbolicMax(const Loop *L, ScalarEvolution *SE, + SmallVector *Predicates = nullptr); /// Get the symbolic max backedge taken count for the particular loop exit. const SCEV *getSymbolicMax(const BasicBlock *ExitingBlock, @@ -1746,7 +1755,7 @@ class ScalarEvolution { /// Similar to getBackedgeTakenInfo, but will add predicates as required /// with the purpose of returning complete information. 
- const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); + BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L); /// Compute the number of times the specified loop will iterate. /// If AllowPredicates is set, we will create new SCEV predicates as @@ -1761,11 +1770,6 @@ class ScalarEvolution { ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates = false); - /// Return a symbolic upper bound for the backedge taken count of the loop. - /// This is more general than getConstantMaxBackedgeTakenCount as it returns - /// an arbitrary expression as opposed to only constants. - const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L); - // Helper functions for computeExitLimitFromCond to avoid exponential time // complexity. @@ -2316,6 +2320,9 @@ class PredicatedScalarEvolution { /// Get the (predicated) backedge count for the analyzed loop. const SCEV *getBackedgeTakenCount(); + /// Get the (predicated) symbolic max backedge count for the analyzed loop. + const SCEV *getSymbolicMaxBackedgeTakenCount(); + /// Adds a new predicate. void addPredicate(const SCEVPredicate &Pred); @@ -2384,6 +2391,9 @@ class PredicatedScalarEvolution { /// The backedge taken count. const SCEV *BackedgeCount = nullptr; + + /// The symbolic backedge taken count. 
+ const SCEV *SymbolicMaxBackedgeCount = nullptr; }; template <> struct DenseMapInfo { diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index df61ec6ed30e0b..69821c22dcd619 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -346,6 +346,7 @@ enum Kind { kw_blockaddress, kw_dso_local_equivalent, kw_no_cfi, + kw_ptrauth, kw_freeze, diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 38ef8e37df91d3..acf89885af6fdb 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -58,15 +58,16 @@ enum : unsigned { WASM_TYPE_V128 = 0x7B, WASM_TYPE_NULLFUNCREF = 0x73, WASM_TYPE_NULLEXTERNREF = 0x72, + WASM_TYPE_NULLEXNREF = 0x74, WASM_TYPE_NULLREF = 0x71, WASM_TYPE_FUNCREF = 0x70, WASM_TYPE_EXTERNREF = 0x6F, + WASM_TYPE_EXNREF = 0x69, WASM_TYPE_ANYREF = 0x6E, WASM_TYPE_EQREF = 0x6D, WASM_TYPE_I31REF = 0x6C, WASM_TYPE_STRUCTREF = 0x6B, WASM_TYPE_ARRAYREF = 0x6A, - WASM_TYPE_EXNREF = 0x69, WASM_TYPE_NONNULLABLE = 0x64, WASM_TYPE_NULLABLE = 0x63, WASM_TYPE_FUNC = 0x60, @@ -261,8 +262,9 @@ enum class ValType { V128 = WASM_TYPE_V128, FUNCREF = WASM_TYPE_FUNCREF, EXTERNREF = WASM_TYPE_EXTERNREF, + EXNREF = WASM_TYPE_EXNREF, // Unmodeled value types include ref types with heap types other than - // func or extern, and type-specialized funcrefs + // func, extern or exn, and type-specialized funcrefs OTHERREF = 0xff, }; @@ -410,7 +412,8 @@ struct WasmDataSegment { // 1) Does not model passive or declarative segments (Segment will end up with // an Offset field of i32.const 0) // 2) Does not model init exprs (Segment will get an empty Functions list) -// 2) Does not model types other than basic funcref/externref (see ValType) +// 3) Does not model types other than basic funcref/externref/exnref (see +// ValType) struct WasmElemSegment { uint32_t Flags; uint32_t TableNumber; diff --git 
a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index d3b9e96520f88a..9999aee61528e5 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -413,6 +413,7 @@ enum ConstantsCodes { // asmstr,conststr] CST_CODE_CE_GEP_WITH_INRANGE = 31, // [opty, flags, range, n x operands] CST_CODE_CE_GEP = 32, // [opty, flags, n x operands] + CST_CODE_PTRAUTH = 33, // [ptr, key, disc, addrdisc] }; /// CastOpcodes - These are values used in the bitcode files to encode which diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index c3e378ed8f6edb..e322cc04c1c769 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -280,11 +280,12 @@ def untyped : ValueType<8, 193> { // Produces an untyped value } def funcref : ValueType<0, 194>; // WebAssembly's funcref type def externref : ValueType<0, 195>; // WebAssembly's externref type -def x86amx : ValueType<8192, 196>; // X86 AMX value -def i64x8 : ValueType<512, 197>; // 8 Consecutive GPRs (AArch64) +def exnref : ValueType<0, 196>; // WebAssembly's exnref type +def x86amx : ValueType<8192, 197>; // X86 AMX value +def i64x8 : ValueType<512, 198>; // 8 Consecutive GPRs (AArch64) def aarch64svcount - : ValueType<16, 198>; // AArch64 predicate-as-counter -def spirvbuiltin : ValueType<0, 199>; // SPIR-V's builtin type + : ValueType<16, 199>; // AArch64 predicate-as-counter +def spirvbuiltin : ValueType<0, 200>; // SPIR-V's builtin type def token : ValueType<0, 248>; // TokenTy def MetadataVT : ValueType<0, 249> { // Metadata diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 223b3e11091fa6..2a3d0fca3b1d8a 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -16,7 +16,7 @@ /* Indicate that this is LLVM compiled from the amd-gfx branch. 
*/ #define LLVM_HAVE_BRANCH_AMD_GFX -#define LLVM_MAIN_REVISION 499930 +#define LLVM_MAIN_REVISION 500019 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 3fa27608ead948..3feb4bd11c998f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -371,9 +371,8 @@ ConstructDecompositionT::addClauseSymsToMap(U &&item, // anything and return false, otherwise return true. template bool ConstructDecompositionT::applyToUnique(const ClauseTy *node) { - auto unique = detail::find_unique(leafs, [=](const auto &dirInfo) { - return llvm::omp::isAllowedClauseForDirective(dirInfo.id, node->id, - version); + auto unique = detail::find_unique(leafs, [=](const auto &leaf) { + return llvm::omp::isAllowedClauseForDirective(leaf.id, node->id, version); }); if (unique != leafs.end()) { @@ -438,8 +437,8 @@ bool ConstructDecompositionT::applyToAll(const ClauseTy *node) { } template -template -bool ConstructDecompositionT::applyClause(Clause &&clause, +template +bool ConstructDecompositionT::applyClause(Specific &&specific, const ClauseTy *node) { // The default behavior is to find the unique directive to which the // given clause may be applied. If there are no such directives, or diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index a1e5005a9d1da5..86f6be7985a23f 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -1008,6 +1008,72 @@ struct OperandTraits : public FixedNumOperandTraits { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(NoCFIValue, Value) +/// A signed pointer, in the ptrauth sense. 
+class ConstantPtrAuth final : public Constant { + friend struct ConstantPtrAuthKeyType; + friend class Constant; + + ConstantPtrAuth(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, + Constant *AddrDisc); + + void *operator new(size_t s) { return User::operator new(s, 4); } + + void destroyConstantImpl(); + Value *handleOperandChangeImpl(Value *From, Value *To); + +public: + /// Return a pointer signed with the specified parameters. + static ConstantPtrAuth *get(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc); + + /// Produce a new ptrauth expression signing the given value using + /// the same schema as is stored in one. + ConstantPtrAuth *getWithSameSchema(Constant *Pointer) const; + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + /// The pointer that is signed in this ptrauth signed pointer. + Constant *getPointer() const { return cast(Op<0>().get()); } + + /// The Key ID, an i32 constant. + ConstantInt *getKey() const { return cast(Op<1>().get()); } + + /// The integer discriminator, an i64 constant, or 0. + ConstantInt *getDiscriminator() const { + return cast(Op<2>().get()); + } + + /// The address discriminator if any, or the null constant. + /// If present, this must be a value equivalent to the storage location of + /// the only global-initializer user of the ptrauth signed pointer. + Constant *getAddrDiscriminator() const { + return cast(Op<3>().get()); + } + + /// Whether there is any non-null address discriminator. + bool hasAddressDiscriminator() const { + return !getAddrDiscriminator()->isNullValue(); + } + + /// Check whether an authentication operation with key \p Key and (possibly + /// blended) discriminator \p Discriminator is known to be compatible with + /// this ptrauth signed pointer. 
+ bool isKnownCompatibleWith(const Value *Key, const Value *Discriminator, + const DataLayout &DL) const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const Value *V) { + return V->getValueID() == ConstantPtrAuthVal; + } +}; + +template <> +struct OperandTraits + : public FixedNumOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPtrAuth, Constant) + //===----------------------------------------------------------------------===// /// A constant value that is initialized with an expression using /// other constant values. diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 3019f68083d422..107442623ab7bd 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -581,6 +581,7 @@ def llvm_vararg_ty : LLVMType; // this means vararg here def llvm_externref_ty : LLVMType; def llvm_funcref_ty : LLVMType; +def llvm_exnref_ty : LLVMType; //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 237f268784bb02..47aab196a6d4f9 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -31,12 +31,17 @@ def int_wasm_ref_null_extern : DefaultAttrsIntrinsic<[llvm_externref_ty], [], [IntrNoMem]>; def int_wasm_ref_null_func : DefaultAttrsIntrinsic<[llvm_funcref_ty], [], [IntrNoMem]>; +def int_wasm_ref_null_exn: + DefaultAttrsIntrinsic<[llvm_exnref_ty], [], [IntrNoMem]>; def int_wasm_ref_is_null_extern : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_externref_ty], [IntrNoMem], "llvm.wasm.ref.is_null.extern">; def int_wasm_ref_is_null_func : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_funcref_ty], [IntrNoMem], "llvm.wasm.ref.is_null.func">; +def int_wasm_ref_is_null_exn : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_exnref_ty], [IntrNoMem], + 
"llvm.wasm.ref.is_null.exn">; //===----------------------------------------------------------------------===// // Table intrinsics @@ -47,6 +52,9 @@ def int_wasm_table_set_externref : def int_wasm_table_set_funcref : DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty], [IntrWriteMem]>; +def int_wasm_table_set_exnref : + DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_exnref_ty], + [IntrWriteMem]>; def int_wasm_table_get_externref : DefaultAttrsIntrinsic<[llvm_externref_ty], [llvm_table_ty, llvm_i32_ty], @@ -54,6 +62,9 @@ def int_wasm_table_get_externref : def int_wasm_table_get_funcref : DefaultAttrsIntrinsic<[llvm_funcref_ty], [llvm_table_ty, llvm_i32_ty], [IntrReadMem]>; +def int_wasm_table_get_exnref : + DefaultAttrsIntrinsic<[llvm_exnref_ty], [llvm_table_ty, llvm_i32_ty], + [IntrReadMem]>; // Query the current table size, and increase the current table size. def int_wasm_table_size : @@ -68,6 +79,9 @@ def int_wasm_table_grow_externref : def int_wasm_table_grow_funcref : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_table_ty, llvm_funcref_ty, llvm_i32_ty], []>; +def int_wasm_table_grow_exnref : + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_table_ty, llvm_exnref_ty, llvm_i32_ty], []>; def int_wasm_table_fill_externref : DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_externref_ty, @@ -76,6 +90,10 @@ def int_wasm_table_fill_funcref : DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty, llvm_i32_ty], []>; +def int_wasm_table_fill_exnref : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_i32_ty, llvm_exnref_ty, + llvm_i32_ty], []>; //===----------------------------------------------------------------------===// // Trapping float-to-int conversions diff --git a/llvm/include/llvm/IR/Value.def b/llvm/include/llvm/IR/Value.def index 61f7a87666d094..3ece66a529e125 100644 --- a/llvm/include/llvm/IR/Value.def +++ b/llvm/include/llvm/IR/Value.def @@ -81,6 +81,7 @@ HANDLE_CONSTANT(BlockAddress) 
HANDLE_CONSTANT(ConstantExpr) HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(DSOLocalEquivalent) HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(NoCFIValue) +HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(ConstantPtrAuth) // ConstantAggregate. HANDLE_CONSTANT(ConstantArray) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 66a99f16cdb638..d44a2d1e2fb117 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -28,10 +28,12 @@ enum IndexedVersion : uint64_t { Version1 = 1, // Version 2: Added a call stack table. Version2 = 2, + // Version 3: Under development. + Version3 = 3, }; constexpr uint64_t MinimumSupportedVersion = Version0; -constexpr uint64_t MaximumSupportedVersion = Version2; +constexpr uint64_t MaximumSupportedVersion = Version3; // Verify that the minimum and maximum satisfy the obvious constraint. static_assert(MinimumSupportedVersion <= MaximumSupportedVersion); @@ -426,8 +428,8 @@ struct IndexedMemProfRecord { // Convert IndexedMemProfRecord to MemProfRecord. Callback is used to // translate CallStackId to call stacks with frames inline. MemProfRecord toMemProfRecord( - llvm::function_ref(const CallStackId)> - Callback) const; + llvm::function_ref(const CallStackId)> Callback) + const; // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. 
MemProfRecords are diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h index f3a2e0f4380eb0..84d72df6fc4d81 100644 --- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h +++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h @@ -63,6 +63,16 @@ struct Factor { Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {} }; +struct OverflowTracking { + bool HasNUW; + bool HasNSW; + bool AllKnownNonNegative; + // Note: AllKnownNonNegative can be true in a case where one of the operands + // is negative, but one the operators is not NSW. AllKnownNonNegative should + // not be used independently of HasNSW + OverflowTracking() : HasNUW(true), HasNSW(true), AllKnownNonNegative(true) {} +}; + class XorOpnd; } // end namespace reassociate @@ -103,7 +113,7 @@ class ReassociatePass : public PassInfoMixin { void ReassociateExpression(BinaryOperator *I); void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - bool HasNUW); + reassociate::OverflowTracking Flags); Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops); Value *OptimizeAdd(Instruction *I, diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index bc8b9b8479e4ff..bd4c2a35ebf2cb 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1983,20 +1983,25 @@ getDependenceDistanceStrideAndSize( return MemoryDepChecker::Dependence::IndirectUnsafe; // Check if we can prove that Sink only accesses memory after Src's end or - // vice versa. 
- const auto &[SrcStart, SrcEnd] = - getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE); - const auto &[SinkStart, SinkEnd] = - getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE); - - if (!isa(SrcStart) && - !isa(SrcEnd) && - !isa(SinkStart) && - !isa(SinkEnd)) { - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) - return MemoryDepChecker::Dependence::NoDep; - if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) - return MemoryDepChecker::Dependence::NoDep; + // vice versa. At the moment this is limited to cases where either source or + // sink are loop invariant to avoid compile-time increases. This is not + // required for correctness. + if (SE.isLoopInvariant(Src, InnermostLoop) || + SE.isLoopInvariant(Sink, InnermostLoop)) { + const auto &[SrcStart, SrcEnd] = + getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE); + const auto &[SinkStart, SinkEnd] = + getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE); + + if (!isa(SrcStart) && + !isa(SrcEnd) && + !isa(SinkStart) && + !isa(SinkEnd)) { + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart)) + return MemoryDepChecker::Dependence::NoDep; + if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart)) + return MemoryDepChecker::Dependence::NoDep; + } } // Need accesses with constant strides and the same direction. 
We don't want diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8d971e6a78e420..e46d7183a2a359 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8295,6 +8295,11 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, llvm_unreachable("Invalid ExitCountKind!"); } +const SCEV *ScalarEvolution::getPredicatedSymbolicMaxBackedgeTakenCount( + const Loop *L, SmallVector &Preds) { + return getPredicatedBackedgeTakenInfo(L).getSymbolicMax(L, this, &Preds); +} + bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { return getBackedgeTakenInfo(L).isConstantMaxOrZero(this); } @@ -8311,7 +8316,7 @@ static void PushLoopPHIs(const Loop *L, Worklist.push_back(&PN); } -const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) { auto &BTI = getBackedgeTakenInfo(L); if (BTI.hasFullInfo()) @@ -8644,11 +8649,37 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const { return getConstantMax(); } -const SCEV * -ScalarEvolution::BackedgeTakenInfo::getSymbolicMax(const Loop *L, - ScalarEvolution *SE) { - if (!SymbolicMax) - SymbolicMax = SE->computeSymbolicMaxBackedgeTakenCount(L); +const SCEV *ScalarEvolution::BackedgeTakenInfo::getSymbolicMax( + const Loop *L, ScalarEvolution *SE, + SmallVector *Predicates) { + if (!SymbolicMax) { + // Form an expression for the maximum exit count possible for this loop. We + // merge the max and exact information to approximate a version of + // getConstantMaxBackedgeTakenCount which isn't restricted to just + // constants. 
+ SmallVector ExitCounts; + + for (const auto &ENT : ExitNotTaken) { + const SCEV *ExitCount = ENT.SymbolicMaxNotTaken; + if (!isa(ExitCount)) { + assert(SE->DT.dominates(ENT.ExitingBlock, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + if (Predicates) + for (const auto *P : ENT.Predicates) + Predicates->push_back(P); + + assert((Predicates || ENT.hasAlwaysTruePredicate()) && + "Predicate should be always true!"); + } + } + if (ExitCounts.empty()) + SymbolicMax = SE->getCouldNotCompute(); + else + SymbolicMax = + SE->getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); + } return SymbolicMax; } @@ -13589,6 +13620,24 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, P->print(OS, 4); } + Preds.clear(); + auto *PredSymbolicMax = + SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds); + if (SymbolicBTC != PredSymbolicMax) { + OS << "Loop "; + L->getHeader()->printAsOperand(OS, /*PrintType=*/false); + OS << ": "; + if (!isa(PredSymbolicMax)) { + OS << "Predicated symbolic max backedge-taken count is "; + PrintSCEVWithTypeHint(OS, PredSymbolicMax); + } else + OS << "Unpredictable predicated symbolic max backedge-taken count."; + OS << "\n"; + OS << " Predicates:\n"; + for (const auto *P : Preds) + P->print(OS, 4); + } + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); @@ -14802,6 +14851,17 @@ const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { return BackedgeCount; } +const SCEV *PredicatedScalarEvolution::getSymbolicMaxBackedgeTakenCount() { + if (!SymbolicMaxBackedgeCount) { + SmallVector Preds; + SymbolicMaxBackedgeCount = + SE.getPredicatedSymbolicMaxBackedgeTakenCount(&L, Preds); + for (const auto *P : Preds) + addPredicate(*P); + } + return SymbolicMaxBackedgeCount; +} + void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { if 
(Preds->implies(&Pred)) return; @@ -14964,30 +15024,6 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, return false; } -const SCEV * -ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) { - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Form an expression for the maximum exit count possible for this loop. We - // merge the max and exact information to approximate a version of - // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. - SmallVector ExitCounts; - for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = - getExitCount(L, ExitingBB, ScalarEvolution::SymbolicMaximum); - if (!isa(ExitCount)) { - assert(DT.dominates(ExitingBB, L->getLoopLatch()) && - "We should only have known counts for exiting blocks that " - "dominate latch!"); - ExitCounts.push_back(ExitCount); - } - } - if (ExitCounts.empty()) - return getCouldNotCompute(); - return getUMinFromMismatchedTypes(ExitCounts, /*Sequential*/ true); -} - /// A rewriter to replace SCEV expressions in Map with the corresponding entry /// in the map. It skips AddRecExpr because we cannot guarantee that the /// replacement is loop invariant in the loop of the AddRec. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 3baa8ede28ffaf..08138a5e2f2d9d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3140,6 +3140,10 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, return true; } + // Constant ptrauth can be null, iff the base pointer can be. + if (auto *CPA = dyn_cast(V)) + return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth); + // A global variable in address space 0 is non null unless extern weak // or an absolute symbol reference. Other address spaces may have null as a // valid address for a global, so we can't assume anything. 
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 20a1bd29577124..d3ab306904da12 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -710,6 +710,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(blockaddress); KEYWORD(dso_local_equivalent); KEYWORD(no_cfi); + KEYWORD(ptrauth); // Metadata types. KEYWORD(distinct); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 5d2056d2085672..df0827996396ef 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4046,6 +4046,60 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { ID.NoCFI = true; return false; } + case lltok::kw_ptrauth: { + // ValID ::= 'ptrauth' '(' ptr @foo ',' i32 + // (',' i64 (',' ptr addrdisc)? )? ')' + Lex.Lex(); + + Constant *Ptr, *Key; + Constant *Disc = nullptr, *AddrDisc = nullptr; + + if (parseToken(lltok::lparen, + "expected '(' in constant ptrauth expression") || + parseGlobalTypeAndValue(Ptr) || + parseToken(lltok::comma, + "expected comma in constant ptrauth expression") || + parseGlobalTypeAndValue(Key)) + return true; + // If present, parse the optional disc/addrdisc. 
+ if (EatIfPresent(lltok::comma)) + if (parseGlobalTypeAndValue(Disc) || + (EatIfPresent(lltok::comma) && parseGlobalTypeAndValue(AddrDisc))) + return true; + if (parseToken(lltok::rparen, + "expected ')' in constant ptrauth expression")) + return true; + + if (!Ptr->getType()->isPointerTy()) + return error(ID.Loc, "constant ptrauth base pointer must be a pointer"); + + auto *KeyC = dyn_cast(Key); + if (!KeyC || KeyC->getBitWidth() != 32) + return error(ID.Loc, "constant ptrauth key must be i32 constant"); + + ConstantInt *DiscC = nullptr; + if (Disc) { + DiscC = dyn_cast(Disc); + if (!DiscC || DiscC->getBitWidth() != 64) + return error( + ID.Loc, + "constant ptrauth integer discriminator must be i64 constant"); + } else { + DiscC = ConstantInt::get(Type::getInt64Ty(Context), 0); + } + + if (AddrDisc) { + if (!AddrDisc->getType()->isPointerTy()) + return error( + ID.Loc, "constant ptrauth address discriminator must be a pointer"); + } else { + AddrDisc = ConstantPointerNull::get(PointerType::get(Context, 0)); + } + + ID.ConstantVal = ConstantPtrAuth::get(Ptr, KeyC, DiscC, AddrDisc); + ID.Kind = ValID::t_Constant; + return false; + } case lltok::kw_trunc: case lltok::kw_bitcast: diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index c085c715179ba6..b7ed9cdf631454 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -222,6 +222,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(CST_CODE, CE_UNOP) STRINGIFY_CODE(CST_CODE, DSO_LOCAL_EQUIVALENT) STRINGIFY_CODE(CST_CODE, NO_CFI_VALUE) + STRINGIFY_CODE(CST_CODE, PTRAUTH) case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS"; STRINGIFY_CODE(CST_CODE, DATA) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 32b9a033173e93..aee627bbde0bf5 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ 
b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -517,7 +517,8 @@ class BitcodeConstant final : public Value, static constexpr uint8_t NoCFIOpcode = 252; static constexpr uint8_t DSOLocalEquivalentOpcode = 251; static constexpr uint8_t BlockAddressOpcode = 250; - static constexpr uint8_t FirstSpecialOpcode = BlockAddressOpcode; + static constexpr uint8_t ConstantPtrAuthOpcode = 249; + static constexpr uint8_t FirstSpecialOpcode = ConstantPtrAuthOpcode; // Separate struct to make passing different number of parameters to // BitcodeConstant::create() more convenient. @@ -1562,6 +1563,18 @@ Expected BitcodeReader::materializeValue(unsigned StartValID, C = ConstantExpr::get(BC->Opcode, ConstOps[0], ConstOps[1], BC->Flags); } else { switch (BC->Opcode) { + case BitcodeConstant::ConstantPtrAuthOpcode: { + auto *Key = dyn_cast(ConstOps[1]); + if (!Key) + return error("ptrauth key operand must be ConstantInt"); + + auto *Disc = dyn_cast(ConstOps[2]); + if (!Disc) + return error("ptrauth disc operand must be ConstantInt"); + + C = ConstantPtrAuth::get(ConstOps[0], Key, Disc, ConstOps[3]); + break; + } case BitcodeConstant::NoCFIOpcode: { auto *GV = dyn_cast(ConstOps[0]); if (!GV) @@ -3644,6 +3657,16 @@ Error BitcodeReader::parseConstants() { Record[1]); break; } + case bitc::CST_CODE_PTRAUTH: { + if (Record.size() < 4) + return error("Invalid ptrauth record"); + // Ptr, Key, Disc, AddrDisc + V = BitcodeConstant::create(Alloc, CurTy, + BitcodeConstant::ConstantPtrAuthOpcode, + {(unsigned)Record[0], (unsigned)Record[1], + (unsigned)Record[2], (unsigned)Record[3]}); + break; + } } assert(V->getType() == getTypeByID(CurTyID) && "Incorrect result type ID"); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 3d653fe4458f4b..046dad5721c4ce 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2848,6 +2848,12 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, 
unsigned LastVal, Code = bitc::CST_CODE_NO_CFI_VALUE; Record.push_back(VE.getTypeID(NC->getGlobalValue()->getType())); Record.push_back(VE.getValueID(NC->getGlobalValue())); + } else if (const auto *CPA = dyn_cast(C)) { + Code = bitc::CST_CODE_PTRAUTH; + Record.push_back(VE.getValueID(CPA->getPointer())); + Record.push_back(VE.getValueID(CPA->getKey())); + Record.push_back(VE.getValueID(CPA->getDiscriminator())); + Record.push_back(VE.getValueID(CPA->getAddrDiscriminator())); } else { #ifndef NDEBUG C->dump(); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c04f7208c61f2a..d8b0f52ecf9e32 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3972,7 +3972,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { // target can override this with custom lowering and calling the // implementation functions. LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (LI.isLegalOrCustom({G_UMIN, Ty}) && LI.isLegalOrCustom({G_UMAX, Ty})) + if (LI.isLegalOrCustom({G_UMIN, Ty})) return lowerAddSubSatToMinMax(MI); return lowerAddSubSatToAddoSubo(MI); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 93d866384b4829..2f4fdf5208d076 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11186,17 +11186,19 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } -// FIXME: This should be checking for no signed zeros on individual operands, as -// well as no nans. 
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, - SDValue RHS, + SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI) { - const TargetOptions &Options = DAG.getTarget().Options; EVT VT = LHS.getValueType(); + if (!VT.isFloatingPoint()) + return false; + + const TargetOptions &Options = DAG.getTarget().Options; - return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && + return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) && TLI.isProfitableToCombineMinNumMaxNum(VT) && - DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); + (Flags.hasNoNaNs() || + (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS))); } static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, @@ -11674,7 +11676,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // select (fcmp gt x, y), x, y -> fmaxnum x, y // // This is OK if we don't care what happens if either operand is a NaN. - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI)) if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC)) return FMinMax; @@ -12267,7 +12269,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (N0.hasOneUse() && + isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) { if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC)) return FMinMax; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index fc96ecdc662808..fb1424f75e097d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2488,6 +2488,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINIMUM: case ISD::FMAXNUM: case ISD::FMINNUM: + case ISD::FMAXNUM_IEEE: + case ISD::FMINNUM_IEEE: case ISD::FMUL: case ISD::FPOW: case ISD::FREM: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8fda35f0086329..12f1d005249d60 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -646,18 +646,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { } } - // Zero extend to the promoted type and do the count there. - SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - // Subtract off the extra leading bits in the bigger type. SDValue ExtractLeadingBits = DAG.getConstant( NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); - if (!N->isVPOpcode()) + if (!N->isVPOpcode()) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::SUB, dl, NVT, DAG.getNode(N->getOpcode(), dl, NVT, Op), ExtractLeadingBits); + } + SDValue Mask = N->getOperand(1); SDValue EVL = N->getOperand(2); + // Zero extend to the promoted type and do the count there. 
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); return DAG.getNode(ISD::VP_SUB, dl, NVT, DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), ExtractLeadingBits, Mask, EVL); @@ -681,11 +684,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { } // Zero extend to the promoted type and do the count or parity there. - SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - if (!N->isVPOpcode()) + if (!N->isVPOpcode()) { + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); - return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, - N->getOperand(1), N->getOperand(2)); + } + + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, Mask, + EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -1335,12 +1343,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SHL) + if (N->getOpcode() != ISD::VP_SHL) { + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -1364,27 
+1379,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { - // Sign extend the input. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - if (N->getNumOperands() == 2) + if (N->getNumOperands() == 2) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // Sign extend the input. + SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL); + SDValue RHS = VPSExtPromotedInteger(N->getOperand(1), Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - if (N->getNumOperands() == 2) + if (N->getNumOperands() == 2) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); + // Zero extend the input. 
+ SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + SDValue RHS = VPZExtPromotedInteger(N->getOperand(1), Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { @@ -1400,27 +1427,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { - // The input value must be properly sign extended. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SRA) + if (N->getOpcode() != ISD::VP_SRA) { + // The input value must be properly sign extended. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // The input value must be properly sign extended. + SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { - // The input value must be properly zero extended. 
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) - RHS = ZExtPromotedInteger(RHS); - if (N->getOpcode() != ISD::VP_SRL) + if (N->getOpcode() != ISD::VP_SRL) { + // The input value must be properly zero extended. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + } + + SDValue Mask = N->getOperand(2); + SDValue EVL = N->getOperand(3); + // The input value must be properly zero extended. + SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = VPZExtPromotedInteger(RHS, Mask, EVL); return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, - N->getOperand(2), N->getOperand(3)); + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { @@ -1487,7 +1530,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) { SDValue Mask = N->getOperand(3); SDValue EVL = N->getOperand(4); if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger) - Amt = ZExtPromotedInteger(Amt); + Amt = VPZExtPromotedInteger(Amt, Mask, EVL); EVT AmtVT = Amt.getValueType(); SDLoc DL(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d925089d5689f1..ba3c7582d5a8a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -275,6 +275,27 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { return DAG.getZeroExtendInReg(Op, dl, OldVT); } + /// Get a promoted operand and sign extend it to the final size. 
+ SDValue VPSExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + // FIXME: Add VP_SIGN_EXTEND_INREG. + EVT VT = Op.getValueType(); + unsigned BitsDiff = VT.getScalarSizeInBits() - OldVT.getScalarSizeInBits(); + SDValue ShiftCst = DAG.getShiftAmountConstant(BitsDiff, VT, dl); + SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShiftCst, Mask, EVL); + return DAG.getNode(ISD::VP_SRA, dl, VT, Shl, ShiftCst, Mask, EVL); + } + + /// Get a promoted operand and zero extend it to the final size. + SDValue VPZExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getVPZeroExtendInReg(Op, Mask, EVL, dl, OldVT); + } + // Promote the given operand V (vector or scalar) according to N's specific // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns // the nominal extension opcode (ISD::(ANY|ZERO|SIGN)_EXTEND) and the diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 40e621f0db2209..361416edb554ca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1174,8 +1174,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FADD: case ISD::VP_FADD: case ISD::FSUB: case ISD::VP_FSUB: case ISD::FMUL: case ISD::VP_FMUL: - case ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: @@ -4237,8 +4241,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::VP_SHL: case ISD::SRA: case ISD::VP_SRA: case ISD::SRL: case ISD::VP_SRL: - case 
ISD::FMINNUM: case ISD::VP_FMINNUM: - case ISD::FMAXNUM: case ISD::VP_FMAXNUM: + case ISD::FMINNUM: + case ISD::FMINNUM_IEEE: + case ISD::VP_FMINNUM: + case ISD::FMAXNUM: + case ISD::FMAXNUM_IEEE: + case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::VP_FMINIMUM: case ISD::FMAXIMUM: diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 3d5c58d282da56..df1c02c3dc67c2 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -181,6 +181,7 @@ std::string EVT::getEVTString() const { case MVT::Metadata: return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::funcref: return "funcref"; + case MVT::exnref: return "exnref"; case MVT::externref: return "externref"; case MVT::aarch64svcount: return "aarch64svcount"; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index ced5d78f994ab5..8b1a21f962b08f 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1594,6 +1594,27 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, return; } + if (const ConstantPtrAuth *CPA = dyn_cast(CV)) { + Out << "ptrauth ("; + + // ptrauth (ptr CST, i32 KEY[, i64 DISC[, ptr ADDRDISC]?]?) 
+ unsigned NumOpsToWrite = 2; + if (!CPA->getOperand(2)->isNullValue()) + NumOpsToWrite = 3; + if (!CPA->getOperand(3)->isNullValue()) + NumOpsToWrite = 4; + + ListSeparator LS; + for (unsigned i = 0, e = NumOpsToWrite; i != e; ++i) { + Out << LS; + WriterCtx.TypePrinter->print(CPA->getOperand(i)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, CPA->getOperand(i), WriterCtx); + } + Out << ')'; + return; + } + if (const ConstantArray *CA = dyn_cast(CV)) { Type *ETy = CA->getType()->getElementType(); Out << '['; diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index cfb89d557db479..119fcb4fa03461 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -550,6 +550,9 @@ void llvm::deleteConstant(Constant *C) { case Constant::NoCFIValueVal: delete static_cast(C); break; + case Constant::ConstantPtrAuthVal: + delete static_cast(C); + break; case Constant::UndefValueVal: delete static_cast(C); break; @@ -2015,6 +2018,124 @@ Value *NoCFIValue::handleOperandChangeImpl(Value *From, Value *To) { return nullptr; } +//---- ConstantPtrAuth::get() implementations. 
+// + +ConstantPtrAuth *ConstantPtrAuth::get(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc) { + Constant *ArgVec[] = {Ptr, Key, Disc, AddrDisc}; + ConstantPtrAuthKeyType MapKey(ArgVec); + LLVMContextImpl *pImpl = Ptr->getContext().pImpl; + return pImpl->ConstantPtrAuths.getOrCreate(Ptr->getType(), MapKey); +} + +ConstantPtrAuth *ConstantPtrAuth::getWithSameSchema(Constant *Pointer) const { + return get(Pointer, getKey(), getDiscriminator(), getAddrDiscriminator()); +} + +ConstantPtrAuth::ConstantPtrAuth(Constant *Ptr, ConstantInt *Key, + ConstantInt *Disc, Constant *AddrDisc) + : Constant(Ptr->getType(), Value::ConstantPtrAuthVal, &Op<0>(), 4) { + assert(Ptr->getType()->isPointerTy()); + assert(Key->getBitWidth() == 32); + assert(Disc->getBitWidth() == 64); + assert(AddrDisc->getType()->isPointerTy()); + setOperand(0, Ptr); + setOperand(1, Key); + setOperand(2, Disc); + setOperand(3, AddrDisc); +} + +/// Remove the constant from the constant table. +void ConstantPtrAuth::destroyConstantImpl() { + getType()->getContext().pImpl->ConstantPtrAuths.remove(this); +} + +Value *ConstantPtrAuth::handleOperandChangeImpl(Value *From, Value *ToV) { + assert(isa(ToV) && "Cannot make Constant refer to non-constant!"); + Constant *To = cast(ToV); + + SmallVector Values; + Values.reserve(getNumOperands()); + + unsigned NumUpdated = 0; + + Use *OperandList = getOperandList(); + unsigned OperandNo = 0; + for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O) { + Constant *Val = cast(O->get()); + if (Val == From) { + OperandNo = (O - OperandList); + Val = To; + ++NumUpdated; + } + Values.push_back(Val); + } + + return getContext().pImpl->ConstantPtrAuths.replaceOperandsInPlace( + Values, this, From, To, NumUpdated, OperandNo); +} + +bool ConstantPtrAuth::isKnownCompatibleWith(const Value *Key, + const Value *Discriminator, + const DataLayout &DL) const { + // If the keys are different, there's no chance for this to be compatible. 
+ if (getKey() != Key) + return false; + + // We can have 3 kinds of discriminators: + // - simple, integer-only: `i64 x, ptr null` vs. `i64 x` + // - address-only: `i64 0, ptr p` vs. `ptr p` + // - blended address/integer: `i64 x, ptr p` vs. `@llvm.ptrauth.blend(p, x)` + + // If this constant has a simple discriminator (integer, no address), easy: + // it's compatible iff the provided full discriminator is also a simple + // discriminator, identical to our integer discriminator. + if (!hasAddressDiscriminator()) + return getDiscriminator() == Discriminator; + + // Otherwise, we can isolate address and integer discriminator components. + const Value *AddrDiscriminator = nullptr; + + // This constant may or may not have an integer discriminator (instead of 0). + if (!getDiscriminator()->isNullValue()) { + // If it does, there's an implicit blend. We need to have a matching blend + // intrinsic in the provided full discriminator. + if (!match(Discriminator, + m_Intrinsic( + m_Value(AddrDiscriminator), m_Specific(getDiscriminator())))) + return false; + } else { + // Otherwise, interpret the provided full discriminator as address-only. + AddrDiscriminator = Discriminator; + } + + // Either way, we can now focus on comparing the address discriminators. + + // Discriminators are i64, so the provided addr disc may be a ptrtoint. + if (auto *Cast = dyn_cast(AddrDiscriminator)) + AddrDiscriminator = Cast->getPointerOperand(); + + // Beyond that, we're only interested in compatible pointers. + if (getAddrDiscriminator()->getType() != AddrDiscriminator->getType()) + return false; + + // These are often the same constant GEP, making them trivially equivalent. + if (getAddrDiscriminator() == AddrDiscriminator) + return true; + + // Finally, they may be equivalent base+offset expressions. 
+ APInt Off1(DL.getIndexTypeSizeInBits(getAddrDiscriminator()->getType()), 0); + auto *Base1 = getAddrDiscriminator()->stripAndAccumulateConstantOffsets( + DL, Off1, /*AllowNonInbounds=*/true); + + APInt Off2(DL.getIndexTypeSizeInBits(AddrDiscriminator->getType()), 0); + auto *Base2 = AddrDiscriminator->stripAndAccumulateConstantOffsets( + DL, Off2, /*AllowNonInbounds=*/true); + + return Base1 == Base2 && Off1 == Off2; +} + //---- ConstantExpr::get() implementations. // diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h index 7067d0d121117b..5153880b5cab64 100644 --- a/llvm/lib/IR/ConstantsContext.h +++ b/llvm/lib/IR/ConstantsContext.h @@ -23,6 +23,7 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -286,6 +287,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value) template struct ConstantAggrKeyType; struct InlineAsmKeyType; struct ConstantExprKeyType; +struct ConstantPtrAuthKeyType; template struct ConstantInfo; template <> struct ConstantInfo { @@ -308,6 +310,10 @@ template <> struct ConstantInfo { using ValType = ConstantAggrKeyType; using TypeClass = VectorType; }; +template <> struct ConstantInfo { + using ValType = ConstantPtrAuthKeyType; + using TypeClass = Type; +}; template struct ConstantAggrKeyType { ArrayRef Operands; @@ -536,6 +542,47 @@ struct ConstantExprKeyType { } }; +struct ConstantPtrAuthKeyType { + ArrayRef Operands; + + ConstantPtrAuthKeyType(ArrayRef Operands) : Operands(Operands) {} + + ConstantPtrAuthKeyType(ArrayRef Operands, const ConstantPtrAuth *) + : Operands(Operands) {} + + ConstantPtrAuthKeyType(const ConstantPtrAuth *C, + SmallVectorImpl &Storage) { + assert(Storage.empty() && "Expected empty storage"); + for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) + Storage.push_back(cast(C->getOperand(I))); + 
Operands = Storage; + } + + bool operator==(const ConstantPtrAuthKeyType &X) const { + return Operands == X.Operands; + } + + bool operator==(const ConstantPtrAuth *C) const { + if (Operands.size() != C->getNumOperands()) + return false; + for (unsigned I = 0, E = Operands.size(); I != E; ++I) + if (Operands[I] != C->getOperand(I)) + return false; + return true; + } + + unsigned getHash() const { + return hash_combine_range(Operands.begin(), Operands.end()); + } + + using TypeClass = typename ConstantInfo::TypeClass; + + ConstantPtrAuth *create(TypeClass *Ty) const { + return new ConstantPtrAuth(Operands[0], cast(Operands[1]), + cast(Operands[2]), Operands[3]); + } +}; + // Free memory for a given constant. Assumes the constant has already been // removed from all relevant maps. void deleteConstant(Constant *C); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index bd06ff82a15a58..13fa1afeaaff24 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -79,7 +79,7 @@ using ProfileCount = Function::ProfileCount; // are not in the public header file... 
template class llvm::SymbolTableListTraits; -static cl::opt NonGlobalValueMaxNameSize( +static cl::opt NonGlobalValueMaxNameSize( "non-global-value-max-name-size", cl::Hidden, cl::init(1024), cl::desc("Maximum size for the name of non-global values.")); diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 399fe0dad26c73..392e0d16f1761e 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1562,6 +1562,8 @@ class LLVMContextImpl { DenseMap NoCFIValues; + ConstantUniqueMap ConstantPtrAuths; + ConstantUniqueMap ExprConstants; ConstantUniqueMap InlineAsms; diff --git a/llvm/lib/IR/ValueSymbolTable.cpp b/llvm/lib/IR/ValueSymbolTable.cpp index 52f7ddcdc65a2b..a020acf22a96c5 100644 --- a/llvm/lib/IR/ValueSymbolTable.cpp +++ b/llvm/lib/IR/ValueSymbolTable.cpp @@ -43,23 +43,34 @@ ValueSymbolTable::~ValueSymbolTable() { ValueName *ValueSymbolTable::makeUniqueName(Value *V, SmallString<256> &UniqueName) { unsigned BaseSize = UniqueName.size(); + bool AppenDot = false; + if (auto *GV = dyn_cast(V)) { + // A dot is appended to mark it as clone during ABI demangling so that + // for example "_Z1fv" and "_Z1fv.1" both demangle to "f()", the second + // one being a clone. + // On NVPTX we cannot use a dot because PTX only allows [A-Za-z0-9_$] for + // identifiers. This breaks ABI demangling but at least ptxas accepts and + // compiles the program. + const Module *M = GV->getParent(); + if (!(M && Triple(M->getTargetTriple()).isNVPTX())) + AppenDot = true; + } + while (true) { // Trim any suffix off and append the next number. UniqueName.resize(BaseSize); raw_svector_ostream S(UniqueName); - if (auto *GV = dyn_cast(V)) { - // A dot is appended to mark it as clone during ABI demangling so that - // for example "_Z1fv" and "_Z1fv.1" both demangle to "f()", the second - // one being a clone. - // On NVPTX we cannot use a dot because PTX only allows [A-Za-z0-9_$] for - // identifiers. 
This breaks ABI demangling but at least ptxas accepts and - // compiles the program. - const Module *M = GV->getParent(); - if (!(M && Triple(M->getTargetTriple()).isNVPTX())) - S << "."; - } + if (AppenDot) + S << "."; S << ++LastUnique; + // Retry if MaxNameSize has been exceeded. + if (MaxNameSize > -1 && UniqueName.size() > (size_t)MaxNameSize) { + assert(BaseSize >= UniqueName.size() - (size_t)MaxNameSize && + "Can't generate unique name: MaxNameSize is too small."); + BaseSize -= UniqueName.size() - (size_t)MaxNameSize; + continue; + } // Try insert the vmap entry with this suffix. auto IterBool = vmap.insert(std::make_pair(UniqueName.str(), V)); if (IterBool.second) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 50f8d6ec842017..684e54444621b5 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -629,6 +629,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitConstantExprsRecursively(const Constant *EntryC); void visitConstantExpr(const ConstantExpr *CE); + void visitConstantPtrAuth(const ConstantPtrAuth *CPA); void verifyInlineAsmCall(const CallBase &Call); void verifyStatepoint(const CallBase &Call); void verifyFrameRecoverIndices(); @@ -2422,6 +2423,9 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { if (const auto *CE = dyn_cast(C)) visitConstantExpr(CE); + if (const auto *CPA = dyn_cast(C)) + visitConstantPtrAuth(CPA); + if (const auto *GV = dyn_cast(C)) { // Global Values get visited separately, but we do need to make sure // that the global value is in the correct module @@ -2449,6 +2453,23 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) { "Invalid bitcast", CE); } +void Verifier::visitConstantPtrAuth(const ConstantPtrAuth *CPA) { + Check(CPA->getPointer()->getType()->isPointerTy(), + "signed ptrauth constant base pointer must have pointer type"); + + Check(CPA->getType() == CPA->getPointer()->getType(), + "signed ptrauth constant must have same type as its 
base pointer"); + + Check(CPA->getKey()->getBitWidth() == 32, + "signed ptrauth constant key must be i32 constant integer"); + + Check(CPA->getAddrDiscriminator()->getType()->isPointerTy(), + "signed ptrauth constant address discriminator must be a pointer"); + + Check(CPA->getDiscriminator()->getBitWidth() == 64, + "signed ptrauth constant discriminator must be i64 constant integer"); +} + bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { // There shouldn't be more attribute sets than there are parameters plus the // function and return value. @@ -5090,6 +5111,8 @@ void Verifier::visitInstruction(Instruction &I) { } else if (isa(I.getOperand(i))) { Check(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i), "Cannot take the address of an inline asm!", &I); + } else if (auto *CPA = dyn_cast(I.getOperand(i))) { + visitConstantExprsRecursively(CPA); } else if (ConstantExpr *CE = dyn_cast(I.getOperand(i))) { if (CE->getType()->isPtrOrPtrVectorTy()) { // If we have a ConstantExpr pointer, we need to see if it came from an diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 6507a0e5950ebe..23381955c60a88 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -177,8 +177,8 @@ static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx) { static wasm::ValType parseValType(WasmObjectFile::ReadContext &Ctx, uint32_t Code) { - // only directly encoded FUNCREF/EXTERNREF are supported - // (not ref null func or ref null extern) + // only directly encoded FUNCREF/EXTERNREF/EXNREF are supported + // (not ref null func, ref null extern, or ref null exn) switch (Code) { case wasm::WASM_TYPE_I32: case wasm::WASM_TYPE_I64: @@ -187,6 +187,7 @@ static wasm::ValType parseValType(WasmObjectFile::ReadContext &Ctx, case wasm::WASM_TYPE_V128: case wasm::WASM_TYPE_FUNCREF: case wasm::WASM_TYPE_EXTERNREF: + case wasm::WASM_TYPE_EXNREF: return wasm::ValType(Code); } if (Code == 
wasm::WASM_TYPE_NULLABLE || Code == wasm::WASM_TYPE_NONNULLABLE) { @@ -1288,6 +1289,7 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) { auto ElemType = Im.Table.ElemType; if (ElemType != wasm::ValType::FUNCREF && ElemType != wasm::ValType::EXTERNREF && + ElemType != wasm::ValType::EXNREF && ElemType != wasm::ValType::OTHERREF) return make_error("invalid table element type", object_error::parse_failed); @@ -1346,6 +1348,7 @@ Error WasmObjectFile::parseTableSection(ReadContext &Ctx) { auto ElemType = Tables.back().Type.ElemType; if (ElemType != wasm::ValType::FUNCREF && ElemType != wasm::ValType::EXTERNREF && + ElemType != wasm::ValType::EXNREF && ElemType != wasm::ValType::OTHERREF) { return make_error("invalid table element type", object_error::parse_failed); @@ -1680,6 +1683,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) { Segment.ElemKind = parseValType(Ctx, ElemKind); if (Segment.ElemKind != wasm::ValType::FUNCREF && Segment.ElemKind != wasm::ValType::EXTERNREF && + Segment.ElemKind != wasm::ValType::EXNREF && Segment.ElemKind != wasm::ValType::OTHERREF) { return make_error("invalid elem type", object_error::parse_failed); diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp index 544a91d03dce01..7ad338f65706d5 100644 --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -606,6 +606,7 @@ void ScalarEnumerationTraits::enumeration( ECase(V128); ECase(FUNCREF); ECase(EXTERNREF); + ECase(EXNREF); ECase(OTHERREF); #undef ECase } @@ -640,6 +641,7 @@ void ScalarEnumerationTraits::enumeration( #define ECase(X) IO.enumCase(Type, #X, CONCAT(X)); ECase(FUNCREF); ECase(EXTERNREF); + ECase(EXNREF); ECase(OTHERREF); #undef ECase } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 836206a4fd86e2..798236c295194a 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1212,7 +1212,8 @@ Error 
IndexedMemProfReader::deserialize(const unsigned char *Start, const uint64_t FirstWord = support::endian::readNext(Ptr); - if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2) { + if (FirstWord == memprof::Version1 || FirstWord == memprof::Version2 || + FirstWord == memprof::Version3) { // Everything is good. We can proceed to deserialize the rest. Version = static_cast(FirstWord); } else if (FirstWord >= 24) { @@ -1559,6 +1560,7 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const { "MemProfCallStackTable must not be available"); return getMemProfRecordV0(IndexedRecord, *MemProfFrameTable); case memprof::Version2: + case memprof::Version3: assert(MemProfFrameTable && "MemProfFrameTable must be available"); assert(MemProfCallStackTable && "MemProfCallStackTable must be available"); return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable, diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index b67a9700b680ab..b16714ae8b9a2d 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -617,6 +617,56 @@ static Error writeMemProfV2(ProfOStream &OS, return Error::success(); } +// Write out MemProf Version3 as follows: +// uint64_t Version +// uint64_t RecordTableOffset = RecordTableGenerator.Emit +// uint64_t FramePayloadOffset = Offset for the frame payload +// uint64_t FrameTableOffset = FrameTableGenerator.Emit +// uint64_t CallStackPayloadOffset = Offset for the call stack payload +// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit +// uint64_t Num schema entries +// uint64_t Schema entry 0 +// uint64_t Schema entry 1 +// .... 
+// uint64_t Schema entry N - 1 +// OnDiskChainedHashTable MemProfRecordData +// OnDiskChainedHashTable MemProfFrameData +// OnDiskChainedHashTable MemProfCallStackData +static Error writeMemProfV3(ProfOStream &OS, + memprof::IndexedMemProfData &MemProfData, + bool MemProfFullSchema) { + OS.write(memprof::Version3); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + OS.write(0ULL); // Reserve space for the memprof call stack payload offset. + OS.write(0ULL); // Reserve space for the memprof call stack table offset. + + auto Schema = memprof::getHotColdSchema(); + if (MemProfFullSchema) + Schema = memprof::getFullSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData, + &Schema, memprof::Version3); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.FrameData); + + uint64_t CallStackPayloadOffset = OS.tell(); + uint64_t CallStackTableOffset = + writeMemProfCallStacks(OS, MemProfData.CallStackData); + + uint64_t Header[] = { + RecordTableOffset, FramePayloadOffset, FrameTableOffset, + CallStackPayloadOffset, CallStackTableOffset, + }; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + // Write out the MemProf data in a requested version. 
static Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData, @@ -629,6 +679,8 @@ static Error writeMemProf(ProfOStream &OS, return writeMemProfV1(OS, MemProfData); case memprof::Version2: return writeMemProfV2(OS, MemProfData, MemProfFullSchema); + case memprof::Version3: + return writeMemProfV3(OS, MemProfData, MemProfFullSchema); } return make_error( diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index e5608644519db4..2f0e53736c82e5 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -52,6 +52,7 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema, case Version1: return serializedSizeV0(*this, Schema); case Version2: + case Version3: return serializedSizeV2(*this, Schema); } llvm_unreachable("unsupported MemProf version"); @@ -95,6 +96,7 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema, case Version1: return serializedSizeV0(*this, Schema); case Version2: + case Version3: return serializedSizeV2(*this, Schema); } llvm_unreachable("unsupported MemProf version"); @@ -149,6 +151,7 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, serializeV0(*this, Schema, OS); return; case Version2: + case Version3: serializeV2(*this, Schema, OS); return; } @@ -239,14 +242,15 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, case Version1: return deserializeV0(Schema, Ptr); case Version2: + case Version3: return deserializeV2(Schema, Ptr); } llvm_unreachable("unsupported MemProf version"); } MemProfRecord IndexedMemProfRecord::toMemProfRecord( - llvm::function_ref(const CallStackId)> - Callback) const { + llvm::function_ref(const CallStackId)> Callback) + const { MemProfRecord Record; Record.AllocSites.reserve(AllocSites.size()); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f0e5a7d393b6c9..e99c6208594e3b 100644 --- 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3125,7 +3125,7 @@ lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, Chain = Unorder.getValue(1); Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL, DAG.getVTList(ContainerVT, MVT::Other), - {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL}); + {Chain, Src, Src, Src, Unorder, VL}); Chain = Src.getValue(1); // We do the conversion on the absolute value and fix the sign at the end. @@ -13704,6 +13704,44 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) -> +// (bitcast (sra (v2Xi16 (bitcast X)), 15)) +// Same for other equivalent types with other equivalent constants. +static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Do this for legal vectors unless they are i1 or i8 vectors. 
+ if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::AND || + N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL) + return SDValue(); + + SDValue And = N->getOperand(0); + SDValue Srl = And.getOperand(0); + + APInt V1, V2, V3; + if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) || + !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) || + !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3)) + return SDValue(); + + unsigned HalfSize = VT.getScalarSizeInBits() / 2; + if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) || + V3 != (HalfSize - 1)) + return SDValue(); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), HalfSize), + VT.getVectorElementCount() * 2); + SDLoc DL(N); + SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0)); + SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast, + DAG.getConstant(HalfSize - 1, DL, HalfVT)); + return DAG.getNode(ISD::BITCAST, DL, VT, Sra); +} static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -13748,6 +13786,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineBinOpOfZExt(N, DAG)) return V; + if (SDValue V = combineVectorMulToSraBitcast(N, DAG)) + return V; + return SDValue(); } @@ -16087,6 +16128,57 @@ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, return true; } +static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) { + // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1)) + // This would be benefit for the cases where X and Y are both the same value + // type of low precision vectors. 
Since the truncate would be lowered into + // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate + // restriction, such pattern would be expanded into a series of "vsetvli" + // and "vnsrl" instructions later to reach this point. + auto IsTruncNode = [](SDValue V) { + if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) + return false; + SDValue VL = V.getOperand(2); + auto *C = dyn_cast(VL); + // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand + bool IsVLMAXForVMSET = (C && C->isAllOnes()) || + (isa(VL) && + cast(VL)->getReg() == RISCV::X0); + return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && IsVLMAXForVMSET; + }; + + SDValue Op = N->getOperand(0); + + // We need to first find the inner level of TRUNCATE_VECTOR_VL node + // to distinguish such pattern. + while (IsTruncNode(Op)) { + if (!Op.hasOneUse()) + return SDValue(); + Op = Op.getOperand(0); + } + + if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse()) + return SDValue(); + + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() || + N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse()) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N10 = N1.getOperand(0); + if (!N00.getValueType().isVector() || + N00.getValueType() != N10.getValueType() || + N->getValueType(0) != N10.getValueType()) + return SDValue(); + + unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; + SDValue SMin = + DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, + DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); + return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); +} SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -16304,56 +16396,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } } return SDValue(); - case RISCVISD::TRUNCATE_VECTOR_VL: { - // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 
1)) - // This would be benefit for the cases where X and Y are both the same value - // type of low precision vectors. Since the truncate would be lowered into - // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate - // restriction, such pattern would be expanded into a series of "vsetvli" - // and "vnsrl" instructions later to reach this point. - auto IsTruncNode = [](SDValue V) { - if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) - return false; - SDValue VL = V.getOperand(2); - auto *C = dyn_cast(VL); - // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand - bool IsVLMAXForVMSET = (C && C->isAllOnes()) || - (isa(VL) && - cast(VL)->getReg() == RISCV::X0); - return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL && - IsVLMAXForVMSET; - }; - - SDValue Op = N->getOperand(0); - - // We need to first find the inner level of TRUNCATE_VECTOR_VL node - // to distinguish such pattern. - while (IsTruncNode(Op)) { - if (!Op.hasOneUse()) - return SDValue(); - Op = Op.getOperand(0); - } - - if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) { - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && - N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) { - SDValue N00 = N0.getOperand(0); - SDValue N10 = N1.getOperand(0); - if (N00.getValueType().isVector() && - N00.getValueType() == N10.getValueType() && - N->getValueType(0) == N10.getValueType()) { - unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1; - SDValue SMin = DAG.getNode( - ISD::SMIN, SDLoc(N1), N->getValueType(0), N10, - DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0))); - return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin); - } - } - } - break; - } + case RISCVISD::TRUNCATE_VECTOR_VL: + return combineTruncOfSraSext(N, DAG); case ISD::TRUNCATE: return performTRUNCATECombine(N, DAG, Subtarget); case ISD::SELECT: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ce50fe6e2cbb02..a1b078910e29c9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1311,6 +1311,26 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12), /// HI and ADD_LO address nodes. +// Pseudo for a rematerializable LUI+ADDI sequence for loading an address. +// It will be expanded after register allocation. +// FIXME: The scheduling information does not reflect the multiple instructions. +let Size = 8, isReMaterializable = 1 in +def PseudoMovAddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>, + Sched<[WriteIALU]>; + +def riscv_hi_oneuse : unop_oneuse; +def addr_hi_lo : PatFrag<(ops node:$hi, node:$lo), + (riscv_add_lo (riscv_hi_oneuse node:$hi), node:$lo)>; + +def : Pat<(addr_hi_lo tglobaladdr:$hi, tglobaladdr:$lo), + (PseudoMovAddr tglobaladdr:$hi, tglobaladdr:$lo)>; +def : Pat<(addr_hi_lo tblockaddress:$hi, tblockaddress:$lo), + (PseudoMovAddr tblockaddress:$hi, tblockaddress:$lo)>; +def : Pat<(addr_hi_lo tjumptable:$hi, tjumptable:$lo), + (PseudoMovAddr tjumptable:$hi, tjumptable:$lo)>; +def : Pat<(addr_hi_lo tconstpool:$hi, tconstpool:$lo), + (PseudoMovAddr tconstpool:$hi, tconstpool:$lo)>; + def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>; def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>; def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>; diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp index 410989177a8b9c..fecc83a821f420 100644 --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE, // 3) The offset value in the Global Address or Constant Pool is 0. 
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, MachineInstr *&Lo) { - if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC) + if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC && + Hi.getOpcode() != RISCV::PseudoMovAddr) return false; const MachineOperand &HiOp1 = Hi.getOperand(1); @@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi, HiOp1.getOffset() != 0) return false; - Register HiDestReg = Hi.getOperand(0).getReg(); - if (!MRI->hasOneUse(HiDestReg)) - return false; + if (Hi.getOpcode() == RISCV::PseudoMovAddr) { + // Most of the code should handle it correctly without modification by + // setting Lo and Hi both point to PseudoMovAddr + Lo = &Hi; + } else { + Register HiDestReg = Hi.getOperand(0).getReg(); + if (!MRI->hasOneUse(HiDestReg)) + return false; - Lo = &*MRI->use_instr_begin(HiDestReg); - if (Lo->getOpcode() != RISCV::ADDI) - return false; + Lo = &*MRI->use_instr_begin(HiDestReg); + if (Lo->getOpcode() != RISCV::ADDI) + return false; + } const MachineOperand &LoOp2 = Lo->getOperand(2); - if (Hi.getOpcode() == RISCV::LUI) { + if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) { if (LoOp2.getTargetFlags() != RISCVII::MO_LO || !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) || LoOp2.getOffset() != 0) @@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, Hi.getOperand(1).setOffset(NewOffset); MachineOperand &ImmOp = Lo.getOperand(2); + // Expand PseudoMovAddr into LUI + if (Hi.getOpcode() == RISCV::PseudoMovAddr) { + auto *TII = ST->getInstrInfo(); + Hi.setDesc(TII->get(RISCV::LUI)); + Hi.removeOperand(2); + } + if (Hi.getOpcode() != RISCV::AUIPC) ImmOp.setOffset(NewOffset); @@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, } } + // Prevent Lo (originally PseudoMovAddr, which is also pointed by Hi) from + // being erased + if (&Lo == &Hi) + return true; + 
MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg()); Lo.eraseFromParent(); return true; diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp index 52f2ce27164d6e..b7b0c47c084c64 100644 --- a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp @@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass { bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); }; char RISCVPostRAExpandPseudo::ID = 0; @@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB, switch (MBBI->getOpcode()) { case RISCV::PseudoMovImm: return expandMovImm(MBB, MBBI); + case RISCV::PseudoMovAddr: + return expandMovAddr(MBB, MBBI); default: return false; } @@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB, return true; } +bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + + Register DstReg = MBBI->getOperand(0).getReg(); + bool DstIsDead = MBBI->getOperand(0).isDead(); + bool Renamable = MBBI->getOperand(0).isRenamable(); + + BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI)) + .addReg(DstReg, RegState::Define | getRenamableRegState(Renamable)) + .add(MBBI->getOperand(1)); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) | + getRenamableRegState(Renamable)) + .addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable)) + .add(MBBI->getOperand(2)); + MBBI->eraseFromParent(); + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32", diff 
--git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index e8f58a19d25e3b..71dfe1062956e3 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -54,6 +54,13 @@ cl::opt // setjmp/longjmp handling using wasm EH instrutions cl::opt WebAssembly::WasmEnableSjLj( "wasm-enable-sjlj", cl::desc("WebAssembly setjmp/longjmp handling")); +// Whether we use the new exnref Wasm EH proposal adopted on Oct 2023. +// Should be used with -wasm-enable-eh. +// Currently set to false by default, but will later change to true and then +// later can be removed after the legacy WAsm EH instructions are removed. +cl::opt WebAssembly::WasmEnableExnref( + "wasm-enable-exnref", cl::desc("WebAssembly exception handling (exnref)"), + cl::init(false)); static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, const Triple &TT, diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 34502170a5c71f..7f1a5f616ed484 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -44,6 +44,7 @@ extern cl::opt WasmEnableEmEH; // asm.js-style EH extern cl::opt WasmEnableEmSjLj; // asm.js-style SjLJ extern cl::opt WasmEnableEH; // EH using Wasm EH instructions extern cl::opt WasmEnableSjLj; // SjLj using Wasm EH instructions +extern cl::opt WasmEnableExnref; // EH using new Wasm EH (exnref) enum OperandType { /// Basic block label in a branch construct. 
@@ -355,6 +356,8 @@ inline bool isArgument(unsigned Opc) { case WebAssembly::ARGUMENT_funcref_S: case WebAssembly::ARGUMENT_externref: case WebAssembly::ARGUMENT_externref_S: + case WebAssembly::ARGUMENT_exnref: + case WebAssembly::ARGUMENT_exnref_S: return true; default: return false; @@ -377,6 +380,8 @@ inline bool isCopy(unsigned Opc) { case WebAssembly::COPY_FUNCREF_S: case WebAssembly::COPY_EXTERNREF: case WebAssembly::COPY_EXTERNREF_S: + case WebAssembly::COPY_EXNREF: + case WebAssembly::COPY_EXNREF_S: return true; default: return false; @@ -399,6 +404,8 @@ inline bool isTee(unsigned Opc) { case WebAssembly::TEE_FUNCREF_S: case WebAssembly::TEE_EXTERNREF: case WebAssembly::TEE_EXTERNREF_S: + case WebAssembly::TEE_EXNREF: + case WebAssembly::TEE_EXNREF_S: return true; default: return false; @@ -489,6 +496,8 @@ inline bool isLocalGet(unsigned Opc) { case WebAssembly::LOCAL_GET_FUNCREF_S: case WebAssembly::LOCAL_GET_EXTERNREF: case WebAssembly::LOCAL_GET_EXTERNREF_S: + case WebAssembly::LOCAL_GET_EXNREF: + case WebAssembly::LOCAL_GET_EXNREF_S: return true; default: return false; @@ -511,6 +520,8 @@ inline bool isLocalSet(unsigned Opc) { case WebAssembly::LOCAL_SET_FUNCREF_S: case WebAssembly::LOCAL_SET_EXTERNREF: case WebAssembly::LOCAL_SET_EXTERNREF_S: + case WebAssembly::LOCAL_SET_EXNREF: + case WebAssembly::LOCAL_SET_EXNREF_S: return true; default: return false; @@ -533,6 +544,8 @@ inline bool isLocalTee(unsigned Opc) { case WebAssembly::LOCAL_TEE_FUNCREF_S: case WebAssembly::LOCAL_TEE_EXTERNREF: case WebAssembly::LOCAL_TEE_EXTERNREF_S: + case WebAssembly::LOCAL_TEE_EXNREF: + case WebAssembly::LOCAL_TEE_EXNREF_S: return true; default: return false; diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp index 8ea02bd2ad1ff0..d9c8e22bbbaf5b 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp +++ 
b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp @@ -27,6 +27,7 @@ std::optional WebAssembly::parseType(StringRef Type) { wasm::ValType::V128) .Case("funcref", wasm::ValType::FUNCREF) .Case("externref", wasm::ValType::EXTERNREF) + .Case("exnref", wasm::ValType::EXNREF) .Default(std::nullopt); } @@ -40,6 +41,7 @@ WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) { .Case("v128", WebAssembly::BlockType::V128) .Case("funcref", WebAssembly::BlockType::Funcref) .Case("externref", WebAssembly::BlockType::Externref) + .Case("exnref", WebAssembly::BlockType::Exnref) .Case("void", WebAssembly::BlockType::Void) .Default(WebAssembly::BlockType::Invalid); } @@ -62,6 +64,8 @@ const char *WebAssembly::anyTypeToString(unsigned Type) { return "funcref"; case wasm::WASM_TYPE_EXTERNREF: return "externref"; + case wasm::WASM_TYPE_EXNREF: + return "exnref"; case wasm::WASM_TYPE_FUNC: return "func"; case wasm::WASM_TYPE_NORESULT: @@ -110,6 +114,8 @@ wasm::ValType WebAssembly::regClassToValType(unsigned RC) { return wasm::ValType::FUNCREF; case WebAssembly::EXTERNREFRegClassID: return wasm::ValType::EXTERNREF; + case WebAssembly::EXNREFRegClassID: + return wasm::ValType::EXNREF; default: llvm_unreachable("unexpected type"); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h index 486cf264d13e2f..063ee4dba9068e 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h @@ -32,6 +32,7 @@ enum class BlockType : unsigned { V128 = unsigned(wasm::ValType::V128), Externref = unsigned(wasm::ValType::EXTERNREF), Funcref = unsigned(wasm::ValType::FUNCREF), + Exnref = unsigned(wasm::ValType::EXNREF), // Multivalue blocks (and other non-void blocks) are only emitted when the // blocks will never be exited and are at the ends of functions (see // 
WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made @@ -41,7 +42,8 @@ enum class BlockType : unsigned { }; inline bool isRefType(wasm::ValType Type) { - return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF; + return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF || + Type == wasm::ValType::EXNREF; } // Convert ValType or a list/signature of ValTypes to a string. diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp index 867953b4e8d71d..f9293460e701a0 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp @@ -33,6 +33,7 @@ MVT WebAssembly::parseMVT(StringRef Type) { .Case("v2i64", MVT::v2i64) .Case("funcref", MVT::funcref) .Case("externref", MVT::externref) + .Case("exnref", MVT::exnref) .Default(MVT::INVALID_SIMPLE_VALUE_TYPE); } @@ -58,6 +59,8 @@ wasm::ValType WebAssembly::toValType(MVT Type) { return wasm::ValType::FUNCREF; case MVT::externref: return wasm::ValType::EXTERNREF; + case MVT::exnref: + return wasm::ValType::EXNREF; default: llvm_unreachable("unexpected type"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 443558537da245..0b7ec6e74cab20 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -125,6 +125,8 @@ static char getInvokeSig(wasm::ValType VT) { return 'F'; case wasm::ValType::EXTERNREF: return 'X'; + case wasm::ValType::EXNREF: + return 'E'; default: llvm_unreachable("Unhandled wasm::ValType enum"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 0159c44a79b76d..3c6a29311a10e4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -100,6 +100,8 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) { return WebAssembly::DROP_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::DROP_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::DROP_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -119,6 +121,8 @@ static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_GET_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::LOCAL_GET_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_GET_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -138,6 +142,8 @@ static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_SET_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::LOCAL_SET_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_SET_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -157,6 +163,8 @@ static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_TEE_FUNCREF; if (RC == &WebAssembly::EXTERNREFRegClass) return WebAssembly::LOCAL_TEE_EXTERNREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_TEE_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -176,6 +184,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { return MVT::funcref; if (RC == &WebAssembly::EXTERNREFRegClass) return MVT::externref; + if (RC == &WebAssembly::EXNREFRegClass) + return MVT::exnref; llvm_unreachable("unrecognized register class"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 26e13948bc9a68..aa3aa1b007a530 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -137,6 +137,10 @@ class 
WebAssemblyFastISel final : public FastISel { if (Subtarget->hasReferenceTypes()) return VT; break; + case MVT::exnref: + if (Subtarget->hasReferenceTypes() && Subtarget->hasExceptionHandling()) + return VT; + break; case MVT::f16: return MVT::f32; case MVT::v16i8: @@ -717,6 +721,10 @@ bool WebAssemblyFastISel::fastLowerArguments() { Opc = WebAssembly::ARGUMENT_externref; RC = &WebAssembly::EXTERNREFRegClass; break; + case MVT::exnref: + Opc = WebAssembly::ARGUMENT_exnref; + RC = &WebAssembly::EXNREFRegClass; + break; default: return false; } @@ -821,6 +829,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { case MVT::externref: ResultReg = createResultReg(&WebAssembly::EXTERNREFRegClass); break; + case MVT::exnref: + ResultReg = createResultReg(&WebAssembly::EXNREFRegClass); + break; default: return false; } @@ -948,6 +959,10 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) { Opc = WebAssembly::SELECT_EXTERNREF; RC = &WebAssembly::EXTERNREFRegClass; break; + case MVT::exnref: + Opc = WebAssembly::SELECT_EXNREF; + RC = &WebAssembly::EXNREFRegClass; + break; default: return false; } @@ -1355,6 +1370,7 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { case MVT::v2f64: case MVT::funcref: case MVT::externref: + case MVT::exnref: break; default: return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 518b6932a0c879..4beab9d091581b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -76,6 +76,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( if (Subtarget->hasReferenceTypes()) { addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass); addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass); + if (Subtarget->hasExceptionHandling()) { + addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass); + } } // Compute derived properties from 
the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -142,6 +145,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setTruncStoreAction(T, MVT::f16, Expand); } + if (Subtarget->hasHalfPrecision()) { + setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); + setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); + } + // Expand unavailable integer operations. for (auto Op : {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index c1a5a45395e87d..3d37eb2fa27bce 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -292,6 +292,7 @@ defm "": ARGUMENT; defm "": ARGUMENT; defm "": ARGUMENT; defm "": ARGUMENT; +defm "": ARGUMENT; // local.get and local.set are not generated by instruction selection; they // are implied by virtual register uses and defs. 
@@ -375,6 +376,8 @@ defm "" : LOCAL; defm "" : LOCAL, Requires<[HasSIMD128]>; defm "" : LOCAL, Requires<[HasReferenceTypes]>; defm "" : LOCAL, Requires<[HasReferenceTypes]>; +defm "" : LOCAL, + Requires<[HasReferenceTypes, HasExceptionHandling]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td index 608963d588635e..2654a09387fd4a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -17,8 +17,9 @@ multiclass REF_I { [(set rc:$dst, (!cast("int_wasm_ref_null_" # ht)))], "ref.null_" # ht # "$dst", "ref.null_" # ht, - !cond(!eq(ht, "func") : 0xd070, - !eq(ht, "extern") : 0xd06f)>, + !cond(!eq(ht, "func") : 0xd070, + !eq(ht, "extern") : 0xd06f, + !eq(ht, "exn") : 0xd069)>, Requires<[HasReferenceTypes]>; defm SELECT_#rc: I<(outs rc:$dst), (ins rc:$lhs, rc:$rhs, I32:$cond), (outs), (ins), @@ -37,8 +38,9 @@ multiclass REF_I { defm "" : REF_I; defm "" : REF_I; +defm "" : REF_I; -foreach rc = [FUNCREF, EXTERNREF] in { +foreach rc = [FUNCREF, EXTERNREF, EXNREF] in { def : Pat<(select (i32 (setne I32:$cond, 0)), rc:$lhs, rc:$rhs), (!cast("SELECT_"#rc) rc:$lhs, rc:$rhs, I32:$cond)>; def : Pat<(select (i32 (seteq I32:$cond, 0)), rc:$lhs, rc:$rhs), diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 558e3d859dcd84..baf15ccdbe9edb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -16,33 +16,34 @@ multiclass ABSTRACT_SIMD_I pattern_r, string asmstr_r, string asmstr_s, bits<32> simdop, - Predicate simd_level> { + list reqs> { defm "" : I, - Requires<[simd_level]>; + Requires; } multiclass SIMD_I pattern_r, string asmstr_r = "", - string asmstr_s = "", bits<32> simdop = -1> { 
+ string asmstr_s = "", bits<32> simdop = -1, + list reqs = []> { defm "" : ABSTRACT_SIMD_I; + asmstr_s, simdop, !listconcat([HasSIMD128], reqs)>; } multiclass RELAXED_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : ABSTRACT_SIMD_I; + asmstr_s, simdop, [HasRelaxedSIMD]>; } multiclass HALF_PRECISION_I pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : ABSTRACT_SIMD_I; + asmstr_s, simdop, [HasHalfPrecision]>; } @@ -152,6 +153,19 @@ def F64x2 : Vec { let prefix = "f64x2"; } +def F16x8 : Vec { + let vt = v8f16; + let int_vt = v8i16; + let lane_vt = f32; + let lane_rc = F32; + let lane_bits = 16; + let lane_idx = LaneIdx8; + let lane_load = int_wasm_loadf16_f32; + let splat = PatFrag<(ops node:$x), (v8f16 (splat_vector (f16 $x)))>; + let prefix = "f16x8"; +} + +// TODO: Include F16x8 here when half precision is better supported. defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; defvar IntVecs = [I8x16, I16x8, I32x4, I64x2]; @@ -781,13 +795,19 @@ def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))), // Bitwise operations //===----------------------------------------------------------------------===// -multiclass SIMDBinary simdop> { +multiclass SIMDBinary simdop, list reqs = []> { defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set (vec.vt V128:$dst), (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], vec.prefix#"."#name#"\t$dst, $lhs, $rhs", - vec.prefix#"."#name, simdop>; + vec.prefix#"."#name, simdop, reqs>; +} + +multiclass HalfPrecisionBinary simdop> { + defm "" : SIMDBinary; } multiclass SIMDBitwise simdop, @@ -1199,6 +1219,7 @@ def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>; multiclass SIMDBinaryFP baseInst> { defm "" : SIMDBinary; defm "" : SIMDBinary; + defm "" : HalfPrecisionBinary; } // Addition: add @@ -1242,7 +1263,7 @@ defm PMAX : SIMDBinaryFP; // Also match the pmin/pmax cases where the 
operands are int vectors (but the // comparison is still a floating point comparison). This can happen when using // the wasm_simd128.h intrinsics because v128_t is an integer vector. -foreach vec = [F32x4, F64x2] in { +foreach vec = [F32x4, F64x2, F16x8] in { defvar pmin = !cast("PMIN_"#vec); defvar pmax = !cast("PMAX_"#vec); def : Pat<(vec.int_vt (vselect @@ -1266,6 +1287,10 @@ def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))), (PMIN_F64x2 V128:$lhs, V128:$rhs)>; def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))), (PMAX_F64x2 V128:$lhs, V128:$rhs)>; +def : Pat<(v8f16 (int_wasm_pmin (v8f16 V128:$lhs), (v8f16 V128:$rhs))), + (PMIN_F16x8 V128:$lhs, V128:$rhs)>; +def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))), + (PMAX_F16x8 V128:$lhs, V128:$rhs)>; //===----------------------------------------------------------------------===// // Conversions diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td index 069ce5e3bc94a9..02f0ab8577c3d0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td @@ -64,6 +64,8 @@ multiclass TABLE { defm "" : TABLE, Requires<[HasReferenceTypes]>; defm "" : TABLE, Requires<[HasReferenceTypes]>; +defm "" : TABLE, + Requires<[HasReferenceTypes, HasExceptionHandling]>; def : Pat<(WebAssemblyTableSet mcsym:$table, i32:$idx, funcref:$r), (TABLE_SET_FUNCREF mcsym:$table, i32:$idx, funcref:$r)>, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index ef174e1716ef1e..d4edb6bf18d932 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -504,6 +504,8 @@ static unsigned getTeeOpcode(const TargetRegisterClass *RC) { return WebAssembly::TEE_EXTERNREF; if (RC == &WebAssembly::FUNCREFRegClass) return 
WebAssembly::TEE_FUNCREF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::TEE_EXNREF; llvm_unreachable("Unexpected register class"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 4e2faa608be077..17889dacc868c2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -45,6 +45,7 @@ def V128_0: WebAssemblyReg<"%v128">; def FUNCREF_0 : WebAssemblyReg<"%funcref.0">; def EXTERNREF_0 : WebAssemblyReg<"%externref.0">; +def EXNREF_0 : WebAssemblyReg<"%exnref.0">; // The value stack "register". This is an opaque entity which serves to order // uses and defs that must remain in LIFO order. @@ -68,3 +69,4 @@ def V128 : WebAssemblyRegClass<[v8f16, v4f32, v2f64, v2i64, v4i32, v16i8, 128, (add V128_0)>; def FUNCREF : WebAssemblyRegClass<[funcref], 0, (add FUNCREF_0)>; def EXTERNREF : WebAssemblyRegClass<[externref], 0, (add EXTERNREF_0)>; +def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index de342e89657367..fd92a35c2638a5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -385,18 +385,36 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { using WebAssembly::WasmEnableEH; using WebAssembly::WasmEnableEmEH; using WebAssembly::WasmEnableEmSjLj; +using WebAssembly::WasmEnableExnref; using WebAssembly::WasmEnableSjLj; static void basicCheckForEHAndSjLj(TargetMachine *TM) { - // Before checking, we make sure TargetOptions.ExceptionModel is the same as + + // You can't enable two modes of EH at the same time + if (WasmEnableEmEH && WasmEnableEH) + report_fatal_error( + "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); + // You can't enable 
two modes of SjLj at the same time + if (WasmEnableEmSjLj && WasmEnableSjLj) + report_fatal_error( + "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); + // You can't mix Emscripten EH with Wasm SjLj. + if (WasmEnableEmEH && WasmEnableSjLj) + report_fatal_error( + "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); + if (WasmEnableExnref && !WasmEnableEH) + report_fatal_error( + "-wasm-enable-exnref should be used with -wasm-enable-eh"); + + // Here we make sure TargetOptions.ExceptionModel is the same as // MCAsmInfo.ExceptionsType. Normally these have to be the same, because clang // stores the exception model info in LangOptions, which is later transferred // to TargetOptions and MCAsmInfo. But when clang compiles bitcode directly, // clang's LangOptions is not used and thus the exception model info is not // correctly transferred to TargetOptions and MCAsmInfo, so we make sure we - // have the correct exception model in WebAssemblyMCAsmInfo constructor. - // But in this case TargetOptions is still not updated, so we make sure they - // are the same. + // have the correct exception model in WebAssemblyMCAsmInfo constructor. But + // in this case TargetOptions is still not updated, so we make sure they are + // the same. 
TM->Options.ExceptionModel = TM->getMCAsmInfo()->getExceptionHandlingType(); // Basic Correctness checking related to -exception-model @@ -418,18 +436,6 @@ static void basicCheckForEHAndSjLj(TargetMachine *TM) { "-exception-model=wasm only allowed with at least one of " "-wasm-enable-eh or -wasm-enable-sjlj"); - // You can't enable two modes of EH at the same time - if (WasmEnableEmEH && WasmEnableEH) - report_fatal_error( - "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); - // You can't enable two modes of SjLj at the same time - if (WasmEnableEmSjLj && WasmEnableSjLj) - report_fatal_error( - "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); - // You can't mix Emscripten EH with Wasm SjLj. - if (WasmEnableEmEH && WasmEnableSjLj) - report_fatal_error( - "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); // Currently it is allowed to mix Wasm EH with Emscripten SjLj as an interim // measure, but some code will error out at compile time in this combination. // See WebAssemblyLowerEmscriptenEHSjLj pass for details. 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index 60e872549f87d9..5e7279808cce63 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -175,6 +175,8 @@ unsigned WebAssembly::getCopyOpcodeForRegClass(const TargetRegisterClass *RC) { return WebAssembly::COPY_FUNCREF; case WebAssembly::EXTERNREFRegClassID: return WebAssembly::COPY_EXTERNREF; + case WebAssembly::EXNREFRegClassID: + return WebAssembly::COPY_EXNREF; default: llvm_unreachable("Unexpected register class"); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 0920179fb76b73..92ad4c34da6e7e 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1391,10 +1391,11 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) { return InlineCost::getAlways("preinliner"); } - // For old FDO inliner, we inline the call site as long as cost is not - // "Never". The cost-benefit check is done earlier. + // For old FDO inliner, we inline the call site if it is below hot threshold, + // even if the function is hot based on sample profile data. This is to + // prevent huge functions from being inlined. if (!CallsitePrioritizedInline) { - return InlineCost::get(Cost.getCost(), INT_MAX); + return InlineCost::get(Cost.getCost(), SampleHotCallSiteThreshold); } // Otherwise only use the cost from call analyzer, but overwite threshold with diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index c7e25c9f3d2c92..3fe5478408d457 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -22,8 +22,6 @@ // // Future loop memory idioms to recognize: // memcmp, strlen, etc. 
-// Future floating point idioms to recognize in -ffast-math mode: -// fpowi // // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). @@ -1107,7 +1105,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( GV->setAlignment(Align(16)); Value *PatternPtr = GV; NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); - + // Set the TBAA info if present. if (AATags.TBAA) NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); @@ -1117,7 +1115,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( if (AATags.NoAlias) NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); - } + } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index d91320863e241d..c903e47a93cafd 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -471,7 +471,7 @@ using RepeatedValue = std::pair; static bool LinearizeExprTree(Instruction *I, SmallVectorImpl &Ops, ReassociatePass::OrderedSet &ToRedo, - bool &HasNUW) { + reassociate::OverflowTracking &Flags) { assert((isa(I) || isa(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -512,6 +512,7 @@ static bool LinearizeExprTree(Instruction *I, using LeafMap = DenseMap; LeafMap Leaves; // Leaf -> Total weight so far. SmallVector LeafOrder; // Ensure deterministic leaf output order. + const DataLayout DL = I->getModule()->getDataLayout(); #ifndef NDEBUG SmallPtrSet Visited; // For checking the iteration scheme. @@ -520,8 +521,10 @@ static bool LinearizeExprTree(Instruction *I, std::pair P = Worklist.pop_back_val(); I = P.first; // We examine the operands of this binary operator. 
- if (isa(I)) - HasNUW &= I->hasNoUnsignedWrap(); + if (isa(I)) { + Flags.HasNUW &= I->hasNoUnsignedWrap(); + Flags.HasNSW &= I->hasNoSignedWrap(); + } for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands. Value *Op = I->getOperand(OpIdx); @@ -648,6 +651,8 @@ static bool LinearizeExprTree(Instruction *I, // Ensure the leaf is only output once. It->second = 0; Ops.push_back(std::make_pair(V, Weight)); + if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW) + Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL)); } // For nilpotent operations or addition there may be no operands, for example @@ -666,7 +671,7 @@ static bool LinearizeExprTree(Instruction *I, /// linearized and optimized, emit them in-order. void ReassociatePass::RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - bool HasNUW) { + OverflowTracking Flags) { assert(Ops.size() > 1 && "Single values should be used directly!"); // Since our optimizations should never increase the number of operations, the @@ -834,8 +839,12 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I, // Note that it doesn't hold for mul if one of the operands is zero. // TODO: We can preserve NUW flag if we prove that all mul operands // are non-zero. 
- if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add) - ExpressionChangedStart->setHasNoUnsignedWrap(); + if (ExpressionChangedStart->getOpcode() == Instruction::Add) { + if (Flags.HasNUW) + ExpressionChangedStart->setHasNoUnsignedWrap(); + if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW)) + ExpressionChangedStart->setHasNoSignedWrap(); + } } } @@ -1192,8 +1201,8 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector Tree; - bool HasNUW = true; - MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW); + OverflowTracking Flags; + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags); SmallVector Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -1235,7 +1244,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { if (!FoundFactor) { // Make sure to restore the operands to the expression tree. - RewriteExprTree(BO, Factors, HasNUW); + RewriteExprTree(BO, Factors, Flags); return nullptr; } @@ -1247,7 +1256,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { RedoInsts.insert(BO); V = Factors[0].Op; } else { - RewriteExprTree(BO, Factors, HasNUW); + RewriteExprTree(BO, Factors, Flags); V = BO; } @@ -2373,8 +2382,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; - bool HasNUW = true; - MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW); + OverflowTracking Flags; + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags); SmallVector Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) @@ -2567,7 +2576,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { dbgs() << '\n'); // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. 
- RewriteExprTree(I, Ops, HasNUW); + RewriteExprTree(I, Ops, Flags); } void diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index 809b15b2004952..81d8b01fe7fb72 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -130,8 +130,16 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) { ; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8 +; CHECK-EMPTY: ; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: ; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 -> ; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll index 845ff078ee0eb4..416742a94e0d36 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll @@ -45,8 +45,13 @@ exit: define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) { ; CHECK-LABEL: 
'different_non_constant_strides_known_backward_distance_larger_than_trip_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll new file mode 100644 index 00000000000000..8dc79a54eb97a5 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/predicated-symbolic-max-backedge-taken-count.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='print' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s + +; %i and %i + 1 can overflow. +define void @test1(i64 %x, ptr %a, ptr %b) { +; CHECK-LABEL: 'test1' +; CHECK-NEXT: Determining loop execution counts for: @test1 +; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. +; CHECK-NEXT: exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: symbolic max exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: symbolic max exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {1,+,1}<%header> Added Flags: +; +entry: + br label %header + +header: + %conv11 = phi i64 [ 0, %entry ], [ %conv, %latch ] + %i.010 = phi i32 [ 0, %entry ], [ %add, %latch ] + %add = add i32 %i.010, 1 + %idxprom = zext i32 %add to i64 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom + %ld = load i32, ptr %arrayidx, align 4 + %uncountable.c = icmp eq i32 %ld, 10 + br i1 %uncountable.c, label %exit, label %latch + +latch: + %add2 = add nsw i32 %ld, 1 + %arrayidx4 = getelementptr inbounds i32, ptr %b, i64 %conv11 + store i32 %add2, ptr %arrayidx4, align 4 + %conv = zext i32 %add to i64 + %cmp = icmp ult i64 %conv, %x + br i1 %cmp, label %header, label %exit + +exit: + ret void +} + +; %i can overflow. +; +; We need to check that i doesn't wrap, but we don't need a run-time alias +; check. We also need an extra no-wrap check to get the backedge taken count. +define void @test2(i64 %x, ptr %a) { +; CHECK-LABEL: 'test2' +; CHECK-NEXT: Determining loop execution counts for: @test2 +; CHECK-NEXT: Loop %header: Unpredictable backedge-taken count. +; CHECK-NEXT: exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Unpredictable constant max backedge-taken count. +; CHECK-NEXT: Loop %header: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: symbolic max exit count for header: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: symbolic max exit count for latch: ***COULDNOTCOMPUTE*** +; CHECK-NEXT: Loop %header: Predicated symbolic max backedge-taken count is (-1 + (1 umax %x)) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {1,+,1}<%header> Added Flags: +; +entry: + br label %header + +header: + %conv11 = phi i64 [ 0, %entry ], [ %conv, %latch ] + %i.010 = phi i32 [ 0, %entry ], [ %inc, %latch ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %conv11 + %ld = load i32, ptr %arrayidx, align 4 + %uncountable.c = icmp eq i32 %ld, 10 + br i1 %uncountable.c, label %exit, label %latch + +latch: + %add = add nsw i32 %ld, 1 + store i32 %add, ptr %arrayidx, align 4 + %inc = add i32 %i.010, 1 + %conv = zext i32 %inc to i64 + %cmp = icmp ult i64 %conv, %x + br i1 %cmp, label %header, label %exit + +exit: + ret void +} diff --git a/llvm/test/Assembler/invalid-ptrauth-const1.ll b/llvm/test/Assembler/invalid-ptrauth-const1.ll new file mode 100644 index 00000000000000..fba2e230782382 --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const1.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth base pointer must be a pointer +@auth_var = global ptr ptrauth (i32 42, i32 0) diff --git a/llvm/test/Assembler/invalid-ptrauth-const2.ll b/llvm/test/Assembler/invalid-ptrauth-const2.ll new file mode 100644 index 00000000000000..4499c42601c99e --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const2.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth key must be i32 constant +@auth_var = global ptr ptrauth (ptr @var, i32 ptrtoint (ptr @var to i32)) diff --git a/llvm/test/Assembler/invalid-ptrauth-const3.ll b/llvm/test/Assembler/invalid-ptrauth-const3.ll new file mode 100644 index 00000000000000..3f2688d92a0010 --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const3.ll @@ 
-0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth address discriminator must be a pointer +@auth_var = global ptr ptrauth (ptr @var, i32 2, i64 65535, i8 0) diff --git a/llvm/test/Assembler/invalid-ptrauth-const4.ll b/llvm/test/Assembler/invalid-ptrauth-const4.ll new file mode 100644 index 00000000000000..843a220458a61b --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const4.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth integer discriminator must be i64 constant +@auth_var = global ptr ptrauth (ptr @var, i32 2, ptr null, i64 ptrtoint (ptr @var to i64)) diff --git a/llvm/test/Assembler/invalid-ptrauth-const5.ll b/llvm/test/Assembler/invalid-ptrauth-const5.ll new file mode 100644 index 00000000000000..9b47f6f5f423fc --- /dev/null +++ b/llvm/test/Assembler/invalid-ptrauth-const5.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s 2>&1 | FileCheck %s + +@var = global i32 0 + +; CHECK: error: constant ptrauth integer discriminator must be i64 constant +@auth_var = global ptr ptrauth (ptr @var, i32 2, ptr @var)) diff --git a/llvm/test/Assembler/non-global-value-max-name-size-2.ll b/llvm/test/Assembler/non-global-value-max-name-size-2.ll new file mode 100644 index 00000000000000..5eac003ddb4383 --- /dev/null +++ b/llvm/test/Assembler/non-global-value-max-name-size-2.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -S -passes='always-inline' -non-global-value-max-name-size=5 | opt -non-global-value-max-name-size=5 -passes=verify -disable-output + +; Opt should not generate too long name for labels during inlining. 
+ +define internal i32 @inner(i32 %flag) alwaysinline { +entry: + %icmp = icmp slt i32 %flag, 0 + br i1 %icmp, label %one, label %two + +one: + ret i32 42 + +two: + ret i32 44 +} + +define i32 @outer(i32 %x) { +entry: + %call1 = call i32 @inner(i32 %x) + %call2 = call i32 @inner(i32 %x) + %ret = add i32 %call1, %call2 + ret i32 %ret +} \ No newline at end of file diff --git a/llvm/test/Assembler/ptrauth-const.ll b/llvm/test/Assembler/ptrauth-const.ll new file mode 100644 index 00000000000000..94d35146d5927b --- /dev/null +++ b/llvm/test/Assembler/ptrauth-const.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +@var = global i32 0 + +; CHECK: @basic = global ptr ptrauth (ptr @var, i32 0) +@basic = global ptr ptrauth (ptr @var, i32 0) + +; CHECK: @keyed = global ptr ptrauth (ptr @var, i32 3) +@keyed = global ptr ptrauth (ptr @var, i32 3) + +; CHECK: @intdisc = global ptr ptrauth (ptr @var, i32 0, i64 -1) +@intdisc = global ptr ptrauth (ptr @var, i32 0, i64 -1) + +; CHECK: @addrdisc = global ptr ptrauth (ptr @var, i32 2, i64 1234, ptr @addrdisc) +@addrdisc = global ptr ptrauth (ptr @var, i32 2, i64 1234, ptr @addrdisc) + + +@var1 = addrspace(1) global i32 0 + +; CHECK: @addrspace = global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 0) +@addrspace = global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 0) + +; CHECK: @addrspace_addrdisc = addrspace(2) global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 2, i64 1234, ptr addrspace(2) @addrspace_addrdisc) +@addrspace_addrdisc = addrspace(2) global ptr addrspace(1) ptrauth (ptr addrspace(1) @var1, i32 2, i64 1234, ptr addrspace(2) @addrspace_addrdisc) diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index b374924516d665..2a846e036924c7 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -217,6 +217,10 @@ declare void @g.f1() ; CHECK: @g.sanitize_address_dyninit = global i32 0, sanitize_address_dyninit ; 
CHECK: @g.sanitize_multiple = global i32 0, sanitize_memtag, sanitize_address_dyninit +; ptrauth constant +@auth_var = global ptr ptrauth (ptr @g1, i32 0, i64 65535, ptr null) +; CHECK: @auth_var = global ptr ptrauth (ptr @g1, i32 0, i64 65535) + ;; Aliases ; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] ; [unnamed_addr] alias @ diff --git a/llvm/test/Bitcode/value-with-long-name-dbg.ll b/llvm/test/Bitcode/value-with-long-name-dbg.ll new file mode 100644 index 00000000000000..0cc3569d8617b3 --- /dev/null +++ b/llvm/test/Bitcode/value-with-long-name-dbg.ll @@ -0,0 +1,11 @@ +; REQUIRES: asserts +; Force the size to be small to check assertion message. +; RUN: not --crash opt -S %s -O2 -o - -non-global-value-max-name-size=0 2>&1 | FileCheck %s +; CHECK: Can't generate unique name: MaxNameSize is too small. + +define i32 @f(i32 %a, i32 %b) { + %c = add i32 %a, %b + %d = add i32 %c, %a + %e = add i32 %d, %b + ret i32 %e +} diff --git a/llvm/test/Bitcode/value-with-long-name.ll b/llvm/test/Bitcode/value-with-long-name.ll index 1ca5d133e09ae3..aa7da5f5b7dba9 100644 --- a/llvm/test/Bitcode/value-with-long-name.ll +++ b/llvm/test/Bitcode/value-with-long-name.ll @@ -1,10 +1,10 @@ ; Check the size of generated variable when no option is set ; RUN: opt -S %s -O2 -o - | FileCheck -check-prefix=CHECK-LONG %s +; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=-1 | FileCheck -check-prefix=CHECK-LONG %s ; CHECK-LONG: %{{[a-z]{4}[a-z]+}} ; Then check we correctly cap the size of newly generated non-global values name ; Force the size to be small so that the check works on release and debug build -; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=0 | FileCheck -check-prefix=CHECK-SHORT %s ; RUN: opt -S %s -O2 -o - -non-global-value-max-name-size=1 | FileCheck -check-prefix=CHECK-SHORT %s ; CHECK-SHORT-NOT: %{{[a-z][a-z]+}} @@ -14,5 +14,3 @@ define i32 @f(i32 %a, i32 %b) { %e = add i32 %d, %b ret i32 %e } - - diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll index e843537c10a33a..ed3222529a3bb9 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,12 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0xff000000ff0000 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %c = and <4 x i8> %b, ret <4 x i8> %c } @@ -27,6 +34,12 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_8xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff00 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %c = and <8 x i8> %b, ret <8 x i8> %c } @@ -40,6 +53,12 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_16xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.2d, #0xff00ff00ff00ff00 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %c = and <16 x i8> %b, ret <16 x i8> %c } @@ -56,6 +75,13 @@ 
define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_32xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v2.2d, #0xff00ff00ff00ff00 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %b = and <32 x i8> %ap, ret <32 x i8> %b @@ -73,6 +99,13 @@ define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_2xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov v0.s[0], wzr +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %c = and <2 x i16> %b, ret <2 x i16> %c } @@ -86,6 +119,12 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0xffff0000ffff0000 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %c = and <4 x i16> %b, ret <4 x i16> %c } @@ -99,6 +138,12 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_8xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.2d, #0xffff0000ffff0000 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %c = and <8 x i16> %b, ret <8 x i16> %c } @@ -115,6 +160,13 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: vls_sve_and_16xi16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v2.2d, #0xffff0000ffff0000 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %c = and <16 x i16> %b, ret <16 x i16> %c } @@ -128,6 +180,13 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_2xi32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov v0.s[0], wzr +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %c = and <2 x i32> %b, ret <2 x i32> %c } @@ -141,6 +200,12 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.2d, #0xffffffff00000000 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %c = and <4 x i32> %b, ret <4 x i32> %c } @@ -157,6 +222,13 @@ define <8 x i32> @vls_sve_and_8xi32(<8 x i32> %b) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_8xi32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v2.2d, #0xffffffff00000000 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %c = and <8 x i32> %b, ret <8 x i32> %c } @@ -170,6 +242,11 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_2xi64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov 
v0.d[0], xzr +; NONEON-NOSVE-NEXT: ret %c = and <2 x i64> %b, ret <2 x i64> %c } @@ -185,6 +262,12 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: vls_sve_and_4xi64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov v0.d[0], xzr +; NONEON-NOSVE-NEXT: mov v1.d[0], xzr +; NONEON-NOSVE-NEXT: ret %c = and <4 x i64> %b, ret <4 x i64> %c } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll index aa42d5c2a8c132..cd6c2b489efe4c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,16 @@ define <4 x i8> @ctlz_v4i8(<4 x i8> %op) { ; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: mov w8, #8 // =0x8 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h +; NONEON-NOSVE-NEXT: sub v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -30,6 +41,11 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: ctlz_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: clz v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -42,6 +58,11 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: clz v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -55,6 +76,14 @@ define void @ctlz_v32i8(ptr %a) { ; CHECK-NEXT: clz z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: clz v0.16b, v0.16b +; NONEON-NOSVE-NEXT: clz v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -71,6 +100,16 @@ define <2 x i16> @ctlz_v2i16(<2 x i16> %op) { ; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: dup v1.2s, w8 +; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s +; NONEON-NOSVE-NEXT: sub v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -83,6 +122,11 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 
x i16> @llvm.ctlz.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -95,6 +139,11 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -108,6 +157,14 @@ define void @ctlz_v16i16(ptr %a) { ; CHECK-NEXT: clz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h +; NONEON-NOSVE-NEXT: clz v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -122,6 +179,11 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -134,6 +196,11 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -147,6 +214,14 @@ define void @ctlz_v8i32(ptr %a) { ; CHECK-NEXT: clz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s +; 
NONEON-NOSVE-NEXT: clz v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -161,6 +236,27 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushr d1, d0, #1 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushr d1, d0, #2 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushr d1, d0, #4 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushr d1, d0, #8 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushr d1, d0, #16 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushr d1, d0, #32 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: mvn v0.8b, v0.8b +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h +; NONEON-NOSVE-NEXT: uaddlp v0.1d, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -173,6 +269,27 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #1 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #2 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #4 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #8 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #16 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ushr v1.2d, 
v0.2d, #32 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -186,6 +303,46 @@ define void @ctlz_v4i64(ptr %a) { ; CHECK-NEXT: clz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctlz_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #1 +; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #1 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #2 +; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #2 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #4 +; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #4 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #8 +; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #8 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #16 +; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #16 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #32 +; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #32 +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b +; NONEON-NOSVE-NEXT: mvn v1.16b, v1.16b +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b +; 
NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h +; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s +; NONEON-NOSVE-NEXT: uaddlp v1.2d, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a @@ -205,6 +362,14 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) { ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -217,6 +382,11 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) { ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -229,6 +399,11 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) { ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -242,6 +417,14 @@ define void @ctpop_v32i8(ptr %a) { ; CHECK-NEXT: cnt z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret 
%op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -257,6 +440,15 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) { ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -269,6 +461,12 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) { ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -281,6 +479,12 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) { ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -294,6 +498,16 @@ define void @ctpop_v16i16(ptr %a) { ; CHECK-NEXT: cnt z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: 
ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -308,6 +522,13 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) { ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -320,6 +541,13 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) { ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -333,6 +561,18 @@ define void @ctpop_v8i32(ptr %a) { ; CHECK-NEXT: cnt z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -347,6 +587,14 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) { ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt 
v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h +; NONEON-NOSVE-NEXT: uaddlp v0.1d, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -359,6 +607,14 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) { ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -372,6 +628,20 @@ define void @ctpop_v4i64(ptr %a) { ; CHECK-NEXT: cnt z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ctpop_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h +; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s +; NONEON-NOSVE-NEXT: uaddlp v1.2d, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a @@ -392,6 +662,21 @@ define <4 x i8> @cttz_v4i8(<4 x i8> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #256 // =0x100 +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v2.4h, w8 +; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; 
NONEON-NOSVE-NEXT: sub v1.4h, v0.4h, v2.4h +; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h +; NONEON-NOSVE-NEXT: sub v0.4h, v1.4h, v0.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -405,6 +690,14 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.8b, #1 +; NONEON-NOSVE-NEXT: sub v1.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -418,6 +711,14 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) { ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.16b, #1 +; NONEON-NOSVE-NEXT: sub v1.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -433,6 +734,19 @@ define void @cttz_v32i8(ptr %a) { ; CHECK-NEXT: clz z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #1 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: sub v3.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: sub v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> 
@llvm.cttz.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -449,6 +763,21 @@ define <2 x i16> @cttz_v2i16(<2 x i16> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #65536 // =0x10000 +; NONEON-NOSVE-NEXT: dup v1.2s, w8 +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v2.2s, w8 +; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20 +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: sub v1.2s, v0.2s, v2.2s +; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: dup v1.2s, w8 +; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s +; NONEON-NOSVE-NEXT: sub v0.2s, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -462,6 +791,18 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10 +; NONEON-NOSVE-NEXT: sub v1.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h +; NONEON-NOSVE-NEXT: sub v0.4h, v1.4h, v0.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -475,6 +816,18 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) { ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v1.8h, w8 +; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10 +; NONEON-NOSVE-NEXT: sub v1.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b +; 
NONEON-NOSVE-NEXT: dup v1.8h, w8 +; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h +; NONEON-NOSVE-NEXT: sub v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -490,6 +843,24 @@ define void @cttz_v16i16(ptr %a) { ; CHECK-NEXT: clz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10 +; NONEON-NOSVE-NEXT: sub v3.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: sub v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: dup v2.8h, w8 +; NONEON-NOSVE-NEXT: clz v1.8h, v1.8h +; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h +; NONEON-NOSVE-NEXT: sub v1.8h, v2.8h, v1.8h +; NONEON-NOSVE-NEXT: sub v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -505,6 +876,18 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v1.2s, w8 +; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20 +; NONEON-NOSVE-NEXT: sub v1.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: dup v1.2s, w8 +; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s +; NONEON-NOSVE-NEXT: sub v0.2s, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -518,6 +901,18 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) { ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v1.4s, w8 +; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20 +; NONEON-NOSVE-NEXT: sub v1.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: dup v1.4s, w8 +; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: sub v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -533,6 +928,24 @@ define void @cttz_v8i32(ptr %a) { ; CHECK-NEXT: clz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20 +; NONEON-NOSVE-NEXT: sub v3.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: sub v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: dup v2.4s, w8 +; NONEON-NOSVE-NEXT: clz v1.4s, v1.4s +; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: sub v1.4s, v2.4s, v1.4s +; NONEON-NOSVE-NEXT: sub v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -548,6 +961,18 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: sub d1, d0, d1 +; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b +; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h +; NONEON-NOSVE-NEXT: 
uaddlp v0.1d, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -561,6 +986,18 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) { ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: dup v1.2d, x8 +; NONEON-NOSVE-NEXT: sub v1.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -576,6 +1013,26 @@ define void @cttz_v4i64(ptr %a) { ; CHECK-NEXT: clz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: cttz_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: sub v3.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: sub v0.2d, v2.2d, v0.2d +; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b +; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b +; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b +; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h +; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h +; NONEON-NOSVE-NEXT: uaddlp v1.2d, v1.4s +; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll index 
260ad16581f139..7e93ee99ed7494 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,12 @@ define void @bitcast_v4i8(ptr %a, ptr %b) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i8>, ptr %a %cast = bitcast <4 x i8> %load to <4 x i8> store volatile <4 x i8> %cast, ptr %b @@ -23,6 +30,12 @@ define void @bitcast_v8i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <8 x i8>, ptr %a %cast = bitcast <8 x i8> %load to <8 x i8> store volatile <8 x i8> %cast, ptr %b @@ -35,6 +48,12 @@ define void @bitcast_v16i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <16 x i8>, ptr %a %cast = bitcast <16 x i8> %load to <16 x i8> store volatile <16 x i8> %cast, ptr %b @@ -49,6 +68,14 @@ define void @bitcast_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: bitcast_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <32 x i8>, ptr %a %cast = bitcast <32 x i8> %load to <32 x i8> store volatile <32 x i8> %cast, ptr %b @@ -72,6 +99,16 @@ define void @bitcast_v2i16(ptr %a, ptr %b) { ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: add x8, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8] +; NONEON-NOSVE-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; NONEON-NOSVE-NEXT: str s0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i16>, ptr %a %cast = bitcast <2 x i16> %load to <2 x half> store volatile <2 x half> %cast, ptr %b @@ -84,6 +121,12 @@ define void @bitcast_v4i16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i16>, ptr %a %cast = bitcast <4 x i16> %load to <4 x half> store volatile <4 x half> %cast, ptr %b @@ -96,6 +139,12 @@ define void @bitcast_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <8 x i16>, ptr %a %cast = bitcast <8 x i16> %load to <8 x half> store volatile <8 x half> %cast, ptr %b @@ -110,6 +159,14 @@ define void @bitcast_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v16i16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <16 x i16>, ptr %a %cast = bitcast <16 x i16> %load to <16 x half> store volatile <16 x half> %cast, ptr %b @@ -122,6 +179,12 @@ define void @bitcast_v2i32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i32>, ptr %a %cast = bitcast <2 x i32> %load to <2 x float> store volatile <2 x float> %cast, ptr %b @@ -134,6 +197,12 @@ define void @bitcast_v4i32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i32>, ptr %a %cast = bitcast <4 x i32> %load to <4 x float> store volatile <4 x float> %cast, ptr %b @@ -148,6 +217,14 @@ define void @bitcast_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <8 x i32>, ptr %a %cast = bitcast <8 x i32> %load to <8 x float> store volatile <8 x float> %cast, ptr %b @@ -160,6 +237,12 @@ define void @bitcast_v1i64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; 
NONEON-NOSVE-NEXT: ret %load = load volatile <1 x i64>, ptr %a %cast = bitcast <1 x i64> %load to <1 x double> store volatile <1 x double> %cast, ptr %b @@ -172,6 +255,12 @@ define void @bitcast_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <2 x i64>, ptr %a %cast = bitcast <2 x i64> %load to <2 x double> store volatile <2 x double> %cast, ptr %b @@ -186,6 +275,14 @@ define void @bitcast_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x1, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitcast_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q1, [x1, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %load = load volatile <4 x i64>, ptr %a %cast = bitcast <4 x i64> %load to <4 x double> store volatile <4 x double> %cast, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll index 9a07bd8bd5ac9f..6b8077053b590f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64" @@ -30,6 +31,17 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: // kill: def $q0 killed 
$q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x2] +; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s +; NONEON-NOSVE-NEXT: neg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: ret %pre_cond = load <8 x i32>, ptr %pre_cond_ptr %left = load <8 x i32>, ptr %left_ptr %right = load <8 x i32>, ptr %right_ptr diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll index aec434b4819d70..318a9cf7d738b2 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -10,6 +11,12 @@ define void @build_vector_7_inc1_v4i1(ptr %a) { ; CHECK-NEXT: mov w8, #5 // =0x5 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_7_inc1_v4i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: strb w8, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i1> , ptr %a, align 1 ret void } @@ -23,6 +30,15 @@ define void @build_vector_7_inc1_v32i8(ptr %a) { ; CHECK-NEXT: add z1.b, z1.b, #23 // =0x17 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_7_inc1_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI1_1 +; 
NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI1_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <32 x i8> , ptr %a, align 1 ret void } @@ -35,6 +51,15 @@ define void @build_vector_0_inc2_v16i16(ptr %a) { ; CHECK-NEXT: add z0.h, z0.h, #16 // =0x10 ; CHECK-NEXT: str q0, [x0, #16] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI2_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI2_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x i16> , ptr %a, align 2 ret void } @@ -48,6 +73,15 @@ define void @build_vector_0_dec3_v8i32(ptr %a) { ; CHECK-NEXT: add z1.s, z0.s, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI3_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI3_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i32> , ptr %a, align 4 ret void } @@ -64,6 +98,15 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) { ; CHECK-NEXT: add z0.d, z0.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_minus2_dec32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI4_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI4_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i64> , ptr %a, align 8 ret void } @@ -76,6 +119,15 @@ define void @build_vector_no_stride_v4i64(ptr %a) { ; CHECK-NEXT: index z1.d, #0, #4 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
build_vector_no_stride_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI5_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI5_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI5_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i64> , ptr %a, align 8 ret void } @@ -89,6 +141,15 @@ define void @build_vector_0_inc2_v16f16(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI6_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI6_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI6_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI6_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x half> , ptr %a, align 2 ret void } @@ -103,6 +164,15 @@ define void @build_vector_0_dec3_v8f32(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI7_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI7_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI7_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI7_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x float> , ptr %a, align 4 ret void } @@ -117,6 +187,15 @@ define void @build_vector_minus2_dec32_v4f64(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI8_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_minus2_dec32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI8_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI8_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x double> , ptr %a, align 8 ret void } @@ -131,6 +210,15 @@ define 
void @build_vector_no_stride_v4f64(ptr %a) { ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI9_1] ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: build_vector_no_stride_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI9_1 +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] +; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI9_1] +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x double> , ptr %a, align 8 ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll index 82e75d6efda352..d2bfc7d4e80969 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -40,6 +41,11 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = shufflevector <4 x i8> %op1, <4 x i8> %op2, <8 x i32> ret <8 x i8> %res } @@ -53,6 +59,13 @@ define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; 
NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> ret <16 x i8> %res @@ -65,6 +78,13 @@ define void @concat_v32i8(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i8>, ptr %a %op2 = load <16 x i8>, ptr %b %res = shufflevector <16 x i8> %op1, <16 x i8> %op2, <32 x i32> , ptr %a %op2 = load <32 x i8>, ptr %b %res = shufflevector <32 x i8> %op1, <32 x i8> %op2, <64 x i32> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x i16> %op1, <2 x i16> %op2, <4 x i32> ret <4 x i16> %res } @@ -135,6 +168,13 @@ define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> ret <8 x i16> %res } @@ -146,6 +186,13 @@ define void @concat_v16i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %op2 = 
load <8 x i16>, ptr %b %res = shufflevector <8 x i16> %op1, <8 x i16> %op2, <16 x i32> , ptr %a %op2 = load <16 x i16>, ptr %b %res = shufflevector <16 x i16> %op1, <16 x i16> %op2, <32 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) { ; CHECK-NEXT: zip1 z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: zip1 v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> ret <2 x i32> %res } @@ -199,6 +259,13 @@ define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> ret <4 x i32> %res } @@ -210,6 +277,13 @@ define void @concat_v8i32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %op2 = load <4 x i32>, ptr %b %res = shufflevector <4 x i32> %op1, <4 x i32> %op2, <8 x i32> @@ -225,6 +299,14 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 
= load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = shufflevector <8 x i32> %op1, <8 x i32> %op2, <16 x i32> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> ret <2 x i64> %res } @@ -258,6 +347,13 @@ define void @concat_v4i64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %res = shufflevector <2 x i64> %op1, <2 x i64> %op2, <4 x i32> @@ -273,6 +369,14 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = shufflevector <4 x i64> %op1, <4 x i64> %op2, <8 x i32> @@ -300,6 +404,11 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: zip1 v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> 
ret <4 x half> %res } @@ -313,6 +422,13 @@ define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> ret <8 x half> %res } @@ -324,6 +440,13 @@ define void @concat_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %op2 = load <8 x half>, ptr %b %res = shufflevector <8 x half> %op1, <8 x half> %op2, <16 x i32> , ptr %a %op2 = load <16 x half>, ptr %b %res = shufflevector <16 x half> %op1, <16 x half> %op2, <32 x i32> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) { ; CHECK-NEXT: zip1 z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: zip1 v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> ret <2 x float> %res } @@ -377,6 +513,13 @@ define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret 
%res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> ret <4 x float> %res } @@ -388,6 +531,13 @@ define void @concat_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %op2 = load <4 x float>, ptr %b %res = shufflevector <4 x float> %op1, <4 x float> %op2, <8 x i32> @@ -403,6 +553,14 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = shufflevector <8 x float> %op1, <8 x float> %op2, <16 x i32> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> ret <2 x double> %res } @@ -436,6 +601,13 @@ define void @concat_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x 
double>, ptr %a %op2 = load <2 x double>, ptr %b %res = shufflevector <2 x double> %op1, <2 x double> %op2, <4 x i32> @@ -451,6 +623,14 @@ define void @concat_v8f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: stp q0, q1, [x2, #32] ; CHECK-NEXT: stp q3, q2, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = shufflevector <4 x double> %op1, <4 x double> %op2, <8 x i32> @@ -468,6 +648,12 @@ define void @concat_v32i8_undef(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v32i8_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i8>, ptr %a %res = shufflevector <16 x i8> %op1, <16 x i8> undef, <32 x i32> , ptr %a %res = shufflevector <8 x i16> %op1, <8 x i16> undef, <16 x i32> @@ -496,6 +688,12 @@ define void @concat_v8i32_undef(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = shufflevector <4 x i32> %op1, <4 x i32> undef, <8 x i32> store <8 x i32> %res, ptr %b @@ -508,6 +706,12 @@ define void @concat_v4i64_undef(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i64_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %res = shufflevector <2 x i64> %op1, <2 x i64> undef, <4 x i32> store <4 x 
i64> %res, ptr %b @@ -524,6 +728,12 @@ define void @concat_v32i8_4op(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v32i8_4op: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i8>, ptr %a %shuffle = shufflevector <8 x i8> %op1, <8 x i8> undef, <16 x i32> @@ -541,6 +751,12 @@ define void @concat_v16i16_4op(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v16i16_4op: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %shuffle = shufflevector <4 x i16> %op1, <4 x i16> undef, <8 x i32> %res = shufflevector <8 x i16> %shuffle, <8 x i16> undef, <16 x i32> , ptr %a %shuffle = shufflevector <2 x i32> %op1, <2 x i32> undef, <4 x i32> %res = shufflevector <4 x i32> %shuffle, <4 x i32> undef, <8 x i32> @@ -568,6 +790,12 @@ define void @concat_v4i64_4op(ptr %a, ptr %b) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: concat_v4i64_4op: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x i64>, ptr %a %shuffle = shufflevector <1 x i64> %op1, <1 x i64> undef, <2 x i32> %res = shufflevector <2 x i64> %shuffle, <2 x i64> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll index 040e5861e98101..728b85d39bb37f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve 
-force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,12 @@ define <8 x i16> @load_zext_v8i8i16(ptr %ap) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v8i8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i8>, ptr %ap %val = zext <8 x i8> %a to <8 x i16> ret <8 x i16> %val @@ -23,6 +30,12 @@ define <4 x i32> @load_zext_v4i16i32(ptr %ap) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v4i16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i16>, ptr %ap %val = zext <4 x i16> %a to <4 x i32> ret <4 x i32> %val @@ -35,6 +48,12 @@ define <2 x i64> @load_zext_v2i32i64(ptr %ap) { ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v2i32i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ret %a = load <2 x i32>, ptr %ap %val = zext <2 x i32> %a to <2 x i64> ret <2 x i64> %val @@ -54,6 +73,19 @@ define <2 x i256> @load_zext_v2i64i256(ptr %ap) { ; CHECK-NEXT: mov x7, xzr ; CHECK-NEXT: fmov x4, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v2i64i256: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: mov x1, xzr +; NONEON-NOSVE-NEXT: mov x2, xzr +; NONEON-NOSVE-NEXT: mov x3, xzr +; NONEON-NOSVE-NEXT: mov x5, xzr +; NONEON-NOSVE-NEXT: mov x6, xzr +; 
NONEON-NOSVE-NEXT: mov x4, v0.d[1] +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: mov x7, xzr +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = zext <2 x i64> %a to <2 x i256> ret <2 x i256> %val @@ -75,6 +107,24 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap) { ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v16i8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: sshll v1.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v2.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v0.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #16] +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ldr d3, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d4, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v1.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v4.4h, #0 +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i8>, ptr %ap %val = sext <16 x i8> %a to <16 x i32> ret <16 x i32> %val @@ -90,6 +140,17 @@ define <8 x i32> @load_sext_v8i16i32(ptr %ap) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v8i16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i16>, ptr %ap %val = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %val @@ -121,6 +182,39 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) { ; CHECK-NEXT: stp x12, x12, [x8, #112] ; CHECK-NEXT: stp x11, x12, [x8, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_sext_v4i32i256: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: add x10, x8, #32 +; NONEON-NOSVE-NEXT: add x11, x8, #96 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: mov x9, v0.d[1] +; NONEON-NOSVE-NEXT: st1 { v0.d }[1], [x10] +; NONEON-NOSVE-NEXT: fmov x10, d0 +; NONEON-NOSVE-NEXT: st1 { v1.d }[1], [x11] +; NONEON-NOSVE-NEXT: mov x11, v1.d[1] +; NONEON-NOSVE-NEXT: asr x10, x10, #63 +; NONEON-NOSVE-NEXT: str d0, [x8] +; NONEON-NOSVE-NEXT: asr x9, x9, #63 +; NONEON-NOSVE-NEXT: str d1, [x8, #64] +; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #16] +; NONEON-NOSVE-NEXT: stp x9, x9, [x8, #48] +; NONEON-NOSVE-NEXT: str x9, [x8, #40] +; NONEON-NOSVE-NEXT: fmov x9, d1 +; NONEON-NOSVE-NEXT: str x10, [x8, #8] +; NONEON-NOSVE-NEXT: asr x10, x11, #63 +; NONEON-NOSVE-NEXT: asr x9, x9, #63 +; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #112] +; NONEON-NOSVE-NEXT: str x10, [x8, #104] +; NONEON-NOSVE-NEXT: stp x9, x9, [x8, #80] +; NONEON-NOSVE-NEXT: str x9, [x8, #72] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = sext <4 x i32> %a to <4 x i256> ret <4 x i256> %val @@ -154,6 +248,22 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) { ; CHECK-NEXT: fmov x1, d6 ; CHECK-NEXT: fmov x5, d0 ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: load_sext_v2i64i256: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: dup v1.2d, v0.d[1] +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: asr x1, x0, #63 +; NONEON-NOSVE-NEXT: asr x5, x8, #63 +; NONEON-NOSVE-NEXT: mov x2, x1 +; NONEON-NOSVE-NEXT: mov x3, x1 +; NONEON-NOSVE-NEXT: mov v1.d[1], x5 +; NONEON-NOSVE-NEXT: mov x6, x5 +; NONEON-NOSVE-NEXT: mov x7, x5 +; NONEON-NOSVE-NEXT: fmov x4, d1 +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = sext <2 x i64> %a to <2 x i256> ret <2 x i256> %val @@ -187,6 +297,34 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap) { ; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 ; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_zext_v16i16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ushll v2.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v3.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v4.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ushll v5.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v0.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #32] +; NONEON-NOSVE-NEXT: ushll v2.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d6, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #40] +; NONEON-NOSVE-NEXT: stp q5, q3, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d16, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d17, [sp, #72] +; NONEON-NOSVE-NEXT: ushll v1.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: ushll v6.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: ushll v5.2d, v16.2s, #0 +; NONEON-NOSVE-NEXT: ushll v7.2d, v17.2s, #0 +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %ap %val = zext <16 x i16> %a to <16 x i64> ret <16 x i64> %val diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll index 45a804becbc557..ec6341d6085a0a 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -27,6 +28,11 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i1: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: zip2 v0.8b, v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %ret = call <4 x i1> @llvm.vector.extract.v4i1.v8i1(<8 x i1> %op, i64 4) ret <4 x i1> %ret } @@ -54,6 +60,11 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: zip2 v0.8b, v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %ret = call <4 x i8> @llvm.vector.extract.v4i8.v8i8(<8 x i8> %op, i64 4) ret <4 x i8> %ret } @@ -65,6 +76,14 @@ define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %op, i64 8) ret <8 x i8> %ret } @@ -75,6 +94,12 @@ define void @extract_subvector_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %ret = call <16 x i8> @llvm.vector.extract.v16i8.v32i8(<32 x i8> %op, i64 16) store <16 x i8> %ret, ptr %b @@ -91,6 +116,15 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2) ret <2 x i16> %ret } @@ -102,6 +136,14 @@ define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %op, i64 4) ret <4 x i16> %ret } @@ -112,6 +154,12 @@ define void @extract_subvector_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %ret = call <8 x i16> @llvm.vector.extract.v8i16.v16i16(<16 x i16> %op, i64 8) store <8 x i16> %ret, ptr %b @@ -127,6 +175,12 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) { ; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[1] +; NONEON-NOSVE-NEXT: ret %ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1) ret <1 x i32> %ret } @@ -138,6 +192,14 @@ define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4i32: 
+; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %op, i64 2) ret <2 x i32> %ret } @@ -148,6 +210,12 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %ret = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %op, i64 4) store <4 x i32> %ret, ptr %b @@ -163,6 +231,14 @@ define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <1 x i64> @llvm.vector.extract.v1i64.v2i64(<2 x i64> %op, i64 1) ret <1 x i64> %ret } @@ -173,6 +249,12 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %ret = call <2 x i64> @llvm.vector.extract.v2i64.v4i64(<4 x i64> %op, i64 2) store <2 x i64> %ret, ptr %b @@ -190,6 +272,12 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) { ; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[1] +; NONEON-NOSVE-NEXT: ret %ret = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %op, i64 2) ret <2 x half> %ret } @@ -201,6 +289,14 @@ define <4 x half> @extract_subvector_v8f16(<8 x half> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <4 x half> @llvm.vector.extract.v4f16.v8f16(<8 x half> %op, i64 4) ret <4 x half> %ret } @@ -211,6 +307,12 @@ define void @extract_subvector_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %ret = call <8 x half> @llvm.vector.extract.v8f16.v16f16(<16 x half> %op, i64 8) store <8 x half> %ret, ptr %b @@ -226,6 +328,12 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) { ; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[1] +; NONEON-NOSVE-NEXT: ret %ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1) ret <1 x float> %ret } @@ -237,6 +345,14 @@ define <2 x float> @extract_subvector_v4f32(<4 x float> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> %op, i64 2) ret <2 x float> %ret } @@ -247,6 +363,12 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %ret = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> %op, i64 4) store <4 x float> %ret, ptr %b @@ -262,6 +384,14 @@ define <1 x double> @extract_subvector_v2f64(<2 x double> %op) { ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %ret = call <1 x double> @llvm.vector.extract.v1f64.v2f64(<2 x double> %op, i64 1) ret <1 x double> %ret } @@ -272,6 +402,12 @@ define void @extract_subvector_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extract_subvector_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %ret = call <2 x double> @llvm.vector.extract.v2f64.v4f64(<4 x double> %op, i64 2) store <2 x double> %ret, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll index 9c3b5e14289dc1..ac60a614d7ce6c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,12 @@ define half @extractelement_v2f16(<2 x half> %op1) { ; CHECK-NEXT: mov z0.h, z0.h[1] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h0, v0.h[1] +; NONEON-NOSVE-NEXT: ret %r = extractelement <2 x half> %op1, i64 1 ret half %r } @@ -26,6 +33,12 @@ define half @extractelement_v4f16(<4 x half> %op1) { ; 
CHECK-NEXT: mov z0.h, z0.h[3] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: ret %r = extractelement <4 x half> %op1, i64 3 ret half %r } @@ -37,6 +50,11 @@ define half @extractelement_v8f16(<8 x half> %op1) { ; CHECK-NEXT: mov z0.h, z0.h[7] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: ret %r = extractelement <8 x half> %op1, i64 7 ret half %r } @@ -48,6 +66,11 @@ define half @extractelement_v16f16(ptr %a) { ; CHECK-NEXT: mov z0.h, z0.h[7] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0, #30] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %r = extractelement <16 x half> %op1, i64 15 ret half %r @@ -60,6 +83,12 @@ define float @extractelement_v2f32(<2 x float> %op1) { ; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov s0, v0.s[1] +; NONEON-NOSVE-NEXT: ret %r = extractelement <2 x float> %op1, i64 1 ret float %r } @@ -71,6 +100,11 @@ define float @extractelement_v4f32(<4 x float> %op1) { ; CHECK-NEXT: mov z0.s, z0.s[3] ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov s0, v0.s[3] +; NONEON-NOSVE-NEXT: ret %r = extractelement <4 x float> %op1, i64 3 ret float %r } @@ -82,6 +116,11 @@ define float @extractelement_v8f32(ptr %a) { ; 
CHECK-NEXT: mov z0.s, z0.s[3] ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0, #28] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %r = extractelement <8 x float> %op1, i64 7 ret float %r @@ -91,6 +130,10 @@ define double @extractelement_v1f64(<1 x double> %op1) { ; CHECK-LABEL: extractelement_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %r = extractelement <1 x double> %op1, i64 0 ret double %r } @@ -101,6 +144,11 @@ define double @extractelement_v2f64(<2 x double> %op1) { ; CHECK-NEXT: mov z0.d, z0.d[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov d0, v0.d[1] +; NONEON-NOSVE-NEXT: ret %r = extractelement <2 x double> %op1, i64 1 ret double %r } @@ -112,6 +160,11 @@ define double @extractelement_v4f64(ptr %a) { ; CHECK-NEXT: mov z0.d, z0.d[1] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extractelement_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0, #24] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %r = extractelement <4 x double> %op1, i64 3 ret double %r diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index 21ce689f68e23a..c1d84f6a15ed8c 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; 
RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -28,6 +29,16 @@ define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str d1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: ldr d2, [x1] +; NONEON-NOSVE-NEXT: dup v0.4h, w8 +; NONEON-NOSVE-NEXT: bsl v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %ap %b = load <4 x half>, ptr %bp %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) @@ -54,6 +65,16 @@ define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: bsl v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <8 x half>, ptr %ap %b = load <8 x half>, ptr %bp %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) @@ -84,6 +105,17 @@ define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d ; SVE2-NEXT: stp q2, q3, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v16f16_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff +; NONEON-NOSVE-NEXT: ldp q1, q4, [x1] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl 
v0.16b, v3.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <16 x half>, ptr %ap %b = load <16 x half>, ptr %bp %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) @@ -112,6 +144,16 @@ define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str d1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d0, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: ldr d2, [x1] +; NONEON-NOSVE-NEXT: fneg v0.2s, v0.2s +; NONEON-NOSVE-NEXT: bsl v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <2 x float>, ptr %ap %b = load <2 x float>, ptr %bp %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) @@ -138,6 +180,16 @@ define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: bsl v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <4 x float>, ptr %ap %b = load <4 x float>, ptr %bp %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) @@ -168,6 +220,17 @@ define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d ; SVE2-NEXT: stp q2, q3, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v8f32_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldp q1, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b +; 
NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <8 x float>, ptr %ap %b = load <8 x float>, ptr %bp %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) @@ -196,6 +259,16 @@ define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d ; SVE2-NEXT: str q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x1] +; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: bsl v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <2 x double>, ptr %ap %b = load <2 x double>, ptr %bp %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) @@ -226,6 +299,17 @@ define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d ; SVE2-NEXT: stp q2, q3, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldp q1, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <4 x double>, ptr %ap %b = load <4 x double>, ptr %bp %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) @@ -260,6 +344,17 @@ define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str d2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d0, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr d2, [x0] +; 
NONEON-NOSVE-NEXT: fcvtn v1.2s, v1.2d +; NONEON-NOSVE-NEXT: fneg v0.2s, v0.2s +; NONEON-NOSVE-NEXT: bsl v0.8b, v2.8b, v1.8b +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <2 x float>, ptr %ap %b = load <2 x double>, ptr %bp %tmp0 = fptrunc <2 x double> %b to <2 x float> @@ -304,6 +399,18 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: fcvtn v1.2s, v1.2d +; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.4s, v2.2d +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <4 x float>, ptr %ap %b = load <4 x double>, ptr %bp %tmp0 = fptrunc <4 x double> %b to <4 x float> @@ -337,6 +444,17 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: ldr d1, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <2 x double>, ptr %ap %b = load < 2 x float>, ptr %bp %tmp0 = fpext <2 x float> %b to <2 x double> @@ -381,6 +499,23 @@ define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z4.d, z4.d, z1.d, z2.d ; SVE2-NEXT: stp q3, q4, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; 
NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff +; NONEON-NOSVE-NEXT: str q1, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtl v4.2d, v4.2s +; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %a = load <4 x double>, ptr %ap %b = load <4 x float>, ptr %bp %tmp0 = fpext <4 x float> %b to <4 x double> @@ -416,6 +551,17 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str d2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff +; NONEON-NOSVE-NEXT: ldr d2, [x0] +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: bit v0.8b, v2.8b, v1.8b +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %ap %b = load <4 x float>, ptr %bp %tmp0 = fptrunc <4 x float> %b to <4 x half> @@ -471,6 +617,25 @@ define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) { ; SVE2-NEXT: str d5, [x0] ; SVE2-NEXT: add sp, sp, #16 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x1] +; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff +; NONEON-NOSVE-NEXT: mov d1, v0.d[1] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: fcvt h1, d1 +; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, d2 +; NONEON-NOSVE-NEXT: mov d2, v2.d[1] +; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, d2 +; NONEON-NOSVE-NEXT: ldr d2, [x0] +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: dup v1.4h, w8 +; 
NONEON-NOSVE-NEXT: bit v0.8b, v2.8b, v1.8b +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %ap %b = load <4 x double>, ptr %bp %tmp0 = fptrunc <4 x double> %b to <4 x half> @@ -514,6 +679,18 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) { ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: dup v1.8h, w8 +; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %a = load <8 x half>, ptr %ap %b = load <8 x float>, ptr %bp %tmp0 = fptrunc <8 x float> %b to <8 x half> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll index b0a82e699939f1..b51b89d08844d0 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,14 @@ define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: 
fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fadd <2 x half> %op1, %op2 ret <2 x half> %res } @@ -30,6 +39,14 @@ define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fadd <4 x half> %op1, %op2 ret <4 x half> %res } @@ -43,6 +60,18 @@ define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fadd v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fadd v1.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fadd <8 x half> %op1, %op2 ret <8 x half> %res } @@ -58,6 +87,29 @@ define void @fadd_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; 
NONEON-NOSVE-NEXT: fadd v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: fadd v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fadd v2.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s +; NONEON-NOSVE-NEXT: stp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fadd <16 x half> %op1, %op2 @@ -74,6 +126,11 @@ define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = fadd <2 x float> %op1, %op2 ret <2 x float> %res } @@ -87,6 +144,11 @@ define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fadd <4 x float> %op1, %op2 ret <4 x float> %res } @@ -102,6 +164,15 @@ define void @fadd_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fadd v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fadd <8 x float> %op1, %op2 @@ -118,6 +189,11 @@ define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, 
z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = fadd <2 x double> %op1, %op2 ret <2 x double> %res } @@ -133,6 +209,15 @@ define void @fadd_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fadd v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fadd <4 x double> %op1, %op2 @@ -153,6 +238,14 @@ define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fdiv <2 x half> %op1, %op2 ret <2 x half> %res } @@ -166,6 +259,14 @@ define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fdiv <4 x half> %op1, %op2 ret <4 x half> %res } @@ -179,6 +280,18 @@ define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h ; 
CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fdiv v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fdiv v1.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fdiv <8 x half> %op1, %op2 ret <8 x half> %res } @@ -194,6 +307,30 @@ define void @fdiv_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q4, q1, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v5.4s, v4.8h +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v4.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fdiv v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: ldr q3, [x0] +; NONEON-NOSVE-NEXT: fcvtl2 v6.4s, v3.8h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: fdiv v3.4s, v3.4s, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fdiv v5.4s, v6.4s, v5.4s +; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fdiv <16 x half> %op1, %op2 @@ -210,6 +347,11 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v2f32: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fdiv v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = fdiv <2 x float> %op1, %op2 ret <2 x float> %res } @@ -223,6 +365,11 @@ define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fdiv <4 x float> %op1, %op2 ret <4 x float> %res } @@ -238,6 +385,15 @@ define void @fdiv_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fdiv v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fdiv v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fdiv <8 x float> %op1, %op2 @@ -254,6 +410,11 @@ define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fdiv v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = fdiv <2 x double> %op1, %op2 ret <2 x double> %res } @@ -269,6 +430,15 @@ define void @fdiv_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fdiv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fdiv v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fdiv v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr 
%a %op2 = load <4 x double>, ptr %b %res = fdiv <4 x double> %op1, %op2 @@ -290,6 +460,46 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: fcvt s16, h0 +; NONEON-NOSVE-NEXT: mov h17, v2.h[2] +; NONEON-NOSVE-NEXT: mov h18, v1.h[2] +; NONEON-NOSVE-NEXT: mov h19, v0.h[2] +; NONEON-NOSVE-NEXT: mov h2, v2.h[3] +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fmadd s6, s16, s7, s6 +; NONEON-NOSVE-NEXT: mov h16, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s7, h19 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmadd s3, s5, s4, s3 +; NONEON-NOSVE-NEXT: fcvt s4, h17 +; NONEON-NOSVE-NEXT: fcvt s5, h18 +; NONEON-NOSVE-NEXT: fcvt h0, s6 +; NONEON-NOSVE-NEXT: fmadd s4, s7, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h16 +; NONEON-NOSVE-NEXT: mov v0.h[1], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: fmadd s1, s5, s1, s2 +; NONEON-NOSVE-NEXT: mov v0.h[2], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.fma.v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) ret <2 x half> %res } @@ -304,6 +514,46 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> 
%op2, <4 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2 +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: fcvt s16, h0 +; NONEON-NOSVE-NEXT: mov h17, v2.h[2] +; NONEON-NOSVE-NEXT: mov h18, v1.h[2] +; NONEON-NOSVE-NEXT: mov h19, v0.h[2] +; NONEON-NOSVE-NEXT: mov h2, v2.h[3] +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fmadd s6, s16, s7, s6 +; NONEON-NOSVE-NEXT: mov h16, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s7, h19 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmadd s3, s5, s4, s3 +; NONEON-NOSVE-NEXT: fcvt s4, h17 +; NONEON-NOSVE-NEXT: fcvt s5, h18 +; NONEON-NOSVE-NEXT: fcvt h0, s6 +; NONEON-NOSVE-NEXT: fmadd s4, s7, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h16 +; NONEON-NOSVE-NEXT: mov v0.h[1], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: fmadd s1, s5, s1, s2 +; NONEON-NOSVE-NEXT: mov v0.h[2], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.fma.v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) ret <4 x half> %res } @@ -318,6 +568,79 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: fma_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h3, v2.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: fcvt s16, h0 +; NONEON-NOSVE-NEXT: mov h17, v2.h[2] +; NONEON-NOSVE-NEXT: mov h18, v1.h[2] +; NONEON-NOSVE-NEXT: mov h19, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fmadd s6, s16, s7, s6 +; NONEON-NOSVE-NEXT: fcvt s7, h17 +; NONEON-NOSVE-NEXT: fcvt s16, h18 +; NONEON-NOSVE-NEXT: fcvt s17, h19 +; NONEON-NOSVE-NEXT: mov h18, v1.h[3] +; NONEON-NOSVE-NEXT: mov h19, v0.h[3] +; NONEON-NOSVE-NEXT: fmadd s4, s5, s4, s3 +; NONEON-NOSVE-NEXT: mov h5, v2.h[3] +; NONEON-NOSVE-NEXT: fcvt h3, s6 +; NONEON-NOSVE-NEXT: fmadd s6, s17, s16, s7 +; NONEON-NOSVE-NEXT: mov h17, v2.h[4] +; NONEON-NOSVE-NEXT: fcvt s7, h18 +; NONEON-NOSVE-NEXT: fcvt s16, h19 +; NONEON-NOSVE-NEXT: mov h18, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: mov h19, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: mov v3.h[1], v4.h[0] +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: fmadd s5, s16, s7, s5 +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: mov h16, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: mov v3.h[2], v6.h[0] +; NONEON-NOSVE-NEXT: mov h6, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fmadd s17, s19, s18, s17 +; NONEON-NOSVE-NEXT: mov h18, v1.h[6] +; NONEON-NOSVE-NEXT: mov h19, v0.h[6] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fmadd s4, s16, s7, s4 +; 
NONEON-NOSVE-NEXT: mov v3.h[3], v5.h[0] +; NONEON-NOSVE-NEXT: fcvt s5, h6 +; NONEON-NOSVE-NEXT: fcvt s6, h18 +; NONEON-NOSVE-NEXT: fcvt s7, h19 +; NONEON-NOSVE-NEXT: fcvt h16, s17 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fmadd s5, s7, s6, s5 +; NONEON-NOSVE-NEXT: mov v3.h[4], v16.h[0] +; NONEON-NOSVE-NEXT: fmadd s0, s0, s1, s2 +; NONEON-NOSVE-NEXT: mov v3.h[5], v4.h[0] +; NONEON-NOSVE-NEXT: fcvt h4, s5 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v3.h[6], v4.h[0] +; NONEON-NOSVE-NEXT: mov v3.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: mov v0.16b, v3.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) ret <8 x half> %res } @@ -334,6 +657,150 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q3, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q4, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q5, q2, [x2] +; NONEON-NOSVE-NEXT: mov h25, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s19, h0 +; NONEON-NOSVE-NEXT: mov h24, v0.h[2] +; NONEON-NOSVE-NEXT: mov h17, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s18, h1 +; NONEON-NOSVE-NEXT: mov h22, v1.h[2] +; NONEON-NOSVE-NEXT: mov h16, v2.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: mov h20, v2.h[2] +; NONEON-NOSVE-NEXT: mov h26, v5.h[1] +; NONEON-NOSVE-NEXT: mov h27, v4.h[1] +; NONEON-NOSVE-NEXT: mov h28, v3.h[1] +; NONEON-NOSVE-NEXT: fcvt s25, h25 +; NONEON-NOSVE-NEXT: mov h7, v2.h[3] +; NONEON-NOSVE-NEXT: mov h29, v4.h[2] +; NONEON-NOSVE-NEXT: fcvt s23, h17 +; NONEON-NOSVE-NEXT: mov h17, v0.h[3] +; NONEON-NOSVE-NEXT: mov h30, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s21, h16 +; NONEON-NOSVE-NEXT: fmadd s6, s19, s18, s6 +; NONEON-NOSVE-NEXT: fcvt s18, h20 +; NONEON-NOSVE-NEXT: fcvt s19, 
h22 +; NONEON-NOSVE-NEXT: fcvt s20, h24 +; NONEON-NOSVE-NEXT: mov h16, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s22, h5 +; NONEON-NOSVE-NEXT: fcvt s24, h4 +; NONEON-NOSVE-NEXT: fcvt s26, h26 +; NONEON-NOSVE-NEXT: fcvt s27, h27 +; NONEON-NOSVE-NEXT: fcvt s28, h28 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fmadd s21, s25, s23, s21 +; NONEON-NOSVE-NEXT: fcvt s23, h3 +; NONEON-NOSVE-NEXT: mov h25, v5.h[2] +; NONEON-NOSVE-NEXT: fmadd s18, s20, s19, s18 +; NONEON-NOSVE-NEXT: mov h19, v3.h[2] +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: mov h31, v0.h[4] +; NONEON-NOSVE-NEXT: fmadd s26, s28, s27, s26 +; NONEON-NOSVE-NEXT: mov h27, v4.h[3] +; NONEON-NOSVE-NEXT: mov h28, v3.h[3] +; NONEON-NOSVE-NEXT: fmadd s22, s23, s24, s22 +; NONEON-NOSVE-NEXT: fcvt h20, s21 +; NONEON-NOSVE-NEXT: mov h21, v2.h[4] +; NONEON-NOSVE-NEXT: fcvt s23, h25 +; NONEON-NOSVE-NEXT: fcvt s24, h29 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fmadd s16, s17, s16, s7 +; NONEON-NOSVE-NEXT: mov h25, v5.h[3] +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt h26, s26 +; NONEON-NOSVE-NEXT: mov h29, v2.h[5] +; NONEON-NOSVE-NEXT: mov v6.h[1], v20.h[0] +; NONEON-NOSVE-NEXT: fcvt s17, h21 +; NONEON-NOSVE-NEXT: fcvt s20, h30 +; NONEON-NOSVE-NEXT: fmadd s19, s19, s24, s23 +; NONEON-NOSVE-NEXT: fcvt s21, h31 +; NONEON-NOSVE-NEXT: fcvt h7, s22 +; NONEON-NOSVE-NEXT: fcvt s22, h25 +; NONEON-NOSVE-NEXT: fcvt s23, h27 +; NONEON-NOSVE-NEXT: fcvt s24, h28 +; NONEON-NOSVE-NEXT: mov h25, v5.h[4] +; NONEON-NOSVE-NEXT: mov h27, v4.h[4] +; NONEON-NOSVE-NEXT: mov h28, v3.h[4] +; NONEON-NOSVE-NEXT: mov h30, v1.h[5] +; NONEON-NOSVE-NEXT: mov h31, v0.h[5] +; NONEON-NOSVE-NEXT: mov v6.h[2], v18.h[0] +; NONEON-NOSVE-NEXT: fmadd s17, s21, s20, s17 +; NONEON-NOSVE-NEXT: mov v7.h[1], v26.h[0] +; NONEON-NOSVE-NEXT: fcvt h18, s19 +; NONEON-NOSVE-NEXT: fmadd s19, s24, s23, s22 +; NONEON-NOSVE-NEXT: mov h26, v5.h[5] +; 
NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt s20, h25 +; NONEON-NOSVE-NEXT: fcvt s21, h27 +; NONEON-NOSVE-NEXT: fcvt s22, h28 +; NONEON-NOSVE-NEXT: mov h27, v4.h[5] +; NONEON-NOSVE-NEXT: mov h28, v3.h[5] +; NONEON-NOSVE-NEXT: fcvt s23, h29 +; NONEON-NOSVE-NEXT: fcvt s24, h30 +; NONEON-NOSVE-NEXT: fcvt s25, h31 +; NONEON-NOSVE-NEXT: mov h29, v2.h[6] +; NONEON-NOSVE-NEXT: mov h30, v1.h[6] +; NONEON-NOSVE-NEXT: mov h31, v0.h[6] +; NONEON-NOSVE-NEXT: mov v7.h[2], v18.h[0] +; NONEON-NOSVE-NEXT: fcvt h18, s19 +; NONEON-NOSVE-NEXT: fmadd s19, s22, s21, s20 +; NONEON-NOSVE-NEXT: mov h20, v5.h[6] +; NONEON-NOSVE-NEXT: mov h21, v4.h[6] +; NONEON-NOSVE-NEXT: mov h22, v3.h[6] +; NONEON-NOSVE-NEXT: fcvt s26, h26 +; NONEON-NOSVE-NEXT: fmadd s23, s25, s24, s23 +; NONEON-NOSVE-NEXT: fcvt s27, h27 +; NONEON-NOSVE-NEXT: fcvt s28, h28 +; NONEON-NOSVE-NEXT: mov v6.h[3], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s17 +; NONEON-NOSVE-NEXT: fcvt s17, h29 +; NONEON-NOSVE-NEXT: fcvt s24, h30 +; NONEON-NOSVE-NEXT: fcvt s25, h31 +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: fcvt s21, h21 +; NONEON-NOSVE-NEXT: fcvt s22, h22 +; NONEON-NOSVE-NEXT: mov v7.h[3], v18.h[0] +; NONEON-NOSVE-NEXT: fmadd s26, s28, s27, s26 +; NONEON-NOSVE-NEXT: fcvt h18, s19 +; NONEON-NOSVE-NEXT: mov h5, v5.h[7] +; NONEON-NOSVE-NEXT: mov h4, v4.h[7] +; NONEON-NOSVE-NEXT: mov h3, v3.h[7] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: fmadd s17, s25, s24, s17 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fmadd s19, s22, s21, s20 +; NONEON-NOSVE-NEXT: mov v6.h[4], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s23 +; NONEON-NOSVE-NEXT: mov v7.h[4], v18.h[0] +; NONEON-NOSVE-NEXT: fcvt h18, s26 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v6.h[5], v16.h[0] +; 
NONEON-NOSVE-NEXT: mov v7.h[5], v18.h[0] +; NONEON-NOSVE-NEXT: fmadd s3, s3, s4, s5 +; NONEON-NOSVE-NEXT: fcvt h4, s19 +; NONEON-NOSVE-NEXT: fcvt h5, s17 +; NONEON-NOSVE-NEXT: fmadd s0, s0, s1, s2 +; NONEON-NOSVE-NEXT: mov v7.h[6], v4.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s3 +; NONEON-NOSVE-NEXT: mov v6.h[6], v5.h[0] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v7.h[7], v1.h[0] +; NONEON-NOSVE-NEXT: mov v6.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: stp q7, q6, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %op3 = load <16 x half>, ptr %c @@ -352,6 +819,12 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmla v2.2s, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) ret <2 x float> %res } @@ -366,6 +839,12 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmla v2.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) ret <4 x float> %res } @@ -382,6 +861,16 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q5, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fmla v1.4s, v0.4s, v2.4s 
+; NONEON-NOSVE-NEXT: fmla v5.4s, v4.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q1, q5, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %op3 = load <8 x float>, ptr %c @@ -400,6 +889,12 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmla v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) ret <2 x double> %res } @@ -416,6 +911,16 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q5, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fmla v1.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: fmla v5.2d, v4.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q1, q5, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %op3 = load <4 x double>, ptr %c @@ -437,6 +942,14 @@ define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fmul <2 x half> %op1, %op2 ret <2 x half> %res } @@ -450,6 +963,14 @@ define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 
killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fmul <4 x half> %op1, %op2 ret <4 x half> %res } @@ -463,6 +984,18 @@ define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fmul v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fmul v1.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fmul <8 x half> %op1, %op2 ret <8 x half> %res } @@ -478,6 +1011,29 @@ define void @fmul_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fmul v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: fmul v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: fmul v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmul v2.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; 
NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s +; NONEON-NOSVE-NEXT: stp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fmul <16 x half> %op1, %op2 @@ -494,6 +1050,11 @@ define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmul v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = fmul <2 x float> %op1, %op2 ret <2 x float> %res } @@ -507,6 +1068,11 @@ define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fmul <4 x float> %op1, %op2 ret <4 x float> %res } @@ -522,6 +1088,15 @@ define void @fmul_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmul v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmul v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fmul <8 x float> %op1, %op2 @@ -538,6 +1113,11 @@ define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmul v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = fmul <2 x double> %op1, %op2 ret <2 x double> %res } @@ -553,6 +1133,15 @@ define void 
@fmul_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmul_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmul v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fmul v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fmul <4 x double> %op1, %op2 @@ -572,6 +1161,12 @@ define <2 x half> @fneg_v2f16(<2 x half> %op) { ; CHECK-NEXT: fneg z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.4h, #128, lsl #8 +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = fneg <2 x half> %op ret <2 x half> %res } @@ -584,6 +1179,12 @@ define <4 x half> @fneg_v4f16(<4 x half> %op) { ; CHECK-NEXT: fneg z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.4h, #128, lsl #8 +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = fneg <4 x half> %op ret <4 x half> %res } @@ -596,6 +1197,12 @@ define <8 x half> @fneg_v8f16(<8 x half> %op) { ; CHECK-NEXT: fneg z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v1.8h, #128, lsl #8 +; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = fneg <8 x half> %op ret <8 x half> %res } @@ -609,6 +1216,15 @@ define void @fneg_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fneg z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: 
movi v0.8h, #128, lsl #8 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = fneg <16 x half> %op store <16 x half> %res, ptr %a @@ -623,6 +1239,11 @@ define <2 x float> @fneg_v2f32(<2 x float> %op) { ; CHECK-NEXT: fneg z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fneg v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = fneg <2 x float> %op ret <2 x float> %res } @@ -635,6 +1256,11 @@ define <4 x float> @fneg_v4f32(<4 x float> %op) { ; CHECK-NEXT: fneg z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fneg <4 x float> %op ret <4 x float> %res } @@ -648,6 +1274,14 @@ define void @fneg_v8f32(ptr %a) { ; CHECK-NEXT: fneg z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fneg v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = fneg <8 x float> %op store <8 x float> %res, ptr %a @@ -662,6 +1296,11 @@ define <2 x double> @fneg_v2f64(<2 x double> %op) { ; CHECK-NEXT: fneg z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fneg <2 x double> %op ret <2 x double> %res } @@ -675,6 +1314,14 @@ define void @fneg_v4f64(ptr %a) { ; CHECK-NEXT: fneg z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, 
q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fneg_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fneg v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = fneg <4 x double> %op store <4 x double> %res, ptr %a @@ -693,6 +1340,30 @@ define <2 x half> @fsqrt_v2f16(<2 x half> %op) { ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: mov h3, v0.h[2] +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fsqrt s2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s1, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fsqrt s3, s3 +; NONEON-NOSVE-NEXT: fsqrt s4, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s2 +; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s3 +; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s4 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -705,6 +1376,30 @@ define <4 x half> @fsqrt_v4f16(<4 x half> %op) { ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: mov h3, v0.h[2] +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: 
fsqrt s2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fsqrt s1, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fsqrt s3, s3 +; NONEON-NOSVE-NEXT: fsqrt s4, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s2 +; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s3 +; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s4 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -717,6 +1412,48 @@ define <8 x half> @fsqrt_v8f16(<8 x half> %op) { ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: mov h3, v0.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[3] +; NONEON-NOSVE-NEXT: mov h5, v0.h[4] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: mov h7, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fsqrt s2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h0 +; NONEON-NOSVE-NEXT: fcvt h0, s2 +; NONEON-NOSVE-NEXT: fsqrt s1, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0] +; NONEON-NOSVE-NEXT: fsqrt s3, s3 +; NONEON-NOSVE-NEXT: fcvt h1, s3 +; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0] +; NONEON-NOSVE-NEXT: fsqrt s4, s4 +; NONEON-NOSVE-NEXT: fcvt h1, s4 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: fsqrt s5, s5 +; NONEON-NOSVE-NEXT: fcvt h1, s5 +; NONEON-NOSVE-NEXT: mov v0.h[4], v1.h[0] +; NONEON-NOSVE-NEXT: fsqrt s6, s6 +; NONEON-NOSVE-NEXT: fcvt h1, s6 +; 
NONEON-NOSVE-NEXT: mov v0.h[5], v1.h[0] +; NONEON-NOSVE-NEXT: fsqrt s7, s7 +; NONEON-NOSVE-NEXT: fcvt h1, s7 +; NONEON-NOSVE-NEXT: mov v0.h[6], v1.h[0] +; NONEON-NOSVE-NEXT: fsqrt s2, s16 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: mov v0.h[7], v1.h[0] +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -730,6 +1467,89 @@ define void @fsqrt_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fsqrt z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q16, [x0] +; NONEON-NOSVE-NEXT: mov h0, v1.h[1] +; NONEON-NOSVE-NEXT: mov h17, v16.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s18, h16 +; NONEON-NOSVE-NEXT: mov h19, v16.h[2] +; NONEON-NOSVE-NEXT: mov h4, v1.h[3] +; NONEON-NOSVE-NEXT: mov h20, v16.h[3] +; NONEON-NOSVE-NEXT: mov h5, v1.h[4] +; NONEON-NOSVE-NEXT: mov h21, v16.h[4] +; NONEON-NOSVE-NEXT: mov h6, v1.h[5] +; NONEON-NOSVE-NEXT: mov h22, v16.h[5] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fsqrt s2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: mov h7, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s21, h21 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s22, h22 +; NONEON-NOSVE-NEXT: mov h23, v16.h[6] +; NONEON-NOSVE-NEXT: mov h16, v16.h[7] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s23, h23 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fsqrt s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v2.h[1], v0.h[0] +; NONEON-NOSVE-NEXT: fsqrt s17, s17 +; 
NONEON-NOSVE-NEXT: fcvt h17, s17 +; NONEON-NOSVE-NEXT: fsqrt s18, s18 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: mov v18.h[1], v17.h[0] +; NONEON-NOSVE-NEXT: fsqrt s3, s3 +; NONEON-NOSVE-NEXT: fcvt h0, s3 +; NONEON-NOSVE-NEXT: mov v2.h[2], v0.h[0] +; NONEON-NOSVE-NEXT: fsqrt s19, s19 +; NONEON-NOSVE-NEXT: fcvt h17, s19 +; NONEON-NOSVE-NEXT: mov v18.h[2], v17.h[0] +; NONEON-NOSVE-NEXT: fsqrt s4, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s4 +; NONEON-NOSVE-NEXT: mov v2.h[3], v0.h[0] +; NONEON-NOSVE-NEXT: fsqrt s20, s20 +; NONEON-NOSVE-NEXT: fcvt h3, s20 +; NONEON-NOSVE-NEXT: mov v18.h[3], v3.h[0] +; NONEON-NOSVE-NEXT: fsqrt s5, s5 +; NONEON-NOSVE-NEXT: fcvt h0, s5 +; NONEON-NOSVE-NEXT: mov v2.h[4], v0.h[0] +; NONEON-NOSVE-NEXT: fsqrt s21, s21 +; NONEON-NOSVE-NEXT: fcvt h3, s21 +; NONEON-NOSVE-NEXT: mov v18.h[4], v3.h[0] +; NONEON-NOSVE-NEXT: fsqrt s6, s6 +; NONEON-NOSVE-NEXT: fcvt h0, s6 +; NONEON-NOSVE-NEXT: mov v2.h[5], v0.h[0] +; NONEON-NOSVE-NEXT: fsqrt s22, s22 +; NONEON-NOSVE-NEXT: fcvt h3, s22 +; NONEON-NOSVE-NEXT: mov v18.h[5], v3.h[0] +; NONEON-NOSVE-NEXT: fsqrt s7, s7 +; NONEON-NOSVE-NEXT: fcvt h0, s7 +; NONEON-NOSVE-NEXT: mov v2.h[6], v0.h[0] +; NONEON-NOSVE-NEXT: fsqrt s23, s23 +; NONEON-NOSVE-NEXT: fcvt h3, s23 +; NONEON-NOSVE-NEXT: mov v18.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: fsqrt s16, s16 +; NONEON-NOSVE-NEXT: fcvt h3, s16 +; NONEON-NOSVE-NEXT: mov v18.h[7], v3.h[0] +; NONEON-NOSVE-NEXT: fsqrt s1, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: stp q18, q2, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -744,6 +1564,11 @@ define <2 x float> @fsqrt_v2f32(<2 x float> %op) { ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fsqrt v0.2s, v0.2s +; 
NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -756,6 +1581,11 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %op) { ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fsqrt v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -769,6 +1599,14 @@ define void @fsqrt_v8f32(ptr %a) { ; CHECK-NEXT: fsqrt z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fsqrt v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fsqrt v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -783,6 +1621,11 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %op) { ; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fsqrt v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -796,6 +1639,14 @@ define void @fsqrt_v4f64(ptr %a) { ; CHECK-NEXT: fsqrt z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsqrt_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fsqrt v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fsqrt v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -815,6 +1666,14 @@ define <2 x half> @fsub_v2f16(<2 x half> 
%op1, <2 x half> %op2) { ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fsub v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fsub <2 x half> %op1, %op2 ret <2 x half> %res } @@ -828,6 +1687,14 @@ define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fsub v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fsub <4 x half> %op1, %op2 ret <4 x half> %res } @@ -841,6 +1708,18 @@ define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fsub v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fsub v1.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fsub <8 x half> %op1, %op2 ret <8 x half> %res } @@ -856,6 +1735,29 @@ define void @fsub_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcvtl v4.4s, 
v0.4h +; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fsub v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: fsub v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: fsub v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fsub v2.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s +; NONEON-NOSVE-NEXT: stp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fsub <16 x half> %op1, %op2 @@ -872,6 +1774,11 @@ define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fsub v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = fsub <2 x float> %op1, %op2 ret <2 x float> %res } @@ -885,6 +1792,11 @@ define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fsub v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = fsub <4 x float> %op1, %op2 ret <4 x float> %res } @@ -900,6 +1812,15 @@ define void @fsub_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fsub v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fsub v1.4s, 
v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fsub <8 x float> %op1, %op2 @@ -916,6 +1837,11 @@ define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fsub v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = fsub <2 x double> %op1, %op2 ret <2 x double> %res } @@ -931,6 +1857,15 @@ define void @fsub_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fsub_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fsub v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fsub v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fsub <4 x double> %op1, %op2 @@ -950,6 +1885,11 @@ define <2 x half> @fabs_v2f16(<2 x half> %op) { ; CHECK-NEXT: fabs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: bic v0.4h, #128, lsl #8 +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.fabs.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -962,6 +1902,11 @@ define <4 x half> @fabs_v4f16(<4 x half> %op) { ; CHECK-NEXT: fabs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: bic v0.4h, #128, lsl #8 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -974,6 +1919,11 @@ define <8 x half> @fabs_v8f16(<8 x half> %op) { ; 
CHECK-NEXT: fabs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: bic v0.8h, #128, lsl #8 +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -987,6 +1937,14 @@ define void @fabs_v16f16(ptr %a) { ; CHECK-NEXT: fabs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: bic v0.8h, #128, lsl #8 +; NONEON-NOSVE-NEXT: bic v1.8h, #128, lsl #8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -1001,6 +1959,11 @@ define <2 x float> @fabs_v2f32(<2 x float> %op) { ; CHECK-NEXT: fabs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fabs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -1013,6 +1976,11 @@ define <4 x float> @fabs_v4f32(<4 x float> %op) { ; CHECK-NEXT: fabs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fabs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -1026,6 +1994,14 @@ define void @fabs_v8f32(ptr %a) { ; CHECK-NEXT: fabs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fabs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fabs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; 
NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -1040,6 +2016,11 @@ define <2 x double> @fabs_v2f64(<2 x double> %op) { ; CHECK-NEXT: fabs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fabs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -1053,6 +2034,14 @@ define void @fabs_v4f64(ptr %a) { ; CHECK-NEXT: fabs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fabs_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fabs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fabs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll index cbd0ad66fba767..c5ed70c8a5f2f8 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,14 @@ define <2 x i16> @fcmp_oeq_v2f16(<2 x half> %op1, <2 x half> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <2 x half> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i16> ret <2 x i16> %sext @@ -34,6 +43,14 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <4 x half> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i16> ret <4 x i16> %sext @@ -49,6 +66,65 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h6 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: mov h4, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov h6, v0.h[4] +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s2, s5 +; NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; 
NONEON-NOSVE-NEXT: mov h5, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[5] +; NONEON-NOSVE-NEXT: mov h4, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[6] +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: fcvt s3, h5 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <8 x half> %op1, %op2 %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext @@ -66,6 +142,123 @@ define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt 
s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, eq +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, eq +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, eq +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, eq +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, eq +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] 
+; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, eq +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp oeq <16 x half> %op1, %op2 @@ -84,6 +277,11 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcmeq v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <2 x float> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i32> ret <2 x i32> %sext @@ -99,6 +297,11 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <4 x float> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext @@ -116,6 +319,15 @@ define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fcmeq v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %cmp = fcmp oeq <8 x float> %op1, %op2 @@ -132,6 +344,11 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK-NEXT: mov z0.d, x8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcmeq d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <1 x double> %op1, %op2 %sext = sext <1 x i1> %cmp to <1 x i64> ret <1 x i64> %sext @@ -147,6 +364,11 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcmp_oeq_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcmeq v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %cmp = fcmp oeq <2 x double> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %sext @@ -164,6 +386,15 @@ define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oeq_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcmeq v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fcmeq v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %cmp = fcmp oeq <4 x double> %op1, %op2 @@ -192,6 +423,139 @@ define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ueq_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h2 +; NONEON-NOSVE-NEXT: mov h5, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: mov h7, v1.h[2] +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s6, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[4] +; NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: csinv w12, w9, wzr, vc +; 
NONEON-NOSVE-NEXT: fcmp s7, s5 +; NONEON-NOSVE-NEXT: mov h5, v2.h[5] +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: csinv w10, w9, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: csinv w11, w9, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s6, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s6, h16 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: csinv w9, w9, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s7, s5 +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: mov h7, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w13, eq +; NONEON-NOSVE-NEXT: csinv w13, w13, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s6, s3 +; NONEON-NOSVE-NEXT: fcvt s3, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h7 +; NONEON-NOSVE-NEXT: mov h6, v0.h[2] +; NONEON-NOSVE-NEXT: mov h7, v1.h[2] +; NONEON-NOSVE-NEXT: csetm w14, eq +; NONEON-NOSVE-NEXT: csinv w14, w14, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s4, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w15, eq +; NONEON-NOSVE-NEXT: csinv w15, w15, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s5, s3 +; NONEON-NOSVE-NEXT: mov h3, v0.h[3] +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w16, eq +; NONEON-NOSVE-NEXT: csinv w16, w16, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s4, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h3 +; NONEON-NOSVE-NEXT: fmov s2, w12 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w17, eq +; NONEON-NOSVE-NEXT: csinv w17, w17, wzr, vc +; 
NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[4] +; NONEON-NOSVE-NEXT: fmov s3, w17 +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v3.h[1], w16 +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v0.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov v2.h[2], w10 +; NONEON-NOSVE-NEXT: mov v3.h[2], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: mov h7, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w11 +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov v3.h[3], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: fcvt s5, h7 +; NONEON-NOSVE-NEXT: mov v2.h[4], w9 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v3.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov v2.h[5], w13 +; NONEON-NOSVE-NEXT: mov v3.h[5], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: mov v2.h[6], w14 +; NONEON-NOSVE-NEXT: mov v3.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc +; NONEON-NOSVE-NEXT: mov v2.h[7], w15 +; NONEON-NOSVE-NEXT: mov v3.h[7], w8 +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ueq <16 x half> %op1, %op2 @@ 
-220,6 +584,139 @@ define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_one_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h2 +; NONEON-NOSVE-NEXT: mov h5, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: mov h7, v1.h[2] +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: fcmp s6, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[4] +; NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w9, mi +; NONEON-NOSVE-NEXT: csinv w12, w9, wzr, le +; NONEON-NOSVE-NEXT: fcmp s7, s5 +; NONEON-NOSVE-NEXT: mov h5, v2.h[5] +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w9, mi +; NONEON-NOSVE-NEXT: csinv w10, w9, wzr, le +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x1] +; NONEON-NOSVE-NEXT: csetm w9, mi +; NONEON-NOSVE-NEXT: csinv w11, w9, wzr, le +; NONEON-NOSVE-NEXT: fcmp s6, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s6, h16 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w9, mi +; NONEON-NOSVE-NEXT: csinv w9, 
w9, wzr, le +; NONEON-NOSVE-NEXT: fcmp s7, s5 +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: mov h7, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w13, mi +; NONEON-NOSVE-NEXT: csinv w13, w13, wzr, le +; NONEON-NOSVE-NEXT: fcmp s6, s3 +; NONEON-NOSVE-NEXT: fcvt s3, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h7 +; NONEON-NOSVE-NEXT: mov h6, v0.h[2] +; NONEON-NOSVE-NEXT: mov h7, v1.h[2] +; NONEON-NOSVE-NEXT: csetm w14, mi +; NONEON-NOSVE-NEXT: csinv w14, w14, wzr, le +; NONEON-NOSVE-NEXT: fcmp s4, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w15, mi +; NONEON-NOSVE-NEXT: csinv w15, w15, wzr, le +; NONEON-NOSVE-NEXT: fcmp s5, s3 +; NONEON-NOSVE-NEXT: mov h3, v0.h[3] +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w16, mi +; NONEON-NOSVE-NEXT: csinv w16, w16, wzr, le +; NONEON-NOSVE-NEXT: fcmp s4, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h3 +; NONEON-NOSVE-NEXT: fmov s2, w12 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w17, mi +; NONEON-NOSVE-NEXT: csinv w17, w17, wzr, le +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[4] +; NONEON-NOSVE-NEXT: fmov s3, w17 +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: mov v3.h[1], w16 +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v0.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov v2.h[2], w10 +; NONEON-NOSVE-NEXT: mov v3.h[2], w8 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: mov h7, v1.h[6] +; NONEON-NOSVE-NEXT: mov 
v2.h[3], w11 +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov v3.h[3], w8 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: fcvt s5, h7 +; NONEON-NOSVE-NEXT: mov v2.h[4], w9 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v3.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov v2.h[5], w13 +; NONEON-NOSVE-NEXT: mov v3.h[5], w8 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: fcmp s1, s0 +; NONEON-NOSVE-NEXT: mov v2.h[6], w14 +; NONEON-NOSVE-NEXT: mov v3.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le +; NONEON-NOSVE-NEXT: mov v2.h[7], w15 +; NONEON-NOSVE-NEXT: mov v3.h[7], w8 +; NONEON-NOSVE-NEXT: stp q3, q2, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp one <16 x half> %op1, %op2 @@ -244,6 +741,123 @@ define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_une_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; 
NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, ne +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, ne +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, ne +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, ne +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, ne +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, ne +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, ne +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, ne +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, ne +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; 
NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp une <16 x half> %op1, %op2 @@ -268,6 +882,123 @@ define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ogt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; 
NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, gt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, gt +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, gt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, gt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, gt +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, gt +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, gt +; 
NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, gt +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, gt +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x 
half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ogt <16 x half> %op1, %op2 @@ -295,6 +1026,123 @@ define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ugt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, hi +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, hi +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, hi +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, hi +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, 
h6 +; NONEON-NOSVE-NEXT: csetm w13, hi +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, hi +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, hi +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, hi +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, hi +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, hi +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ugt <16 x half> %op1, %op2 @@ -319,6 +1167,123 @@ define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_olt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, mi +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, mi +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr 
q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, mi +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, mi +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, mi +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, mi +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, mi +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, mi +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, mi +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: 
fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, mi +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp olt <16 x half> %op1, %op2 @@ -346,6 +1311,123 @@ define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ult_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: 
fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, lt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, lt +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, lt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, lt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, lt +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, lt +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, lt +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, lt +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, lt +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov 
v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ult <16 x half> %op1, %op2 @@ -370,6 +1452,123 @@ define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_oge_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] 
+; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, ge +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, ge +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, ge +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, ge +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, ge +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, ge +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, ge +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, ge +; 
NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, ge +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp oge <16 x half> %op1, %op2 @@ -397,6 +1596,123 @@ define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, 
[x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_uge_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, pl +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, pl +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, pl +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, pl +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, pl +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; 
NONEON-NOSVE-NEXT: csetm w14, pl +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, pl +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, pl +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, pl +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, pl +; 
NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, pl +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp uge <16 x half> %op1, %op2 @@ -421,6 +1737,123 @@ define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ole_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, ls +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, ls +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, ls +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, 
h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, ls +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, ls +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, ls +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, ls +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, ls +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, ls +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; 
NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, ls +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ole <16 x half> %op1, %op2 @@ -448,6 +1881,123 @@ define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ule_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; 
NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, le +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, le +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, le +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, le +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, le +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, le +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, le +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; 
NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ule <16 x half> %op1, %op2 @@ -472,6 +2022,123 @@ define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_uno_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov 
h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, vs +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, vs +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, vs +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, vs +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, vs +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, vs +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, vs +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, vs +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, vs +; NONEON-NOSVE-NEXT: mov 
v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, vs +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp uno <16 x half> %op1, %op2 @@ -499,6 +2166,123 @@ define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: eor z0.d, z2.d, z0.d ; CHECK-NEXT: stp q1, q0, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ord_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov 
h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, vc +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, vc +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, vc +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, vc +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, vc +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, vc +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, 
vc +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, vc +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, vc +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, vc +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 
x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp ord <16 x half> %op1, %op2 @@ -523,6 +2307,123 @@ define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_eq_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, eq +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; 
NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, eq +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, eq +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, eq +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, eq +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, eq +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: 
fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast oeq <16 x half> %op1, %op2 @@ -547,6 +2448,123 @@ define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ne_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, ne +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, ne +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; 
NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, ne +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, ne +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, ne +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, ne +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, ne +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, ne +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, ne +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; 
NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast one <16 x half> %op1, %op2 @@ -571,6 +2589,123 @@ define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_gt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; 
NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, gt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, gt +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, gt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, gt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, gt +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, gt +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, gt +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, gt +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, gt +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; 
NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, gt +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast ogt <16 x half> %op1, %op2 @@ -595,6 +2730,123 @@ define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_lt_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; 
NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, lt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, lt +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, lt +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, lt +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, lt +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, lt +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, lt +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; 
NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, lt +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, lt +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, lt +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast olt <16 x half> %op1, %op2 @@ -619,6 +2871,123 @@ define void @fcmp_ge_v16f16(ptr %a, 
ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_ge_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, ge +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, ge +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, ge +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, ge +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, ge +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; 
NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, ge +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, ge +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, ge +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, ge +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: 
mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, ge +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast oge <16 x half> %op1, %op2 @@ -643,6 +3012,123 @@ define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcmp_le_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x1, #16] +; NONEON-NOSVE-NEXT: mov h0, v2.h[1] +; NONEON-NOSVE-NEXT: mov h3, v1.h[1] +; NONEON-NOSVE-NEXT: mov h4, v2.h[2] +; NONEON-NOSVE-NEXT: mov h5, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h2 +; NONEON-NOSVE-NEXT: fcvt s7, h1 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h0, v2.h[3] +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[4] +; NONEON-NOSVE-NEXT: mov h7, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w12, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v2.h[5] +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w11, le +; NONEON-NOSVE-NEXT: fcmp s3, s0 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w9, le +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; 
NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: csetm w10, le +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[1] +; NONEON-NOSVE-NEXT: mov h5, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: csetm w13, le +; NONEON-NOSVE-NEXT: fcmp s7, s3 +; NONEON-NOSVE-NEXT: fmov s7, w12 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: csetm w14, le +; NONEON-NOSVE-NEXT: fcmp s6, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: mov v7.h[1], w8 +; NONEON-NOSVE-NEXT: csetm w15, le +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h4, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: mov v7.h[2], w11 +; NONEON-NOSVE-NEXT: csetm w16, le +; NONEON-NOSVE-NEXT: fcmp s5, s2 +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: csetm w17, le +; NONEON-NOSVE-NEXT: mov v7.h[3], w9 +; NONEON-NOSVE-NEXT: fmov s2, w17 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: mov h4, v0.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[1], w16 +; NONEON-NOSVE-NEXT: mov v7.h[4], w10 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: mov h5, v1.h[5] +; NONEON-NOSVE-NEXT: mov h6, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: mov v7.h[5], w13 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: mov h4, 
v0.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v7.h[6], w14 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s6, s5 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v7.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: fcmp s4, s3 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: fcmp s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: csetm w8, le +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: stp q2, q7, [x2] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %cmp = fcmp fast ole <16 x half> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll index 57d072a7bcd68b..055af194be211a 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,17 @@ define void @fp_convert_combine_crash(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fp_convert_combine_crash: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov v0.4s, #8.00000000 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmul v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmul v0.4s, 
v2.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %f = load <8 x float>, ptr %a %mul.i = fmul <8 x float> %f, diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll index 6a2dc3c7182527..ce8902cfa16c3d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,12 @@ define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f16_to_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %res = fpext <2 x half> %a to <2 x float> store <2 x float> %res, ptr %b ret void @@ -31,6 +38,12 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f16_to_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %res = fpext <4 x half> %a to <4 x float> store <4 x float> %res, ptr %b ret void @@ -48,6 +61,17 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvt_v8f16_to_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = fpext <8 x half> %a to <8 x float> store <8 x float> %res, ptr %b ret void @@ -72,6 +96,21 @@ define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) { ; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: stp q2, q1, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: stp q0, q3, [x0] +; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %res = fpext <16 x half> %a to <16 x float> store <16 x float> %res, ptr %b ret void @@ -90,6 +129,13 @@ define void @fcvt_v2f16_v2f32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x half>, ptr %a %res = fpext <2 x half> %op1 to <2 x float> store <2 x float> %res, ptr %b @@ -104,6 +150,13 @@ define void @fcvt_v4f16_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: fcvtl 
v0.4s, v0.4h +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fpext <4 x half> %op1 to <4 x float> store <4 x float> %res, ptr %b @@ -121,6 +174,18 @@ define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z1.s, p0/m, z1.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fpext <8 x half> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -145,6 +210,22 @@ define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x float> store <16 x float> %res, ptr %b @@ -162,6 +243,13 @@ define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt d0, h0 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f16_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: fcvt d0, h0 +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x half>, ptr %a %res = fpext <1 x half> %op1 to <1 x double> store <1 x double> %res, ptr %b @@ -176,6 +264,14 @@ define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.d, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x half>, ptr %a %res = fpext <2 x half> %op1 to <2 x double> store <2 x double> %res, ptr %b @@ -193,6 +289,19 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fpext <4 x half> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -217,6 +326,26 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s +; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s +; NONEON-NOSVE-NEXT: stp q0, q2, [x1] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fpext <8 x half> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -258,6 +387,38 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q4, q0, [x1, #32] ; CHECK-NEXT: stp q1, q2, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #40] +; NONEON-NOSVE-NEXT: fcvtl v5.2d, v5.2s +; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s +; NONEON-NOSVE-NEXT: fcvtl v4.2d, v4.2s +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v7.2s +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v6.2s +; NONEON-NOSVE-NEXT: stp q2, q0, [x1, #32] +; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -275,6 +436,13 @@ define void @fcvt_v1f32_v1f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt d0, s0 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f32_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x float>, ptr %a %res = fpext <1 x float> %op1 to <1 x double> store <1 x double> %res, ptr %b @@ -289,6 +457,13 @@ define void @fcvt_v2f32_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.d, p0/m, z0.s ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: str q0, 
[x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x float>, ptr %a %res = fpext <2 x float> %op1 to <2 x double> store <2 x double> %res, ptr %b @@ -306,6 +481,18 @@ define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z1.d, p0/m, z1.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fpext <4 x float> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -330,6 +517,22 @@ define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s +; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fpext <8 x float> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -348,6 +551,13 @@ define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: str s0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x float>, ptr %a %res = fptrunc <2 x float> %op1 to <2 x half> store <2 x half> %res, ptr %b @@ -362,6 +572,13 @@ define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fptrunc <4 x float> %op1 to <4 x half> store <4 x half> %res, ptr %b @@ -379,6 +596,14 @@ define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1] ; CHECK-NEXT: st1h { z1.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptrunc <8 x float> %op1 to <8 x 
half> store <8 x half> %res, ptr %b @@ -397,6 +622,13 @@ define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: str h0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <1 x double>, ptr %a %res = fptrunc <1 x double> %op1 to <1 x half> store <1 x half> %res, ptr %b @@ -411,6 +643,16 @@ define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: mov d1, v0.d[1] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: fcvt h1, d1 +; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0] +; NONEON-NOSVE-NEXT: str s0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x double>, ptr %a %res = fptrunc <2 x double> %op1 to <2 x half> store <2 x half> %res, ptr %b @@ -428,6 +670,21 @@ define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) { ; CHECK-NEXT: st1h { z0.d }, p0, [x1, x8, lsl #1] ; CHECK-NEXT: st1h { z1.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: mov d1, v0.d[1] +; NONEON-NOSVE-NEXT: fcvt h0, d0 +; NONEON-NOSVE-NEXT: fcvt h1, d1 +; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, d2 +; NONEON-NOSVE-NEXT: mov d2, v2.d[1] +; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, d2 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptrunc <4 x double> %op1 to <4 x half> store <4 x half> %res, ptr %b @@ -446,6 +703,13 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) { ; CHECK-NEXT: fcvt z0.s, 
p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: str s0, [x0] +; NONEON-NOSVE-NEXT: ret %res = fptrunc <1 x double> %op1 to <1 x float> store <1 x float> %res, ptr %b ret void @@ -459,6 +723,12 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) { ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %res = fptrunc <2 x double> %op1 to <2 x float> store <2 x float> %res, ptr %b ret void @@ -475,6 +745,14 @@ define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2] ; CHECK-NEXT: st1w { z1.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptrunc <4 x double> %op1 to <4 x float> store <4 x float> %res, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll index 153a04f4865715..9d2b55903f3141 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" 
@@ -17,6 +18,18 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <4 x half> %op1, %op2 %res = fadd contract <4 x half> %mul, %op3 ret <4 x half> %res @@ -32,6 +45,26 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fmul v3.4s, v4.4s, v3.4s +; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v3.4s +; NONEON-NOSVE-NEXT: fadd v1.4s, v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <8 x half> %op1, %op2 %res = fadd contract <8 x half> %mul, %op3 ret <8 x half> %res @@ -49,6 +82,46 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: fma_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: fcvtl v5.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fmul v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: fmul v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fmul v2.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s +; NONEON-NOSVE-NEXT: ldp q0, q2, [x2] +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v6.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v7.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h +; NONEON-NOSVE-NEXT: fadd v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: fadd v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fadd v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s +; NONEON-NOSVE-NEXT: stp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %op3 = load <16 x half>, ptr %c @@ -68,6 +141,12 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f32: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: fmla v2.2s, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <2 x float> %op1, %op2 %res = fadd contract <2 x float> %mul, %op3 ret <2 x float> %res @@ -83,6 +162,12 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmla v2.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <4 x float> %op1, %op2 %res = fadd contract <4 x float> %mul, %op3 ret <4 x float> %res @@ -100,6 +185,16 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q5, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fmla v1.4s, v0.4s, v2.4s +; NONEON-NOSVE-NEXT: fmla v5.4s, v4.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q1, q5, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %op3 = load <8 x float>, ptr %c @@ -114,6 +209,11 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double ; CHECK: // %bb.0: ; CHECK-NEXT: fmadd d0, d0, d1, d2 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmadd d0, d0, d1, d2 +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <1 x double> %op1, %op2 %res = fadd contract <1 x double> %mul, %op3 ret <1 x double> %res @@ -129,6 +229,12 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v2f64: +; NONEON-NOSVE: 
// %bb.0: +; NONEON-NOSVE-NEXT: fmla v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %mul = fmul contract <2 x double> %op1, %op2 %res = fadd contract <2 x double> %mul, %op3 ret <2 x double> %res @@ -146,6 +252,16 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fma_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q4, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q5, [x2] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fmla v1.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: fmla v5.2d, v4.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q1, q5, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %op3 = load <4 x double>, ptr %c diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll index 6945a6102c0553..a96adfec2ad105 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,38 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; 
NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[2] +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: fcvt s7, h0 +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fmaxnm s5, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: fmaxnm s3, s4, s3 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s5 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: fmaxnm s1, s4, s1 +; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -30,6 +63,64 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h7, v0.h[2] +; NONEON-NOSVE-NEXT: mov h16, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fmaxnm s3, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s4 +; NONEON-NOSVE-NEXT: fmaxnm s4, s7, s6 +; 
NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: mov h7, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fmaxnm s5, s5, s16 +; NONEON-NOSVE-NEXT: mov h16, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt s3, h6 +; NONEON-NOSVE-NEXT: fcvt s6, h7 +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0] +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fmaxnm s3, s6, s3 +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h6 +; NONEON-NOSVE-NEXT: fmaxnm s6, s16, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0] +; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h3, s6 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -45,6 +136,119 @@ define void @fmaxnm_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: mov h7, v0.h[1] +; NONEON-NOSVE-NEXT: mov h16, v0.h[2] +; NONEON-NOSVE-NEXT: mov h18, v2.h[1] +; NONEON-NOSVE-NEXT: mov h5, v1.h[1] +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; 
NONEON-NOSVE-NEXT: mov h17, v3.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s19, h0 +; NONEON-NOSVE-NEXT: fcvt s20, h3 +; NONEON-NOSVE-NEXT: fcvt s21, h2 +; NONEON-NOSVE-NEXT: mov h22, v3.h[2] +; NONEON-NOSVE-NEXT: mov h23, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fmaxnm s4, s19, s4 +; NONEON-NOSVE-NEXT: mov h19, v0.h[3] +; NONEON-NOSVE-NEXT: mov h24, v3.h[3] +; NONEON-NOSVE-NEXT: fmaxnm s20, s21, s20 +; NONEON-NOSVE-NEXT: fcvt s21, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov h23, v2.h[3] +; NONEON-NOSVE-NEXT: mov h25, v2.h[6] +; NONEON-NOSVE-NEXT: fmaxnm s5, s7, s5 +; NONEON-NOSVE-NEXT: mov h7, v1.h[3] +; NONEON-NOSVE-NEXT: fmaxnm s6, s16, s6 +; NONEON-NOSVE-NEXT: fmaxnm s16, s18, s17 +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s18, h19 +; NONEON-NOSVE-NEXT: fcvt s19, h24 +; NONEON-NOSVE-NEXT: mov h24, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h17, s5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt h5, s20 +; NONEON-NOSVE-NEXT: fmaxnm s20, s22, s21 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt s21, h23 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: mov h22, v0.h[4] +; NONEON-NOSVE-NEXT: mov h23, v2.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0] +; NONEON-NOSVE-NEXT: mov h17, v1.h[4] +; NONEON-NOSVE-NEXT: fmaxnm s7, s18, s7 +; NONEON-NOSVE-NEXT: mov h18, v3.h[4] +; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s20 +; NONEON-NOSVE-NEXT: fmaxnm s19, s21, s19 +; NONEON-NOSVE-NEXT: fcvt s20, h23 +; NONEON-NOSVE-NEXT: mov h21, v1.h[5] +; NONEON-NOSVE-NEXT: mov h23, v2.h[5] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0] +; NONEON-NOSVE-NEXT: fcvt s6, h17 +; NONEON-NOSVE-NEXT: fcvt s17, h22 +; NONEON-NOSVE-NEXT: fcvt h7, s7 
+; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: mov h22, v3.h[5] +; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s19 +; NONEON-NOSVE-NEXT: mov h19, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmaxnm s6, s17, s6 +; NONEON-NOSVE-NEXT: mov h17, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fmaxnm s18, s20, s18 +; NONEON-NOSVE-NEXT: mov h20, v3.h[6] +; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt s7, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt s16, h21 +; NONEON-NOSVE-NEXT: fcvt s21, h24 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcvt s23, h25 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: mov h3, v3.h[7] +; NONEON-NOSVE-NEXT: fmaxnm s7, s22, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmaxnm s16, s21, s16 +; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0] +; NONEON-NOSVE-NEXT: fmaxnm s6, s19, s17 +; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0] +; NONEON-NOSVE-NEXT: fmaxnm s17, s23, s20 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s17 +; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0] +; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: stp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x 
half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -61,6 +265,11 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnm v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -74,6 +283,11 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnm v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -89,6 +303,15 @@ define void @fmaxnm_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmaxnm v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmaxnm v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -101,6 +324,11 @@ define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmaxnm d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnm d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -114,6 +342,11 @@ 
define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnm v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -129,6 +362,15 @@ define void @fmaxnm_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxnm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmaxnm v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fmaxnm v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2) @@ -149,6 +391,38 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[2] +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: fcvt s7, h0 +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fminnm s5, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; 
NONEON-NOSVE-NEXT: fminnm s3, s4, s3 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s5 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: fminnm s1, s4, s1 +; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -162,6 +436,64 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h7, v0.h[2] +; NONEON-NOSVE-NEXT: mov h16, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fminnm s3, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s4 +; NONEON-NOSVE-NEXT: fminnm s4, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: mov h7, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fminnm s5, s5, s16 +; NONEON-NOSVE-NEXT: mov h16, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt s3, h6 +; NONEON-NOSVE-NEXT: fcvt s6, h7 +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0] +; NONEON-NOSVE-NEXT: 
mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fminnm s3, s6, s3 +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h6 +; NONEON-NOSVE-NEXT: fminnm s6, s16, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0] +; NONEON-NOSVE-NEXT: fminnm s4, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h3, s6 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -177,6 +509,119 @@ define void @fminnm_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: mov h7, v0.h[1] +; NONEON-NOSVE-NEXT: mov h16, v0.h[2] +; NONEON-NOSVE-NEXT: mov h18, v2.h[1] +; NONEON-NOSVE-NEXT: mov h5, v1.h[1] +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h17, v3.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s19, h0 +; NONEON-NOSVE-NEXT: fcvt s20, h3 +; NONEON-NOSVE-NEXT: fcvt s21, h2 +; NONEON-NOSVE-NEXT: mov h22, v3.h[2] +; NONEON-NOSVE-NEXT: mov h23, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fminnm s4, s19, s4 +; 
NONEON-NOSVE-NEXT: mov h19, v0.h[3] +; NONEON-NOSVE-NEXT: mov h24, v3.h[3] +; NONEON-NOSVE-NEXT: fminnm s20, s21, s20 +; NONEON-NOSVE-NEXT: fcvt s21, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov h23, v2.h[3] +; NONEON-NOSVE-NEXT: mov h25, v2.h[6] +; NONEON-NOSVE-NEXT: fminnm s5, s7, s5 +; NONEON-NOSVE-NEXT: mov h7, v1.h[3] +; NONEON-NOSVE-NEXT: fminnm s6, s16, s6 +; NONEON-NOSVE-NEXT: fminnm s16, s18, s17 +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s18, h19 +; NONEON-NOSVE-NEXT: fcvt s19, h24 +; NONEON-NOSVE-NEXT: mov h24, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h17, s5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt h5, s20 +; NONEON-NOSVE-NEXT: fminnm s20, s22, s21 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt s21, h23 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: mov h22, v0.h[4] +; NONEON-NOSVE-NEXT: mov h23, v2.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0] +; NONEON-NOSVE-NEXT: mov h17, v1.h[4] +; NONEON-NOSVE-NEXT: fminnm s7, s18, s7 +; NONEON-NOSVE-NEXT: mov h18, v3.h[4] +; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s20 +; NONEON-NOSVE-NEXT: fminnm s19, s21, s19 +; NONEON-NOSVE-NEXT: fcvt s20, h23 +; NONEON-NOSVE-NEXT: mov h21, v1.h[5] +; NONEON-NOSVE-NEXT: mov h23, v2.h[5] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0] +; NONEON-NOSVE-NEXT: fcvt s6, h17 +; NONEON-NOSVE-NEXT: fcvt s17, h22 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: mov h22, v3.h[5] +; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s19 +; NONEON-NOSVE-NEXT: mov h19, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fminnm s6, s17, s6 +; NONEON-NOSVE-NEXT: mov h17, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fminnm s18, s20, s18 +; NONEON-NOSVE-NEXT: mov h20, v3.h[6] +; NONEON-NOSVE-NEXT: mov v4.h[3], 
v7.h[0] +; NONEON-NOSVE-NEXT: fcvt s7, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt s16, h21 +; NONEON-NOSVE-NEXT: fcvt s21, h24 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcvt s23, h25 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: mov h3, v3.h[7] +; NONEON-NOSVE-NEXT: fminnm s7, s22, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fminnm s16, s21, s16 +; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0] +; NONEON-NOSVE-NEXT: fminnm s6, s19, s17 +; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0] +; NONEON-NOSVE-NEXT: fminnm s17, s23, s20 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s17 +; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0] +; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: stp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -193,6 +638,11 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnm v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x 
float> %res } @@ -206,6 +656,11 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnm v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -221,6 +676,15 @@ define void @fminnm_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fminnm v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fminnm v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -233,6 +697,11 @@ define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fminnm d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnm d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -246,6 +715,11 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnm v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -261,6 +735,15 @@ define void @fminnm_v4f64(ptr %a, ptr %b) { 
; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminnm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fminnm v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fminnm v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2) @@ -281,6 +764,38 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[2] +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: fcvt s7, h0 +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s2, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fmax s5, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: fmax s3, s4, s3 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s5 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: fmax s1, s4, s1 +; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x 
half> %op2) ret <4 x half> %res } @@ -294,6 +809,64 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h7, v0.h[2] +; NONEON-NOSVE-NEXT: mov h16, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fmax s3, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s4 +; NONEON-NOSVE-NEXT: fmax s4, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: mov h7, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fmax s5, s5, s16 +; NONEON-NOSVE-NEXT: mov h16, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt s3, h6 +; NONEON-NOSVE-NEXT: fcvt s6, h7 +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0] +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fmax s3, s6, s3 +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h6 +; NONEON-NOSVE-NEXT: fmax s6, s16, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0] +; NONEON-NOSVE-NEXT: fmax s4, s5, s4 +; 
NONEON-NOSVE-NEXT: fcvt h3, s6 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -309,6 +882,119 @@ define void @fmax_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: mov h7, v0.h[1] +; NONEON-NOSVE-NEXT: mov h16, v0.h[2] +; NONEON-NOSVE-NEXT: mov h18, v2.h[1] +; NONEON-NOSVE-NEXT: mov h5, v1.h[1] +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h17, v3.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s19, h0 +; NONEON-NOSVE-NEXT: fcvt s20, h3 +; NONEON-NOSVE-NEXT: fcvt s21, h2 +; NONEON-NOSVE-NEXT: mov h22, v3.h[2] +; NONEON-NOSVE-NEXT: mov h23, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fmax s4, s19, s4 +; NONEON-NOSVE-NEXT: mov h19, v0.h[3] +; NONEON-NOSVE-NEXT: mov h24, v3.h[3] +; NONEON-NOSVE-NEXT: fmax s20, s21, s20 +; NONEON-NOSVE-NEXT: fcvt s21, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov h23, v2.h[3] +; NONEON-NOSVE-NEXT: mov h25, v2.h[6] +; NONEON-NOSVE-NEXT: fmax s5, s7, s5 +; NONEON-NOSVE-NEXT: mov h7, v1.h[3] +; NONEON-NOSVE-NEXT: fmax s6, s16, s6 +; NONEON-NOSVE-NEXT: fmax s16, s18, s17 +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s18, h19 +; NONEON-NOSVE-NEXT: fcvt s19, h24 +; NONEON-NOSVE-NEXT: mov h24, v0.h[5] 
+; NONEON-NOSVE-NEXT: fcvt h17, s5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt h5, s20 +; NONEON-NOSVE-NEXT: fmax s20, s22, s21 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt s21, h23 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: mov h22, v0.h[4] +; NONEON-NOSVE-NEXT: mov h23, v2.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0] +; NONEON-NOSVE-NEXT: mov h17, v1.h[4] +; NONEON-NOSVE-NEXT: fmax s7, s18, s7 +; NONEON-NOSVE-NEXT: mov h18, v3.h[4] +; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s20 +; NONEON-NOSVE-NEXT: fmax s19, s21, s19 +; NONEON-NOSVE-NEXT: fcvt s20, h23 +; NONEON-NOSVE-NEXT: mov h21, v1.h[5] +; NONEON-NOSVE-NEXT: mov h23, v2.h[5] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0] +; NONEON-NOSVE-NEXT: fcvt s6, h17 +; NONEON-NOSVE-NEXT: fcvt s17, h22 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: mov h22, v3.h[5] +; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s19 +; NONEON-NOSVE-NEXT: mov h19, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmax s6, s17, s6 +; NONEON-NOSVE-NEXT: mov h17, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fmax s18, s20, s18 +; NONEON-NOSVE-NEXT: mov h20, v3.h[6] +; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt s7, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt s16, h21 +; NONEON-NOSVE-NEXT: fcvt s21, h24 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcvt s23, h25 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: mov h3, v3.h[7] +; NONEON-NOSVE-NEXT: fmax s7, s22, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmax s16, s21, 
s16 +; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0] +; NONEON-NOSVE-NEXT: fmax s6, s19, s17 +; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0] +; NONEON-NOSVE-NEXT: fmax s17, s23, s20 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fmax s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s17 +; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0] +; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: stp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -325,6 +1011,11 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmax v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -338,6 +1029,11 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmax v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -353,6 +1049,15 @@ define void @fmax_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: fmax_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmax v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmax v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -365,6 +1070,11 @@ define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmax d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmax d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -378,6 +1088,11 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmax v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -393,6 +1108,15 @@ define void @fmax_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmax_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmax v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fmax v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2) @@ -413,6 +1137,38 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) { ; 
CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[2] +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: fcvt s7, h0 +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s2, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h4 +; NONEON-NOSVE-NEXT: fcvt s4, h5 +; NONEON-NOSVE-NEXT: fmin s5, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v0.h[3] +; NONEON-NOSVE-NEXT: fmin s3, s4, s3 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s5 +; NONEON-NOSVE-NEXT: fcvt s4, h6 +; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h2, s3 +; NONEON-NOSVE-NEXT: fmin s1, s4, s1 +; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2) ret <4 x half> %res } @@ -426,6 +1182,64 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h7, v0.h[2] +; NONEON-NOSVE-NEXT: mov h16, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; 
NONEON-NOSVE-NEXT: fmin s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fmin s3, s3, s2 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s4 +; NONEON-NOSVE-NEXT: fmin s4, s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: mov h7, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fmin s5, s5, s16 +; NONEON-NOSVE-NEXT: mov h16, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt s3, h6 +; NONEON-NOSVE-NEXT: fcvt s6, h7 +; NONEON-NOSVE-NEXT: mov h7, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h5, s5 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0] +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fmin s3, s6, s3 +; NONEON-NOSVE-NEXT: mov h6, v0.h[6] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h6 +; NONEON-NOSVE-NEXT: fmin s6, s16, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0] +; NONEON-NOSVE-NEXT: fmin s4, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h3, s6 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0] +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0] +; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2) ret <8 x half> %res } @@ -441,6 +1255,119 @@ define void @fmin_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v16f16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: mov h7, v0.h[1] +; NONEON-NOSVE-NEXT: mov h16, v0.h[2] +; NONEON-NOSVE-NEXT: mov h18, v2.h[1] +; NONEON-NOSVE-NEXT: mov h5, v1.h[1] +; NONEON-NOSVE-NEXT: mov h6, v1.h[2] +; NONEON-NOSVE-NEXT: mov h17, v3.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s19, h0 +; NONEON-NOSVE-NEXT: fcvt s20, h3 +; NONEON-NOSVE-NEXT: fcvt s21, h2 +; NONEON-NOSVE-NEXT: mov h22, v3.h[2] +; NONEON-NOSVE-NEXT: mov h23, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fmin s4, s19, s4 +; NONEON-NOSVE-NEXT: mov h19, v0.h[3] +; NONEON-NOSVE-NEXT: mov h24, v3.h[3] +; NONEON-NOSVE-NEXT: fmin s20, s21, s20 +; NONEON-NOSVE-NEXT: fcvt s21, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov h23, v2.h[3] +; NONEON-NOSVE-NEXT: mov h25, v2.h[6] +; NONEON-NOSVE-NEXT: fmin s5, s7, s5 +; NONEON-NOSVE-NEXT: mov h7, v1.h[3] +; NONEON-NOSVE-NEXT: fmin s6, s16, s6 +; NONEON-NOSVE-NEXT: fmin s16, s18, s17 +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s18, h19 +; NONEON-NOSVE-NEXT: fcvt s19, h24 +; NONEON-NOSVE-NEXT: mov h24, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h17, s5 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcvt h5, s20 +; NONEON-NOSVE-NEXT: fmin s20, s22, s21 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt s21, h23 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: mov h22, v0.h[4] +; NONEON-NOSVE-NEXT: mov h23, v2.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0] +; NONEON-NOSVE-NEXT: mov h17, v1.h[4] +; NONEON-NOSVE-NEXT: fmin s7, s18, s7 +; NONEON-NOSVE-NEXT: mov h18, v3.h[4] +; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s20 +; NONEON-NOSVE-NEXT: fmin s19, s21, s19 +; 
NONEON-NOSVE-NEXT: fcvt s20, h23 +; NONEON-NOSVE-NEXT: mov h21, v1.h[5] +; NONEON-NOSVE-NEXT: mov h23, v2.h[5] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0] +; NONEON-NOSVE-NEXT: fcvt s6, h17 +; NONEON-NOSVE-NEXT: fcvt s17, h22 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fcvt s18, h18 +; NONEON-NOSVE-NEXT: mov h22, v3.h[5] +; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h16, s19 +; NONEON-NOSVE-NEXT: mov h19, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmin s6, s17, s6 +; NONEON-NOSVE-NEXT: mov h17, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fmin s18, s20, s18 +; NONEON-NOSVE-NEXT: mov h20, v3.h[6] +; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt s7, h22 +; NONEON-NOSVE-NEXT: fcvt s22, h23 +; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt s16, h21 +; NONEON-NOSVE-NEXT: fcvt s21, h24 +; NONEON-NOSVE-NEXT: fcvt s19, h19 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcvt s23, h25 +; NONEON-NOSVE-NEXT: fcvt h18, s18 +; NONEON-NOSVE-NEXT: fcvt s20, h20 +; NONEON-NOSVE-NEXT: mov h3, v3.h[7] +; NONEON-NOSVE-NEXT: fmin s7, s22, s7 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fmin s16, s21, s16 +; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0] +; NONEON-NOSVE-NEXT: fmin s6, s19, s17 +; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0] +; NONEON-NOSVE-NEXT: fmin s17, s23, s20 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt h7, s7 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h16, s16 +; NONEON-NOSVE-NEXT: fcvt h6, s6 +; NONEON-NOSVE-NEXT: fmin s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s17 +; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0] +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: mov v5.h[6], 
v3.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0] +; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0] +; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0] +; NONEON-NOSVE-NEXT: stp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2) @@ -457,6 +1384,11 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmin v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2) ret <2 x float> %res } @@ -470,6 +1402,11 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmin v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2) ret <4 x float> %res } @@ -485,6 +1422,15 @@ define void @fmin_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmin v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmin v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2) @@ -497,6 +1443,11 @@ define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmin d0, d0, d1 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: fmin_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmin d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2) ret <1 x double> %res } @@ -510,6 +1461,11 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmin v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2) ret <2 x double> %res } @@ -525,6 +1481,15 @@ define void @fmin_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmin_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmin v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fmin v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll index e239ff5e35fd36..f1561011e21812 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64 +; RUN: llc -force-streaming-compatible < %s | 
FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -26,6 +27,30 @@ define half @fadda_v4f16(half %start, <4 x half> %a) { ; NO-FA64-NEXT: fadd h0, h0, h2 ; NO-FA64-NEXT: fadd h0, h0, h1 ; NO-FA64-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) ret half %res } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll index 78ae7bb6cf30ab..a0a7dad835662e 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,30 @@ define half @fadda_v4f16(half %start, <4 x half> %a) { ; CHECK-NEXT: fadd h0, h0, h2 ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fadda_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) ret half %res } @@ -43,6 +68,49 @@ define half @fadda_v8f16(half %start, <8 x half> %a) { ; CHECK-NEXT: fadd h0, h0, h2 ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h0, s0 
+; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a) ret half %res } @@ -83,6 +151,90 @@ define half @fadda_v16f16(half %start, ptr %a) { ; CHECK-NEXT: fadd h0, h0, h2 ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, 
h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: fcvt s2, h1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op) ret half %res @@ -96,6 +248,14 @@ define float @fadda_v2f32(float %start, <2 x float> %a) { ; CHECK-NEXT: mov z1.s, z1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: fadda_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov s2, v1.s[1] +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a) ret float %res } @@ -112,6 +272,17 @@ define float @fadda_v4f32(float %start, <4 x float> %a) { ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov s2, v1.s[1] +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: mov s3, v1.s[2] +; NONEON-NOSVE-NEXT: mov s1, v1.s[3] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s3 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a) ret float %res } @@ -136,6 +307,26 @@ define float @fadda_v8f32(float %start, ptr %a) { ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: mov s2, v1.s[1] +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: mov s3, v1.s[2] +; NONEON-NOSVE-NEXT: mov s1, v1.s[3] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s3 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: mov s2, v1.s[1] +; NONEON-NOSVE-NEXT: mov s3, v1.s[2] +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: mov s1, v1.s[3] +; NONEON-NOSVE-NEXT: fadd s0, s0, s2 +; NONEON-NOSVE-NEXT: fadd s0, s0, s3 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op) ret float %res @@ -146,6 +337,11 @@ define double @fadda_v1f64(double 
%start, <1 x double> %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a) ret double %res } @@ -158,6 +354,13 @@ define double @fadda_v2f64(double %start, <2 x double> %a) { ; CHECK-NEXT: mov z1.d, z1.d[1] ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov d2, v1.d[1] +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: fadd d0, d0, d2 +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a) ret double %res } @@ -174,6 +377,17 @@ define double @fadda_v4f64(double %start, ptr %a) { ; CHECK-NEXT: mov z1.d, z1.d[1] ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadda_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q3, q1, [x0] +; NONEON-NOSVE-NEXT: mov d2, v3.d[1] +; NONEON-NOSVE-NEXT: fadd d0, d0, d3 +; NONEON-NOSVE-NEXT: fadd d0, d0, d2 +; NONEON-NOSVE-NEXT: mov d2, v1.d[1] +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: fadd d0, d0, d2 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op) ret double %res @@ -191,6 +405,30 @@ define half @faddv_v4f16(half %start, <4 x half> %a) { ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s3, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s3, s2 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: mov h1, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt 
h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a) ret half %res } @@ -203,6 +441,49 @@ define half @faddv_v8f16(half %start, <8 x half> %a) { ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s3, h1 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s3, s2 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[6] +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fadd s1, s2, s1 +; 
NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a) ret half %res } @@ -216,6 +497,58 @@ define half @faddv_v16f16(half %start, ptr %a) { ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fadd v3.4s, v4.4s, v3.4s +; NONEON-NOSVE-NEXT: fadd v1.4s, v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v1.4s +; NONEON-NOSVE-NEXT: mov h1, v2.h[1] +; NONEON-NOSVE-NEXT: fcvt s3, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s3, s1 +; NONEON-NOSVE-NEXT: mov h3, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s3 +; NONEON-NOSVE-NEXT: mov h3, v2.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s3 +; NONEON-NOSVE-NEXT: mov h3, v2.h[4] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s3 +; NONEON-NOSVE-NEXT: mov h3, v2.h[5] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s3 +; NONEON-NOSVE-NEXT: mov h3, v2.h[6] +; NONEON-NOSVE-NEXT: mov h2, v2.h[7] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s3 
+; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op) ret half %res @@ -229,6 +562,12 @@ define float @faddv_v2f32(float %start, <2 x float> %a) { ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: faddp s1, v1.2s +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ret %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a) ret float %res } @@ -241,6 +580,13 @@ define float @faddv_v4f32(float %start, <4 x float> %a) { ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: faddp v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: faddp s1, v1.2s +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ret %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a) ret float %res } @@ -254,6 +600,15 @@ define float @faddv_v8f32(float %start, ptr %a) { ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: fadd v1.4s, v2.4s, v1.4s +; NONEON-NOSVE-NEXT: faddp v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: faddp s1, v1.2s +; NONEON-NOSVE-NEXT: fadd s0, s0, s1 +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op) ret float %res @@ -264,6 +619,11 @@ define double @faddv_v1f64(double %start, <1 x double> %a) { ; CHECK: // %bb.0: ; 
CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a) ret double %res } @@ -276,6 +636,12 @@ define double @faddv_v2f64(double %start, <2 x double> %a) { ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: faddp d1, v1.2d +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a) ret double %res } @@ -289,6 +655,14 @@ define double @faddv_v4f64(double %start, ptr %a) { ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: faddv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q1, [x0] +; NONEON-NOSVE-NEXT: fadd v1.2d, v2.2d, v1.2d +; NONEON-NOSVE-NEXT: faddp d1, v1.2d +; NONEON-NOSVE-NEXT: fadd d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op) ret double %res @@ -306,6 +680,26 @@ define half @fmaxv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; 
NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a) ret half %res } @@ -318,6 +712,45 @@ define half @fmaxv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a) ret half %res } @@ -331,6 +764,85 @@ define half @fmaxv_v16f16(ptr %a) { ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fmaxv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmaxnm s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fmaxnm s2, s4, s2 +; NONEON-NOSVE-NEXT: mov h4, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[4] +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmaxnm s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[5] +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; 
NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fmaxnm s3, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op) ret half %res @@ -344,6 +856,11 @@ define float @fmaxv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnmp s0, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a) ret float %res } @@ -356,6 +873,11 @@ define float @fmaxv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnmv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) ret float %res } @@ -369,6 +891,13 @@ define float @fmaxv_v8f32(ptr %a) { ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fmaxnm v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmaxnmv 
s0, v0.4s +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op) ret float %res @@ -378,6 +907,10 @@ define double @fmaxv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fmaxv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a) ret double %res } @@ -390,6 +923,11 @@ define double @fmaxv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fmaxnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxnmp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) ret double %res } @@ -403,6 +941,13 @@ define double @fmaxv_v4f64(ptr %a) { ; CHECK-NEXT: fmaxnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaxv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fmaxnm v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fmaxnmp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op) ret double %res @@ -420,6 +965,26 @@ define half @fminv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a) ret half %res } @@ -432,6 +997,45 @@ define half @fminv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a) ret half %res } @@ -445,6 +1049,85 @@ define half @fminv_v16f16(ptr %a) { ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // 
kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fminnm s2, s3, s2 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fminnm s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fminnm s2, s4, s2 +; NONEON-NOSVE-NEXT: mov h4, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fminnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[4] +; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fminnm s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[5] +; NONEON-NOSVE-NEXT: fminnm s2, s2, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fminnm s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, 
s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fminnm s0, s0, s1 +; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fminnm s3, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fminnm s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fminnm s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op) ret half %res @@ -458,6 +1141,11 @@ define float @fminv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnmp s0, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a) ret float %res } @@ -470,6 +1158,11 @@ define float @fminv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnmv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) ret float %res } @@ -483,6 +1176,13 @@ define float @fminv_v8f32(ptr %a) { ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, 
[x0] +; NONEON-NOSVE-NEXT: fminnm v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fminnmv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op) ret float %res @@ -492,6 +1192,10 @@ define double @fminv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fminv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a) ret double %res } @@ -504,6 +1208,11 @@ define double @fminv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fminnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminnmp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a) ret double %res } @@ -517,6 +1226,13 @@ define double @fminv_v4f64(ptr %a) { ; CHECK-NEXT: fminnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fminnm v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fminnmp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op) ret double %res @@ -534,6 +1250,26 @@ define half @fmaximumv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; 
NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a) ret half %res } @@ -546,6 +1282,45 @@ define half @fmaximumv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a) ret half %res } @@ -559,6 
+1334,85 @@ define half @fmaximumv_v16f16(ptr %a) { ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fmax s2, s3, s2 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmax s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fmax s2, s4, s2 +; NONEON-NOSVE-NEXT: mov h4, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmax s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[4] +; NONEON-NOSVE-NEXT: fmax s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmax s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[5] +; NONEON-NOSVE-NEXT: fmax s2, s2, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmax s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[6] +; NONEON-NOSVE-NEXT: 
mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fmax s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmax s0, s0, s1 +; NONEON-NOSVE-NEXT: fmax s2, s2, s3 +; NONEON-NOSVE-NEXT: fmax s3, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmax s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmax s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op) ret half %res @@ -572,6 +1426,11 @@ define float @fmaximumv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxp s0, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a) ret float %res } @@ -584,6 +1443,11 @@ define float @fmaximumv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a) ret float %res } @@ -597,6 +1461,13 @@ define float @fmaximumv_v8f32(ptr %a) { ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: fmaximumv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fmax v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fmaxv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op) ret float %res @@ -606,6 +1477,10 @@ define double @fmaximumv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fmaximumv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a) ret double %res } @@ -618,6 +1493,11 @@ define double @fmaximumv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fmaxv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmaxp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a) ret double %res } @@ -631,6 +1511,13 @@ define double @fmaximumv_v4f64(ptr %a) { ; CHECK-NEXT: fmaxv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fmaximumv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fmax v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fmaxp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op) ret double %res @@ -648,6 +1535,26 @@ define half @fminimumv_v4f16(<4 x half> %a) { ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, 
s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: mov h0, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a) ret half %res } @@ -660,6 +1567,45 @@ define half @fminimumv_v8f16(<8 x half> %a) { ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s2, s1 +; NONEON-NOSVE-NEXT: mov h2, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s1, s2 +; NONEON-NOSVE-NEXT: mov h2, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s1, s1, s2 +; NONEON-NOSVE-NEXT: fcvt h1, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; 
NONEON-NOSVE-NEXT: ret %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a) ret half %res } @@ -673,6 +1619,85 @@ define half @fminimumv_v16f16(ptr %a) { ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s4, h1 +; NONEON-NOSVE-NEXT: fcvt s5, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fmin s2, s3, s2 +; NONEON-NOSVE-NEXT: mov h3, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmin s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[3] +; NONEON-NOSVE-NEXT: fmin s2, s4, s2 +; NONEON-NOSVE-NEXT: mov h4, v1.h[3] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmin s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[4] +; NONEON-NOSVE-NEXT: fmin s2, s2, s3 +; NONEON-NOSVE-NEXT: mov h3, v1.h[4] +; NONEON-NOSVE-NEXT: fcvt h4, s4 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmin s3, s5, s3 +; NONEON-NOSVE-NEXT: mov h5, v0.h[5] +; NONEON-NOSVE-NEXT: fmin s2, s2, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[5] +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; 
NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmin s4, s5, s4 +; NONEON-NOSVE-NEXT: mov h5, v0.h[6] +; NONEON-NOSVE-NEXT: mov h0, v0.h[7] +; NONEON-NOSVE-NEXT: fmin s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h3, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[6] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: mov h1, v1.h[7] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fmin s0, s0, s1 +; NONEON-NOSVE-NEXT: fmin s2, s2, s3 +; NONEON-NOSVE-NEXT: fmin s3, s5, s4 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: fcvt h2, s2 +; NONEON-NOSVE-NEXT: fcvt h3, s3 +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fmin s2, s2, s3 +; NONEON-NOSVE-NEXT: fcvt h1, s2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fmin s0, s1, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op) ret half %res @@ -686,6 +1711,11 @@ define float @fminimumv_v2f32(<2 x float> %a) { ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminp s0, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a) ret float %res } @@ -698,6 +1728,11 @@ define float @fminimumv_v4f32(<4 x float> %a) { ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a) ret float %res } @@ -711,6 +1746,13 @@ define float 
@fminimumv_v8f32(ptr %a) { ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fmin v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fminv s0, v0.4s +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op) ret float %res @@ -720,6 +1762,10 @@ define double @fminimumv_v1f64(<1 x double> %a) { ; CHECK-LABEL: fminimumv_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a) ret double %res } @@ -732,6 +1778,11 @@ define double @fminimumv_v2f64(<2 x double> %a) { ; CHECK-NEXT: fminv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fminp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a) ret double %res } @@ -745,6 +1796,13 @@ define double @fminimumv_v4f64(ptr %a) { ; CHECK-NEXT: fminv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fminimumv_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: fmin v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fminp d0, v0.2d +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op) ret double %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll index 412c27cb82f1d4..6af2b885ace08f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,13 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.ceil.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -28,6 +36,13 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -40,6 +55,16 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frintp v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s +; NONEON-NOSVE-NEXT: frintp v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -53,6 
+78,24 @@ define void @frintp_v16f16(ptr %a) { ; CHECK-NEXT: frintp z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frintp v2.4s, v2.4s +; NONEON-NOSVE-NEXT: frintp v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintp v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -67,6 +110,11 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintp v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -79,6 +127,11 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -92,6 +145,14 @@ define void @frintp_v8f32(ptr %a) { ; CHECK-NEXT: frintp z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintp 
v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintp v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -103,6 +164,11 @@ define <1 x double> @frintp_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintp d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintp d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -115,6 +181,11 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintp z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintp v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -128,6 +199,14 @@ define void @frintp_v4f64(ptr %a) { ; CHECK-NEXT: frintp z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintp_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintp v0.2d, v0.2d +; NONEON-NOSVE-NEXT: frintp v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -146,6 +225,13 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.floor.v2f16(<2 x half> %op) ret <2 x half> %res } @@ 
-158,6 +244,13 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -170,6 +263,16 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frintm v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s +; NONEON-NOSVE-NEXT: frintm v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -183,6 +286,24 @@ define void @frintm_v16f16(ptr %a) { ; CHECK-NEXT: frintm z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frintm v2.4s, v2.4s +; NONEON-NOSVE-NEXT: frintm v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintm v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op) store <16 x 
half> %res, ptr %a @@ -197,6 +318,11 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintm v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -209,6 +335,11 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -222,6 +353,14 @@ define void @frintm_v8f32(ptr %a) { ; CHECK-NEXT: frintm z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintm v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -233,6 +372,11 @@ define <1 x double> @frintm_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintm d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintm d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -245,6 +389,11 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintm z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintm v0.2d, v0.2d +; 
NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -258,6 +407,14 @@ define void @frintm_v4f64(ptr %a) { ; CHECK-NEXT: frintm z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintm v0.2d, v0.2d +; NONEON-NOSVE-NEXT: frintm v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -276,6 +433,13 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) { ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -288,6 +452,13 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) { ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -300,6 +471,16 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) { ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frinti v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, 
v1.4s +; NONEON-NOSVE-NEXT: frinti v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -313,6 +494,24 @@ define void @frinti_v16f16(ptr %a) { ; CHECK-NEXT: frinti z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frinti v2.4s, v2.4s +; NONEON-NOSVE-NEXT: frinti v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frinti v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -327,6 +526,11 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) { ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinti v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -339,6 +543,11 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) { ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -352,6 +561,14 @@ define void @frinti_v8f32(ptr 
%a) { ; CHECK-NEXT: frinti z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frinti v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -363,6 +580,11 @@ define <1 x double> @frinti_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frinti d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinti d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -375,6 +597,11 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) { ; CHECK-NEXT: frinti z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinti v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -388,6 +615,14 @@ define void @frinti_v4f64(ptr %a) { ; CHECK-NEXT: frinti z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinti_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frinti v0.2d, v0.2d +; NONEON-NOSVE-NEXT: frinti v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -406,6 +641,13 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v2f16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.rint.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -418,6 +660,13 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -430,6 +679,16 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frintx v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s +; NONEON-NOSVE-NEXT: frintx v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -443,6 +702,24 @@ define void @frintx_v16f16(ptr %a) { ; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frintx v2.4s, v2.4s +; NONEON-NOSVE-NEXT: frintx v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintx v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; 
NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -457,6 +734,11 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintx v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -469,6 +751,11 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -482,6 +769,14 @@ define void @frintx_v8f32(ptr %a) { ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintx v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -493,6 +788,11 @@ define <1 x double> @frintx_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintx d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintx d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -505,6 +805,11 @@ define <2 x double> @frintx_v2f64(<2 x 
double> %op) { ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintx v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -518,6 +823,14 @@ define void @frintx_v4f64(ptr %a) { ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintx_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintx v0.2d, v0.2d +; NONEON-NOSVE-NEXT: frintx v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -536,6 +849,13 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) { ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.round.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -548,6 +868,13 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) { ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.round.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -560,6 +887,16 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) { ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: frinta_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frinta v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s +; NONEON-NOSVE-NEXT: frinta v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.round.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -573,6 +910,24 @@ define void @frinta_v16f16(ptr %a) { ; CHECK-NEXT: frinta z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frinta v2.4s, v2.4s +; NONEON-NOSVE-NEXT: frinta v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frinta v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.round.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -587,6 +942,11 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) { ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinta v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.round.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -599,6 +959,11 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) { ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v4f32: +; NONEON-NOSVE: 
// %bb.0: +; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.round.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -612,6 +977,14 @@ define void @frinta_v8f32(ptr %a) { ; CHECK-NEXT: frinta z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frinta v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.round.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -623,6 +996,11 @@ define <1 x double> @frinta_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frinta d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinta d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -635,6 +1013,11 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) { ; CHECK-NEXT: frinta z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frinta v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.round.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -648,6 +1031,14 @@ define void @frinta_v4f64(ptr %a) { ; CHECK-NEXT: frinta z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frinta_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frinta v0.2d, v0.2d +; NONEON-NOSVE-NEXT: frinta v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.round.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -666,6 +1057,13 @@ define <2 x 
half> @frintn_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -678,6 +1076,13 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -690,6 +1095,16 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frintn v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s +; NONEON-NOSVE-NEXT: frintn v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -703,6 +1118,24 @@ define void @frintn_v16f16(ptr %a) { ; CHECK-NEXT: frintn z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frintn v2.4s, v2.4s +; 
NONEON-NOSVE-NEXT: frintn v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintn v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -717,6 +1150,11 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintn v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op) ret <2 x float> %res } @@ -729,6 +1167,11 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -742,6 +1185,14 @@ define void @frintn_v8f32(ptr %a) { ; CHECK-NEXT: frintn z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintn v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -753,6 +1204,11 @@ define <1 x double> @frintn_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintn d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v1f64: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintn d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -765,6 +1221,11 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintn z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintn v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -778,6 +1239,14 @@ define void @frintn_v4f64(ptr %a) { ; CHECK-NEXT: frintn z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintn_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintn v0.2d, v0.2d +; NONEON-NOSVE-NEXT: frintn v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a @@ -796,6 +1265,13 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) { ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <2 x half> @llvm.trunc.v2f16(<2 x half> %op) ret <2 x half> %res } @@ -808,6 +1284,13 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) { ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call 
<4 x half> @llvm.trunc.v4f16(<4 x half> %op) ret <4 x half> %res } @@ -820,6 +1303,16 @@ define <8 x half> @frintz_v8f16(<8 x half> %op) { ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h +; NONEON-NOSVE-NEXT: frintz v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s +; NONEON-NOSVE-NEXT: frintz v1.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op) ret <8 x half> %res } @@ -833,6 +1326,24 @@ define void @frintz_v16f16(ptr %a) { ; CHECK-NEXT: frintz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: frintz v2.4s, v2.4s +; NONEON-NOSVE-NEXT: frintz v3.4s, v3.4s +; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintz v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x half>, ptr %a %res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op) store <16 x half> %res, ptr %a @@ -847,6 +1358,11 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) { ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintz v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op) ret <2 x 
float> %res } @@ -859,6 +1375,11 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) { ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op) ret <4 x float> %res } @@ -872,6 +1393,14 @@ define void @frintz_v8f32(ptr %a) { ; CHECK-NEXT: frintz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s +; NONEON-NOSVE-NEXT: frintz v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x float>, ptr %a %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op) store <8 x float> %res, ptr %a @@ -883,6 +1412,11 @@ define <1 x double> @frintz_v1f64(<1 x double> %op) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintz d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintz d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op) ret <1 x double> %res } @@ -895,6 +1429,11 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) { ; CHECK-NEXT: frintz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: frintz v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op) ret <2 x double> %res } @@ -908,6 +1447,14 @@ define void @frintz_v4f64(ptr %a) { ; CHECK-NEXT: frintz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: frintz_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: frintz v0.2d, v0.2d +; 
NONEON-NOSVE-NEXT: frintz v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x double>, ptr %a %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op) store <4 x double> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll index 89697cde848b53..824419b31a5a83 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,14 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.4h, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2 ret <2 x half> %sel } @@ -32,6 +41,14 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.4h, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2 ret <4 
x half> %sel } @@ -48,6 +65,14 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.8h, w8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2 ret <8 x half> %sel } @@ -67,6 +92,20 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <16 x half>, ptr %a %op2 = load volatile <16 x half>, ptr %b %sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2 @@ -86,6 +125,14 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.2s, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2 ret <2 x float> %sel } @@ -102,6 +149,14 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) { ; CHECK-NEXT: sel 
z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.4s, w8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2 ret <4 x float> %sel } @@ -121,6 +176,20 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <8 x float>, ptr %a %op2 = load volatile <8 x float>, ptr %b %sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2 @@ -134,6 +203,14 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: fmov d2, x8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2 ret <1 x double> %sel } @@ -151,6 +228,14 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f64: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: dup v2.2d, x8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2 ret <2 x double> %sel } @@ -171,6 +256,20 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <4 x double>, ptr %a %op2 = load volatile <4 x double>, ptr %b %sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll index 5840ffb20994ce..c853bdc5af8db0 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,13 @@ define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) { ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: 
fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x half> %op1 to <4 x i16> ret <4 x i16> %res } @@ -27,6 +35,21 @@ define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptoui <8 x half> %op1 to <8 x i16> store <8 x i16> %res, ptr %b @@ -42,6 +65,27 @@ define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzu v2.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtzu v3.4s, v3.4s +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -61,6 +105,13 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x half> %op1 to <2 x i32> ret <2 x i32> %res } @@ -74,6 +125,12 @@ define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x half> %op1 to <4 x i32> ret <4 x i32> %res } @@ -90,6 +147,20 @@ define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptoui <8 x half> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -114,6 +185,26 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzu v2.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtzu v3.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i32> store <16 x i32> %res, ptr %b @@ -130,6 +221,13 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) { ; CHECK-NEXT: fcvtzu x8, h0 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x half> %op1 to <1 x i64> ret <1 x i64> %res } @@ -145,6 +243,18 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) { ; CHECK-NEXT: 
.cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: fcvtzu x9, s1 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x half> %op1 to <2 x i64> ret <2 x i64> %res } @@ -167,6 +277,27 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: mov h1, v0.h[2] +; NONEON-NOSVE-NEXT: mov h2, v0.h[3] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: fcvtzu x8, s1 +; NONEON-NOSVE-NEXT: fcvtzu x10, s2 +; NONEON-NOSVE-NEXT: fcvtzu x11, s3 +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: mov v0.d[1], x10 +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fptoui <4 x half> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -204,6 +335,47 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1, #32] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: mov h1, v0.h[2] +; NONEON-NOSVE-NEXT: mov h3, v0.h[3] +; NONEON-NOSVE-NEXT: mov h4, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov h5, v2.h[2] +; NONEON-NOSVE-NEXT: mov h6, v2.h[3] +; NONEON-NOSVE-NEXT: mov h7, v2.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvtzu x9, s0 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvtzu x13, s2 +; NONEON-NOSVE-NEXT: fcvtzu x8, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h7 +; NONEON-NOSVE-NEXT: fcvtzu x10, s3 +; NONEON-NOSVE-NEXT: fcvtzu x11, s4 +; NONEON-NOSVE-NEXT: fcvtzu x12, s5 +; NONEON-NOSVE-NEXT: fcvtzu x14, s6 +; NONEON-NOSVE-NEXT: fmov d3, x13 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: fcvtzu x8, s1 +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: fmov d2, x12 +; NONEON-NOSVE-NEXT: mov v0.d[1], x10 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: mov v3.d[1], x8 +; NONEON-NOSVE-NEXT: mov v2.d[1], x14 +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptoui <8 x half> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -264,6 +436,80 @@ define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q5, q2, [x1, #96] ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s3, h1 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #24] +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: mov h7, v0.h[2] +; NONEON-NOSVE-NEXT: mov h16, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s6, h0 +; NONEON-NOSVE-NEXT: mov h0, v0.h[1] +; NONEON-NOSVE-NEXT: mov h1, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s17, h4 +; NONEON-NOSVE-NEXT: mov h18, v4.h[2] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvtzu x8, s3 +; NONEON-NOSVE-NEXT: fcvt s3, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h7 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: mov h16, v4.h[3] +; NONEON-NOSVE-NEXT: fcvtzu x9, s6 +; NONEON-NOSVE-NEXT: ldr d6, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: mov h4, v4.h[1] +; NONEON-NOSVE-NEXT: fcvtzu x11, s2 +; NONEON-NOSVE-NEXT: mov h2, v6.h[2] +; NONEON-NOSVE-NEXT: fcvtzu x10, s17 +; NONEON-NOSVE-NEXT: fcvtzu x13, s5 +; NONEON-NOSVE-NEXT: fcvtzu x12, s3 +; NONEON-NOSVE-NEXT: mov h3, v6.h[3] +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov h5, v6.h[1] +; NONEON-NOSVE-NEXT: fcvt s17, h18 +; NONEON-NOSVE-NEXT: fcvtzu x14, s7 +; NONEON-NOSVE-NEXT: fmov d7, x8 +; NONEON-NOSVE-NEXT: fcvtzu x8, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fmov d0, x11 +; NONEON-NOSVE-NEXT: fcvtzu x11, s1 +; NONEON-NOSVE-NEXT: fmov d1, x13 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvtzu x13, s16 +; NONEON-NOSVE-NEXT: fmov d16, x9 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvtzu x15, s17 +; NONEON-NOSVE-NEXT: mov v0.d[1], x12 +; NONEON-NOSVE-NEXT: mov v1.d[1], x14 +; NONEON-NOSVE-NEXT: fcvtzu x9, s2 +; NONEON-NOSVE-NEXT: mov v16.d[1], x8 +; NONEON-NOSVE-NEXT: fcvtzu x8, s6 +; NONEON-NOSVE-NEXT: fcvtzu x14, s4 +; NONEON-NOSVE-NEXT: fcvtzu x12, s3 +; NONEON-NOSVE-NEXT: mov v7.d[1], x11 +; 
NONEON-NOSVE-NEXT: fmov d3, x10 +; NONEON-NOSVE-NEXT: fcvtzu x11, s5 +; NONEON-NOSVE-NEXT: fmov d2, x15 +; NONEON-NOSVE-NEXT: stp q16, q1, [x1, #64] +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: fmov d4, x8 +; NONEON-NOSVE-NEXT: stp q7, q0, [x1] +; NONEON-NOSVE-NEXT: mov v2.d[1], x13 +; NONEON-NOSVE-NEXT: mov v3.d[1], x14 +; NONEON-NOSVE-NEXT: mov v1.d[1], x12 +; NONEON-NOSVE-NEXT: mov v4.d[1], x11 +; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #96] +; NONEON-NOSVE-NEXT: stp q4, q1, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i64> store <16 x i64> %res, ptr %b @@ -282,6 +528,11 @@ define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x float> %op1 to <2 x i16> ret <2 x i16> %res } @@ -295,6 +546,12 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x float> %op1 to <4 x i16> ret <4 x i16> %res } @@ -312,6 +569,14 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x 
float> %op1 to <8 x i16> ret <8 x i16> %res @@ -336,6 +601,19 @@ define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v16f32_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzu v3.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtzu v2.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x float>, ptr %a %res = fptoui <16 x float> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -354,6 +632,11 @@ define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzu v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x float> %op1 to <2 x i32> ret <2 x i32> %res } @@ -366,6 +649,11 @@ define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptoui <4 x float> %op1 to <4 x i32> ret <4 x i32> %res } @@ -379,6 +667,14 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; 
NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x float> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -398,6 +694,13 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f32_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x float> %op1 to <1 x i64> ret <1 x i64> %res } @@ -411,6 +714,12 @@ define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x float> %op1 to <2 x i64> ret <2 x i64> %res } @@ -427,6 +736,20 @@ define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fptoui <4 x float> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -451,6 +774,26 @@ define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s +; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s +; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzu v2.2d, v2.2d +; NONEON-NOSVE-NEXT: fcvtzu v3.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x float> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -468,6 +811,12 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) { ; CHECK-NEXT: mov z0.h, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i16> ret <1 x i16> %res } @@ -481,6 +830,12 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, 
z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x double> %op1 to <2 x i16> ret <2 x i16> %res } @@ -509,6 +864,15 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptoui <4 x double> %op1 to <4 x i16> ret <4 x i16> %res @@ -552,6 +916,23 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) { ; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: adrp x8, .LCPI26_0 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d +; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d +; NONEON-NOSVE-NEXT: xtn v7.2s, v0.2d +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] +; NONEON-NOSVE-NEXT: xtn v6.2s, v1.2d +; NONEON-NOSVE-NEXT: xtn v5.2s, v2.2d +; NONEON-NOSVE-NEXT: xtn v4.2s, v3.2d +; NONEON-NOSVE-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptoui <8 x double> %op1 to <8 x i16> ret <8 x i16> %res @@ -628,6 +1009,35 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvtzu_v16f64_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] +; NONEON-NOSVE-NEXT: adrp x8, .LCPI27_0 +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d +; NONEON-NOSVE-NEXT: fcvtzs v5.2d, v5.2d +; NONEON-NOSVE-NEXT: fcvtzs v4.2d, v4.2d +; NONEON-NOSVE-NEXT: fcvtzs v6.2d, v6.2d +; NONEON-NOSVE-NEXT: fcvtzs v7.2d, v7.2d +; NONEON-NOSVE-NEXT: xtn v19.2s, v0.2d +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI27_0] +; NONEON-NOSVE-NEXT: xtn v23.2s, v3.2d +; NONEON-NOSVE-NEXT: xtn v18.2s, v1.2d +; NONEON-NOSVE-NEXT: xtn v22.2s, v2.2d +; NONEON-NOSVE-NEXT: xtn v17.2s, v5.2d +; NONEON-NOSVE-NEXT: xtn v21.2s, v6.2d +; NONEON-NOSVE-NEXT: xtn v16.2s, v4.2d +; NONEON-NOSVE-NEXT: xtn v20.2s, v7.2d +; NONEON-NOSVE-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; NONEON-NOSVE-NEXT: tbl v0.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x double>, ptr %a %res = fptoui <16 x double> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -647,6 +1057,13 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i32> ret <1 x i32> %res } @@ -660,6 +1077,12 @@ define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x double> %op1 to <2 x i32> ret <2 x i32> %res } @@ -677,6 +1100,14 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptoui <4 x double> %op1 to <4 x i32> ret <4 x i32> %res @@ -701,6 +1132,19 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzu v3.2d, v3.2d +; NONEON-NOSVE-NEXT: fcvtzu v2.2d, v2.2d +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptoui <8 x double> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -719,6 +1163,12 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzu x8, d0 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = fptoui <1 x double> %op1 to <1 x i64> ret <1 x i64> %res } @@ -731,6 +1181,11 @@ define <2 x 
i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) { ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptoui <2 x double> %op1 to <2 x i64> ret <2 x i64> %res } @@ -744,6 +1199,14 @@ define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptoui <4 x double> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -762,6 +1225,13 @@ define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) { ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x half> %op1 to <4 x i16> ret <4 x i16> %res } @@ -774,6 +1244,21 @@ define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptosi <8 x half> %op1 to <8 x i16> store <8 x i16> %res, ptr %b @@ -789,6 +1274,27 @@ define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v2.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtzs v3.4s, v3.4s +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -808,6 +1314,13 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; 
NONEON-NOSVE-NEXT: ret %res = fptosi <2 x half> %op1 to <2 x i32> ret <2 x i32> %res } @@ -821,6 +1334,12 @@ define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x half> %op1 to <4 x i32> ret <4 x i32> %res } @@ -837,6 +1356,20 @@ define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptosi <8 x half> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -861,6 +1394,26 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v2.4s, v2.4s +; NONEON-NOSVE-NEXT: fcvtzs v3.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i32> store <16 x i32> %res, ptr %b @@ -877,6 +1430,13 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) { ; CHECK-NEXT: fcvtzs x8, h0 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x half> %op1 to <1 x i64> ret <1 x i64> %res } @@ -893,6 +1453,18 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov h1, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: fcvtzs x9, s1 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x half> %op1 to <2 x i64> ret <2 x i64> %res } @@ -915,6 +1487,27 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: mov h1, v0.h[2] +; NONEON-NOSVE-NEXT: mov h2, v0.h[3] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: fcvtzs x8, s1 +; NONEON-NOSVE-NEXT: fcvtzs x10, s2 +; NONEON-NOSVE-NEXT: fcvtzs x11, s3 +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: mov v0.d[1], x10 +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %res = fptosi <4 x half> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -952,6 +1545,47 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1, #32] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: mov h1, v0.h[2] +; NONEON-NOSVE-NEXT: mov h3, v0.h[3] +; NONEON-NOSVE-NEXT: mov h4, v0.h[1] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: mov h5, v2.h[2] +; NONEON-NOSVE-NEXT: mov h6, v2.h[3] +; NONEON-NOSVE-NEXT: mov h7, v2.h[1] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvtzs x9, s0 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvtzs x13, s2 +; NONEON-NOSVE-NEXT: fcvtzs x8, s1 +; NONEON-NOSVE-NEXT: fcvt s1, h7 +; NONEON-NOSVE-NEXT: fcvtzs x10, s3 +; NONEON-NOSVE-NEXT: fcvtzs x11, s4 +; NONEON-NOSVE-NEXT: fcvtzs x12, s5 +; NONEON-NOSVE-NEXT: fcvtzs x14, s6 +; NONEON-NOSVE-NEXT: fmov d3, x13 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: fcvtzs x8, s1 +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: fmov d2, x12 +; NONEON-NOSVE-NEXT: mov v0.d[1], x10 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: mov v3.d[1], x8 +; NONEON-NOSVE-NEXT: mov v2.d[1], x14 +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fptosi <8 x half> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -1012,6 +1646,80 @@ define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q5, q2, [x1, #96] ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: mov h2, v1.h[2] +; NONEON-NOSVE-NEXT: fcvt s3, h1 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #24] +; NONEON-NOSVE-NEXT: mov h5, v1.h[3] +; NONEON-NOSVE-NEXT: mov h7, v0.h[2] +; NONEON-NOSVE-NEXT: mov h16, v0.h[3] +; NONEON-NOSVE-NEXT: fcvt s6, h0 +; NONEON-NOSVE-NEXT: mov h0, v0.h[1] +; NONEON-NOSVE-NEXT: mov h1, v1.h[1] +; NONEON-NOSVE-NEXT: fcvt s17, h4 +; NONEON-NOSVE-NEXT: mov h18, v4.h[2] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvtzs x8, s3 +; NONEON-NOSVE-NEXT: fcvt s3, h5 +; NONEON-NOSVE-NEXT: fcvt s5, h7 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: mov h16, v4.h[3] +; NONEON-NOSVE-NEXT: fcvtzs x9, s6 +; NONEON-NOSVE-NEXT: ldr d6, [sp, #8] +; NONEON-NOSVE-NEXT: fcvt s0, h0 +; NONEON-NOSVE-NEXT: fcvt s1, h1 +; NONEON-NOSVE-NEXT: mov h4, v4.h[1] +; NONEON-NOSVE-NEXT: fcvtzs x11, s2 +; NONEON-NOSVE-NEXT: mov h2, v6.h[2] +; NONEON-NOSVE-NEXT: fcvtzs x10, s17 +; NONEON-NOSVE-NEXT: fcvtzs x13, s5 +; NONEON-NOSVE-NEXT: fcvtzs x12, s3 +; NONEON-NOSVE-NEXT: mov h3, v6.h[3] +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov h5, v6.h[1] +; NONEON-NOSVE-NEXT: fcvt s17, h18 +; NONEON-NOSVE-NEXT: fcvtzs x14, s7 +; NONEON-NOSVE-NEXT: fmov d7, x8 +; NONEON-NOSVE-NEXT: fcvtzs x8, s0 +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fmov d0, x11 +; NONEON-NOSVE-NEXT: fcvtzs x11, s1 +; NONEON-NOSVE-NEXT: fmov d1, x13 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: fcvtzs x13, s16 +; NONEON-NOSVE-NEXT: fmov d16, x9 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvtzs x15, s17 +; NONEON-NOSVE-NEXT: mov v0.d[1], x12 +; NONEON-NOSVE-NEXT: mov v1.d[1], x14 +; NONEON-NOSVE-NEXT: fcvtzs x9, s2 +; NONEON-NOSVE-NEXT: mov v16.d[1], x8 +; NONEON-NOSVE-NEXT: fcvtzs x8, s6 +; NONEON-NOSVE-NEXT: fcvtzs x14, s4 +; NONEON-NOSVE-NEXT: fcvtzs x12, s3 +; NONEON-NOSVE-NEXT: mov v7.d[1], x11 +; 
NONEON-NOSVE-NEXT: fmov d3, x10 +; NONEON-NOSVE-NEXT: fcvtzs x11, s5 +; NONEON-NOSVE-NEXT: fmov d2, x15 +; NONEON-NOSVE-NEXT: stp q16, q1, [x1, #64] +; NONEON-NOSVE-NEXT: fmov d1, x9 +; NONEON-NOSVE-NEXT: fmov d4, x8 +; NONEON-NOSVE-NEXT: stp q7, q0, [x1] +; NONEON-NOSVE-NEXT: mov v2.d[1], x13 +; NONEON-NOSVE-NEXT: mov v3.d[1], x14 +; NONEON-NOSVE-NEXT: mov v1.d[1], x12 +; NONEON-NOSVE-NEXT: mov v4.d[1], x11 +; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #96] +; NONEON-NOSVE-NEXT: stp q4, q1, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i64> store <16 x i64> %res, ptr %b @@ -1030,6 +1738,11 @@ define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x float> %op1 to <2 x i16> ret <2 x i16> %res } @@ -1043,6 +1756,12 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x float> %op1 to <4 x i16> ret <4 x i16> %res } @@ -1060,6 +1779,14 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptosi <8 
x float> %op1 to <8 x i16> ret <8 x i16> %res @@ -1084,6 +1811,19 @@ define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v16f32_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v3.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtzs v2.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x float>, ptr %a %res = fptosi <16 x float> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -1102,6 +1842,11 @@ define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x float> %op1 to <2 x i32> ret <2 x i32> %res } @@ -1114,6 +1859,11 @@ define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = fptosi <4 x float> %op1 to <4 x i32> ret <4 x i32> %res } @@ -1127,6 +1877,14 @@ define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; 
NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptosi <8 x float> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -1146,6 +1904,13 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f32_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x float> %op1 to <1 x i64> ret <1 x i64> %res } @@ -1159,6 +1924,12 @@ define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x float> %op1 to <2 x i64> ret <2 x i64> %res } @@ -1175,6 +1946,20 @@ define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %res = fptosi <4 x float> %op1 to <4 x i64> store <4 x i64> %res, ptr %b @@ -1199,6 +1984,26 @@ define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s +; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s +; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s +; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d +; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptosi <8 x float> %op1 to <8 x i64> store <8 x i64> %res, ptr %b @@ -1218,6 +2023,12 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) { ; CHECK-NEXT: mov z0.h, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs w8, d0 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i16> ret <1 x i16> %res } @@ -1231,6 +2042,12 @@ define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) { ; CHECK-NEXT: uzp1 
z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x double> %op1 to <2 x i16> ret <2 x i16> %res } @@ -1259,6 +2076,15 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptosi <4 x double> %op1 to <4 x i16> ret <4 x i16> %res @@ -1302,6 +2128,23 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) { ; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: adrp x8, .LCPI61_0 +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d +; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d +; NONEON-NOSVE-NEXT: xtn v7.2s, v0.2d +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI61_0] +; NONEON-NOSVE-NEXT: xtn v6.2s, v1.2d +; NONEON-NOSVE-NEXT: xtn v5.2s, v2.2d +; NONEON-NOSVE-NEXT: xtn v4.2s, v3.2d +; NONEON-NOSVE-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptosi <8 x double> %op1 to <8 x i16> ret <8 x i16> %res @@ -1378,6 +2221,35 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
fcvtzs_v16f64_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96] +; NONEON-NOSVE-NEXT: adrp x8, .LCPI62_0 +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d +; NONEON-NOSVE-NEXT: fcvtzs v5.2d, v5.2d +; NONEON-NOSVE-NEXT: fcvtzs v4.2d, v4.2d +; NONEON-NOSVE-NEXT: fcvtzs v6.2d, v6.2d +; NONEON-NOSVE-NEXT: fcvtzs v7.2d, v7.2d +; NONEON-NOSVE-NEXT: xtn v19.2s, v0.2d +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI62_0] +; NONEON-NOSVE-NEXT: xtn v23.2s, v3.2d +; NONEON-NOSVE-NEXT: xtn v18.2s, v1.2d +; NONEON-NOSVE-NEXT: xtn v22.2s, v2.2d +; NONEON-NOSVE-NEXT: xtn v17.2s, v5.2d +; NONEON-NOSVE-NEXT: xtn v21.2s, v6.2d +; NONEON-NOSVE-NEXT: xtn v16.2s, v4.2d +; NONEON-NOSVE-NEXT: xtn v20.2s, v7.2d +; NONEON-NOSVE-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; NONEON-NOSVE-NEXT: tbl v0.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x double>, ptr %a %res = fptosi <16 x double> %op1 to <16 x i16> store <16 x i16> %res, ptr %b @@ -1397,6 +2269,13 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i32> ret <1 x i32> %res } @@ -1410,6 +2289,12 @@ define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x double> %op1 to <2 x i32> ret <2 x i32> %res } @@ -1427,6 +2312,14 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptosi <4 x double> %op1 to <4 x i32> ret <4 x i32> %res @@ -1451,6 +2344,19 @@ define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d +; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptosi <8 x double> %op1 to <8 x i32> store <8 x i32> %res, ptr %b @@ -1469,6 +2375,12 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs x8, d0 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = fptosi <1 x double> %op1 to <1 x i64> ret <1 x i64> %res } @@ -1481,6 +2393,11 @@ define 
<2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) { ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = fptosi <2 x double> %op1 to <2 x i64> ret <2 x i64> %res } @@ -1494,6 +2411,14 @@ define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptosi <4 x double> %op1 to <4 x i64> store <4 x i64> %res, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll index c1c7b5c05f5d55..d3b09374676556 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -27,6 +28,14 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uzp1 v2.4h, v2.4h, v0.4h +; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15 +; 
NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x half> %op1, <2 x half> %op2 ret <2 x half> %sel } @@ -45,6 +54,13 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15 +; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2 ret <4 x half> %sel } @@ -64,6 +80,14 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: shl v2.8h, v2.8h, #15 +; NONEON-NOSVE-NEXT: cmlt v2.8h, v2.8h, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2 ret <8 x half> %sel } @@ -80,6 +104,126 @@ define void @select_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: mov h2, v1.h[1] +; NONEON-NOSVE-NEXT: mov h3, v0.h[1] +; NONEON-NOSVE-NEXT: mov h4, v1.h[2] +; NONEON-NOSVE-NEXT: mov h5, v0.h[2] +; NONEON-NOSVE-NEXT: fcvt s6, h1 +; NONEON-NOSVE-NEXT: fcvt s7, h0 +; NONEON-NOSVE-NEXT: mov h16, v1.h[6] +; NONEON-NOSVE-NEXT: mov h17, v0.h[6] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; 
NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: mov h2, v1.h[3] +; NONEON-NOSVE-NEXT: mov h3, v0.h[3] +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[4] +; NONEON-NOSVE-NEXT: mov h7, v0.h[4] +; NONEON-NOSVE-NEXT: fcvt s2, h2 +; NONEON-NOSVE-NEXT: fcvt s3, h3 +; NONEON-NOSVE-NEXT: csetm w14, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v1.h[5] +; NONEON-NOSVE-NEXT: mov h5, v0.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w12, eq +; NONEON-NOSVE-NEXT: fcmp s3, s2 +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: ldr q3, [x1, #16] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w11, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v1.h[7] +; NONEON-NOSVE-NEXT: mov h7, v0.h[7] +; NONEON-NOSVE-NEXT: mov h18, v3.h[3] +; NONEON-NOSVE-NEXT: csetm w13, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: mov h4, v3.h[1] +; NONEON-NOSVE-NEXT: mov h5, v2.h[1] +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: csetm w9, eq +; NONEON-NOSVE-NEXT: fcmp s17, s16 +; NONEON-NOSVE-NEXT: mov h16, v3.h[2] +; NONEON-NOSVE-NEXT: fcvt s4, h4 +; NONEON-NOSVE-NEXT: mov h17, v2.h[2] +; NONEON-NOSVE-NEXT: fcvt s5, h5 +; NONEON-NOSVE-NEXT: csetm w10, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: fcvt s6, h3 +; NONEON-NOSVE-NEXT: fcvt s7, h2 +; NONEON-NOSVE-NEXT: csetm w15, eq +; NONEON-NOSVE-NEXT: fcmp s5, s4 +; NONEON-NOSVE-NEXT: fmov s4, w14 +; NONEON-NOSVE-NEXT: csetm w16, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v2.h[3] +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: fcvt s16, h17 +; NONEON-NOSVE-NEXT: mov v4.h[1], w8 +; NONEON-NOSVE-NEXT: fcvt s17, h18 +; NONEON-NOSVE-NEXT: csetm 
w14, eq +; NONEON-NOSVE-NEXT: fmov s5, w14 +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcmp s16, s7 +; NONEON-NOSVE-NEXT: mov h7, v3.h[4] +; NONEON-NOSVE-NEXT: mov h16, v2.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[2], w12 +; NONEON-NOSVE-NEXT: mov v5.h[1], w16 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s6, s17 +; NONEON-NOSVE-NEXT: mov h17, v2.h[5] +; NONEON-NOSVE-NEXT: fcvt s6, h7 +; NONEON-NOSVE-NEXT: fcvt s7, h16 +; NONEON-NOSVE-NEXT: mov h16, v3.h[5] +; NONEON-NOSVE-NEXT: mov v4.h[3], w11 +; NONEON-NOSVE-NEXT: mov v5.h[2], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcvt s17, h17 +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov h6, v3.h[6] +; NONEON-NOSVE-NEXT: mov h7, v2.h[6] +; NONEON-NOSVE-NEXT: fcvt s16, h16 +; NONEON-NOSVE-NEXT: mov v4.h[4], w13 +; NONEON-NOSVE-NEXT: mov v5.h[3], w8 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcvt s6, h6 +; NONEON-NOSVE-NEXT: fcvt s7, h7 +; NONEON-NOSVE-NEXT: fcmp s17, s16 +; NONEON-NOSVE-NEXT: mov h16, v3.h[7] +; NONEON-NOSVE-NEXT: mov h17, v2.h[7] +; NONEON-NOSVE-NEXT: mov v5.h[4], w8 +; NONEON-NOSVE-NEXT: mov v4.h[5], w9 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: fcvt s6, h16 +; NONEON-NOSVE-NEXT: fcvt s7, h17 +; NONEON-NOSVE-NEXT: mov v5.h[5], w8 +; NONEON-NOSVE-NEXT: mov v4.h[6], w10 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: fcmp s7, s6 +; NONEON-NOSVE-NEXT: mov v5.h[6], w8 +; NONEON-NOSVE-NEXT: mov v4.h[7], w15 +; NONEON-NOSVE-NEXT: csetm w8, eq +; NONEON-NOSVE-NEXT: mov v5.h[7], w8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %mask = fcmp oeq <16 x half> %op1, %op2 @@ -102,6 +246,13 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m ; 
CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.2s, v2.2s, #31 +; NONEON-NOSVE-NEXT: cmlt v2.2s, v2.2s, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2 ret <2 x float> %sel } @@ -121,6 +272,14 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: shl v2.4s, v2.4s, #31 +; NONEON-NOSVE-NEXT: cmlt v2.4s, v2.4s, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2 ret <4 x float> %sel } @@ -137,6 +296,18 @@ define void @select_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: fcmeq v4.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcmeq v5.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %mask = fcmp oeq <8 x float> %op1, %op2 @@ -151,6 +322,14 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1> ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; 
NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: fmov d2, x8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2 ret <1 x double> %sel } @@ -170,6 +349,14 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: shl v2.2d, v2.2d, #63 +; NONEON-NOSVE-NEXT: cmlt v2.2d, v2.2d, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2 ret <2 x double> %sel } @@ -186,6 +373,18 @@ define void @select_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: fcmeq v4.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: fcmeq v5.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %mask = fcmp oeq <4 x double> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll index ff38db8c10c04b..ae97a266c6ff0d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: 
llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -21,6 +22,14 @@ define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x i8> %op1, i8 5, i64 3 ret <4 x i8> %r } @@ -38,6 +47,14 @@ define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) { ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.b[7], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <8 x i8> %op1, i8 5, i64 7 ret <8 x i8> %r } @@ -55,6 +72,12 @@ define <16 x i8> @insertelement_v16i8(<16 x i8> %op1) { ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.b[15], w8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <16 x i8> %op1, i8 5, i64 15 ret <16 x i8> %r } @@ -72,6 +95,12 @@ define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) { ; CHECK-NEXT: mov z1.b, p0/m, w8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; 
NONEON-NOSVE-NEXT: mov v1.b[15], w8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <32 x i8> %op1, i8 5, i64 31 ret <32 x i8> %r } @@ -90,6 +119,14 @@ define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.s[1], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x i16> %op1, i16 5, i64 1 ret <2 x i16> %r } @@ -107,6 +144,14 @@ define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x i16> %op1, i16 5, i64 3 ret <4 x i16> %r } @@ -124,6 +169,12 @@ define <8 x i16> @insertelement_v8i16(<8 x i16> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.h[7], w8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <8 x i16> %op1, i16 5, i64 7 ret <8 x i16> %r } @@ -141,6 +192,12 @@ define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) { ; CHECK-NEXT: mov z1.h, p0/m, w8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v1.h[7], w8 +; 
NONEON-NOSVE-NEXT: ret %r = insertelement <16 x i16> %op1, i16 5, i64 15 ret <16 x i16> %r } @@ -159,6 +216,14 @@ define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.s[1], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x i32> %op1, i32 5, i64 1 ret <2 x i32> %r } @@ -176,6 +241,12 @@ define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.s[3], w8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x i32> %op1, i32 5, i64 3 ret <4 x i32> %r } @@ -193,6 +264,13 @@ define <8 x i32> @insertelement_v8i32(ptr %a) { ; CHECK-NEXT: mov z1.s, p0/m, w8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v1.s[3], w8 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %r = insertelement <8 x i32> %op1, i32 5, i64 7 ret <8 x i32> %r @@ -205,6 +283,12 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) { ; CHECK-NEXT: mov z0.d, #5 // =0x5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <1 x i64> %op1, i64 5, i64 0 ret <1 x i64> %r } @@ -222,6 +306,12 @@ 
define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) { ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v0.d[1], x8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x i64> %op1, i64 5, i64 1 ret <2 x i64> %r } @@ -239,6 +329,13 @@ define <4 x i64> @insertelement_v4i64(ptr %a) { ; CHECK-NEXT: mov z1.d, p0/m, x8 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5 +; NONEON-NOSVE-NEXT: mov v1.d[1], x8 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %r = insertelement <4 x i64> %op1, i64 5, i64 3 ret <4 x i64> %r @@ -257,6 +354,16 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0 +; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI14_0 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: ld1r { v1.4h }, [x8] +; NONEON-NOSVE-NEXT: mov v1.h[0], v0.h[0] +; NONEON-NOSVE-NEXT: fmov d0, d1 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x half> %op1, half 5.0, i64 1 ret <2 x half> %r } @@ -274,6 +381,15 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI15_0 +; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI15_0 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed 
$q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x half> %op1, half 5.0, i64 3 ret <4 x half> %r } @@ -291,6 +407,13 @@ define <8 x half> @insertelement_v8f16(<8 x half> %op1) { ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI16_0 +; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI16_0 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[7], [x8] +; NONEON-NOSVE-NEXT: ret %r = insertelement <8 x half> %op1, half 5.0, i64 7 ret <8 x half> %r } @@ -308,6 +431,14 @@ define <16 x half> @insertelement_v16f16(ptr %a) { ; CHECK-NEXT: mov z1.h, p0/m, h2 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: adrp x8, .LCPI17_0 +; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI17_0 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[7], [x8] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %r = insertelement <16 x half> %op1, half 5.0, i64 15 ret <16 x half> %r @@ -327,6 +458,14 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov s1, #5.00000000 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: mov v0.s[1], v1.s[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x float> %op1, float 5.0, i64 1 ret <2 x float> %r } @@ -344,6 +483,12 @@ define <4 x float> @insertelement_v4f32(<4 x float> %op1) { ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov 
s1, #5.00000000 +; NONEON-NOSVE-NEXT: mov v0.s[3], v1.s[0] +; NONEON-NOSVE-NEXT: ret %r = insertelement <4 x float> %op1, float 5.0, i64 3 ret <4 x float> %r } @@ -361,6 +506,13 @@ define <8 x float> @insertelement_v8f32(ptr %a) { ; CHECK-NEXT: mov z1.s, p0/m, s2 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov s2, #5.00000000 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: mov v1.s[3], v2.s[0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %r = insertelement <8 x float> %op1, float 5.0, i64 7 ret <8 x float> %r @@ -372,6 +524,12 @@ define <1 x double> @insertelement_v1f64(<1 x double> %op1) { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, #5.00000000 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %r = insertelement <1 x double> %op1, double 5.0, i64 0 ret <1 x double> %r } @@ -389,6 +547,12 @@ define <2 x double> @insertelement_v2f64(<2 x double> %op1) { ; CHECK-NEXT: mov z0.d, p0/m, d1 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov d1, #5.00000000 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %r = insertelement <2 x double> %op1, double 5.0, i64 1 ret <2 x double> %r } @@ -406,6 +570,14 @@ define <4 x double> @insertelement_v4f64(ptr %a) { ; CHECK-NEXT: mov z1.d, p0/m, d2 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: insertelement_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov d0, #5.00000000 +; NONEON-NOSVE-NEXT: ldr q1, [x0, #16] +; NONEON-NOSVE-NEXT: mov v1.d[1], v0.d[0] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 
= load <4 x double>, ptr %a %r = insertelement <4 x double> %op1, double 5.0, i64 3 ret <4 x double> %r diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll index ee1706bc7c3549..1b438559e05380 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,11 @@ define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = add <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -28,6 +34,11 @@ define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = add <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -40,6 +51,11 @@ define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = add <16 x i8> %op1, %op2 
ret <16 x i8> %res } @@ -53,6 +69,15 @@ define void @add_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.b, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = add <32 x i8> %op1, %op2 @@ -68,6 +93,11 @@ define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = add <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -80,6 +110,11 @@ define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = add <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -92,6 +127,11 @@ define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = add <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -105,6 +145,15 @@ define void @add_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.h, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; 
NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = add <16 x i16> %op1, %op2 @@ -120,6 +169,11 @@ define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = add <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -132,6 +186,11 @@ define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = add <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -145,6 +204,15 @@ define void @add_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.s, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: add v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: add v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = add <8 x i32> %op1, %op2 @@ -160,6 +228,11 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = add <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -172,6 +245,11 @@ define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; 
CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: add v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = add <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -185,6 +263,15 @@ define void @add_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: add v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: add v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = add <4 x i64> %op1, %op2 @@ -213,6 +300,11 @@ define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE2-NEXT: mul z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = mul <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -234,6 +326,11 @@ define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE2-NEXT: mul z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = mul <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -255,6 +352,11 @@ define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE2-NEXT: mul z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = mul <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -279,6 +381,15 @@ define void @mul_v32i8(ptr %a, ptr 
%b) { ; SVE2-NEXT: mul z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: mul v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: mul v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = mul <32 x i8> %op1, %op2 @@ -303,6 +414,11 @@ define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE2-NEXT: mul z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = mul <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -324,6 +440,11 @@ define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE2-NEXT: mul z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = mul <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -345,6 +466,11 @@ define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE2-NEXT: mul z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = mul <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -369,6 +495,15 @@ define void @mul_v16i16(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: mul v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: mul v1.8h, v2.8h, v3.8h +; 
NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = mul <16 x i16> %op1, %op2 @@ -393,6 +528,11 @@ define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE2-NEXT: mul z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = mul <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -414,6 +554,11 @@ define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE2-NEXT: mul z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mul v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = mul <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -438,6 +583,15 @@ define void @mul_v8i32(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: mul v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: mul v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = mul <8 x i32> %op1, %op2 @@ -462,6 +616,16 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE2-NEXT: mul z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = mul <1 x i64> %op1, 
%op2 ret <1 x i64> %res } @@ -483,6 +647,18 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE2-NEXT: mul z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x10, d1 +; NONEON-NOSVE-NEXT: fmov x11, d0 +; NONEON-NOSVE-NEXT: mov x8, v1.d[1] +; NONEON-NOSVE-NEXT: mov x9, v0.d[1] +; NONEON-NOSVE-NEXT: mul x10, x11, x10 +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: mov v0.d[1], x8 +; NONEON-NOSVE-NEXT: ret %res = mul <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -507,6 +683,29 @@ define void @mul_v4i64(ptr %a, ptr %b) { ; SVE2-NEXT: mul z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: fmov x12, d2 +; NONEON-NOSVE-NEXT: mov x11, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: mov x10, v3.d[1] +; NONEON-NOSVE-NEXT: mov x13, v1.d[1] +; NONEON-NOSVE-NEXT: mov x14, v0.d[1] +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: fmov x9, d3 +; NONEON-NOSVE-NEXT: mul x10, x11, x10 +; NONEON-NOSVE-NEXT: mul x9, x12, x9 +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: mul x11, x14, x13 +; NONEON-NOSVE-NEXT: fmov d0, x9 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: mov v0.d[1], x10 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = mul <4 x i64> %op1, %op2 @@ -526,6 +725,11 @@ define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.4h, v0.4h, v1.4h +; 
NONEON-NOSVE-NEXT: ret %res = sub <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -538,6 +742,11 @@ define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: sub z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = sub <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -550,6 +759,11 @@ define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: sub z0.b, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = sub <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -563,6 +777,15 @@ define void @sub_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.b, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: sub v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: sub v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = sub <32 x i8> %op1, %op2 @@ -578,6 +801,11 @@ define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = sub <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -590,6 +818,11 @@ define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i16: +; NONEON-NOSVE: // %bb.0: 
+; NONEON-NOSVE-NEXT: sub v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = sub <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -602,6 +835,11 @@ define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = sub <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -615,6 +853,15 @@ define void @sub_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.h, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: sub v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: sub v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = sub <16 x i16> %op1, %op2 @@ -630,6 +877,11 @@ define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = sub <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -642,6 +894,11 @@ define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = sub <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -655,6 +912,15 @@ define void @sub_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.s, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i32: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: sub v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: sub v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = sub <8 x i32> %op1, %op2 @@ -670,6 +936,11 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = sub <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -682,6 +953,11 @@ define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = sub <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -695,6 +971,15 @@ define void @sub_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: sub z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: sub v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: sub v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = sub <4 x i64> %op1, %op2 @@ -715,6 +1000,13 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) { ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: abs v0.4h, v0.4h +; 
NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %op1, i1 false) ret <4 x i8> %res } @@ -727,6 +1019,11 @@ define <8 x i8> @abs_v8i8(<8 x i8> %op1) { ; CHECK-NEXT: abs z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %op1, i1 false) ret <8 x i8> %res } @@ -739,6 +1036,11 @@ define <16 x i8> @abs_v16i8(<16 x i8> %op1) { ; CHECK-NEXT: abs z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %op1, i1 false) ret <16 x i8> %res } @@ -752,6 +1054,14 @@ define void @abs_v32i8(ptr %a) { ; CHECK-NEXT: abs z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: abs v0.16b, v0.16b +; NONEON-NOSVE-NEXT: abs v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %op1, i1 false) store <32 x i8> %res, ptr %a @@ -767,6 +1077,13 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: abs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %op1, i1 false) ret <2 x i16> %res } @@ -779,6 +1096,11 @@ define <4 x i16> @abs_v4i16(<4 x i16> %op1) { ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 
killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.4h, v0.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %op1, i1 false) ret <4 x i16> %res } @@ -791,6 +1113,11 @@ define <8 x i16> @abs_v8i16(<8 x i16> %op1) { ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.8h, v0.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %op1, i1 false) ret <8 x i16> %res } @@ -804,6 +1131,14 @@ define void @abs_v16i16(ptr %a) { ; CHECK-NEXT: abs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: abs v0.8h, v0.8h +; NONEON-NOSVE-NEXT: abs v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %op1, i1 false) store <16 x i16> %res, ptr %a @@ -818,6 +1153,11 @@ define <2 x i32> @abs_v2i32(<2 x i32> %op1) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false) ret <2 x i32> %res } @@ -830,6 +1170,11 @@ define <4 x i32> @abs_v4i32(<4 x i32> %op1) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false) ret <4 x i32> %res } @@ -843,6 +1188,14 @@ define void @abs_v8i32(ptr %a) { ; 
CHECK-NEXT: abs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: abs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false) store <8 x i32> %res, ptr %a @@ -857,6 +1210,11 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) { ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs d0, d0 +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 false) ret <1 x i64> %res } @@ -869,6 +1227,11 @@ define <2 x i64> @abs_v2i64(<2 x i64> %op1) { ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false) ret <2 x i64> %res } @@ -882,6 +1245,14 @@ define void @abs_v4i64(ptr %a) { ; CHECK-NEXT: abs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: abs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll index c2f3bbfb51dd52..ee0ca0e60b5e51 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,11 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <8 x i8> %op1, %op2 %sext = sext <8 x i1> %cmp to <8 x i8> ret <8 x i8> %sext @@ -33,6 +39,11 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <16 x i8> %op1, %op2 %sext = sext <16 x i1> %cmp to <16 x i8> ret <16 x i8> %sext @@ -50,6 +61,15 @@ define void @icmp_eq_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmeq v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: cmeq v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %cmp = icmp eq <32 x i8> %op1, %op2 @@ 
-68,6 +88,11 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <4 x i16> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i16> ret <4 x i16> %sext @@ -83,6 +108,11 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <8 x i16> %op1, %op2 %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext @@ -100,6 +130,15 @@ define void @icmp_eq_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmeq v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: cmeq v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %cmp = icmp eq <16 x i16> %op1, %op2 @@ -118,6 +157,11 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <2 x i32> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i32> ret <2 x i32> %sext @@ -133,6 +177,11 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 
x i32> %op2) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <4 x i32> %op1, %op2 %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext @@ -150,6 +199,15 @@ define void @icmp_eq_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmeq v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: cmeq v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %cmp = icmp eq <8 x i32> %op1, %op2 @@ -168,6 +226,11 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <1 x i64> %op1, %op2 %sext = sext <1 x i1> %cmp to <1 x i64> ret <1 x i64> %sext @@ -183,6 +246,11 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmeq v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %cmp = icmp eq <2 x i64> %op1, %op2 %sext = sext <2 x i1> %cmp to <2 x i64> ret <2 x i64> %sext @@ -200,6 +268,15 @@ define void @icmp_eq_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, 
[x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmeq v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: cmeq v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %cmp = icmp eq <4 x i64> %op1, %op2 @@ -224,6 +301,17 @@ define void @icmp_ne_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ne_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmeq v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: cmeq v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b +; NONEON-NOSVE-NEXT: mvn v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %cmp = icmp ne <32 x i8> %op1, %op2 @@ -246,6 +334,14 @@ define void @icmp_sge_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sge_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: cmge v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %op2 = load <8 x i16>, ptr %b %cmp = icmp sge <8 x i16> %op1, %op2 @@ -270,6 +366,15 @@ define void @icmp_sgt_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sgt_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmgt v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: cmgt 
v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %cmp = icmp sgt <16 x i16> %op1, %op2 @@ -292,6 +397,14 @@ define void @icmp_sle_v4i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sle_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: cmge v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %op2 = load <4 x i32>, ptr %b %cmp = icmp sle <4 x i32> %op1, %op2 @@ -316,6 +429,15 @@ define void @icmp_slt_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_slt_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmgt v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: cmgt v1.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %cmp = icmp slt <8 x i32> %op1, %op2 @@ -338,6 +460,14 @@ define void @icmp_uge_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_uge_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: cmhs v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp uge <2 x i64> %op1, %op2 @@ -360,6 +490,14 @@ define void @icmp_ugt_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ugt_v2i64: 
+; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: cmhi v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp ugt <2 x i64> %op1, %op2 @@ -382,6 +520,14 @@ define void @icmp_ule_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ule_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: cmhs v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp ule <2 x i64> %op1, %op2 @@ -404,6 +550,14 @@ define void @icmp_ult_v2i64(ptr %a, ptr %b) { ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ult_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: cmhi v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %op2 = load <2 x i64>, ptr %b %cmp = icmp ult <2 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll index e6fd775b4cfb9b..d79d6c18ed5a6e 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc 
-force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -24,6 +25,31 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w10, v0.h[0] +; NONEON-NOSVE-NEXT: smov w11, v0.h[2] +; NONEON-NOSVE-NEXT: smov w12, v0.h[3] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v1.h[0] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.h[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s0, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w8, w12, w11 +; NONEON-NOSVE-NEXT: mov v0.h[2], w10 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -51,6 +77,45 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: smov w8, v1.b[1] +; NONEON-NOSVE-NEXT: smov w9, v0.b[1] +; NONEON-NOSVE-NEXT: smov w10, v0.b[0] +; NONEON-NOSVE-NEXT: smov w11, v0.b[2] +; NONEON-NOSVE-NEXT: smov w12, v0.b[3] +; NONEON-NOSVE-NEXT: smov w13, v0.b[4] +; NONEON-NOSVE-NEXT: smov w14, v0.b[5] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; 
NONEON-NOSVE-NEXT: smov w9, v1.b[0] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v1.b[3] +; NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: smov w9, v1.b[6] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: smov w12, v1.b[4] +; NONEON-NOSVE-NEXT: mov v2.b[2], w10 +; NONEON-NOSVE-NEXT: smov w10, v0.b[6] +; NONEON-NOSVE-NEXT: sdiv w12, w13, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.b[5] +; NONEON-NOSVE-NEXT: mov v2.b[3], w11 +; NONEON-NOSVE-NEXT: smov w11, v0.b[7] +; NONEON-NOSVE-NEXT: sdiv w8, w14, w13 +; NONEON-NOSVE-NEXT: mov v2.b[4], w12 +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[7] +; NONEON-NOSVE-NEXT: mov v2.b[5], w8 +; NONEON-NOSVE-NEXT: sdiv w8, w11, w10 +; NONEON-NOSVE-NEXT: mov v2.b[6], w9 +; NONEON-NOSVE-NEXT: mov v2.b[7], w8 +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -98,6 +163,75 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smov w8, v1.b[1] +; NONEON-NOSVE-NEXT: smov w9, v0.b[1] +; NONEON-NOSVE-NEXT: smov w10, v0.b[0] +; NONEON-NOSVE-NEXT: smov w11, v0.b[2] +; NONEON-NOSVE-NEXT: smov w12, v0.b[3] +; NONEON-NOSVE-NEXT: smov w13, v0.b[4] +; NONEON-NOSVE-NEXT: smov w14, v0.b[5] +; NONEON-NOSVE-NEXT: smov w15, v0.b[6] +; NONEON-NOSVE-NEXT: smov w16, v0.b[7] +; NONEON-NOSVE-NEXT: smov w17, v0.b[8] +; NONEON-NOSVE-NEXT: smov w18, v0.b[9] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v1.b[0] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v1.b[3] +; 
NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: smov w9, v1.b[10] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: smov w12, v1.b[4] +; NONEON-NOSVE-NEXT: mov v2.b[2], w10 +; NONEON-NOSVE-NEXT: smov w10, v0.b[10] +; NONEON-NOSVE-NEXT: sdiv w12, w13, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.b[5] +; NONEON-NOSVE-NEXT: mov v2.b[3], w11 +; NONEON-NOSVE-NEXT: smov w11, v0.b[11] +; NONEON-NOSVE-NEXT: sdiv w13, w14, w13 +; NONEON-NOSVE-NEXT: smov w14, v1.b[6] +; NONEON-NOSVE-NEXT: mov v2.b[4], w12 +; NONEON-NOSVE-NEXT: smov w12, v0.b[12] +; NONEON-NOSVE-NEXT: sdiv w14, w15, w14 +; NONEON-NOSVE-NEXT: smov w15, v1.b[7] +; NONEON-NOSVE-NEXT: mov v2.b[5], w13 +; NONEON-NOSVE-NEXT: smov w13, v0.b[13] +; NONEON-NOSVE-NEXT: sdiv w15, w16, w15 +; NONEON-NOSVE-NEXT: smov w16, v1.b[8] +; NONEON-NOSVE-NEXT: mov v2.b[6], w14 +; NONEON-NOSVE-NEXT: sdiv w16, w17, w16 +; NONEON-NOSVE-NEXT: smov w17, v1.b[9] +; NONEON-NOSVE-NEXT: mov v2.b[7], w15 +; NONEON-NOSVE-NEXT: sdiv w8, w18, w17 +; NONEON-NOSVE-NEXT: mov v2.b[8], w16 +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[11] +; NONEON-NOSVE-NEXT: mov v2.b[9], w8 +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v1.b[12] +; NONEON-NOSVE-NEXT: mov v2.b[10], w9 +; NONEON-NOSVE-NEXT: smov w9, v1.b[14] +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: smov w12, v1.b[13] +; NONEON-NOSVE-NEXT: mov v2.b[11], w10 +; NONEON-NOSVE-NEXT: smov w10, v1.b[15] +; NONEON-NOSVE-NEXT: sdiv w8, w13, w12 +; NONEON-NOSVE-NEXT: smov w12, v0.b[14] +; NONEON-NOSVE-NEXT: mov v2.b[12], w11 +; NONEON-NOSVE-NEXT: smov w11, v0.b[15] +; NONEON-NOSVE-NEXT: sdiv w9, w12, w9 +; NONEON-NOSVE-NEXT: mov v2.b[13], w8 +; NONEON-NOSVE-NEXT: sdiv w8, w11, w10 +; NONEON-NOSVE-NEXT: mov v2.b[14], w9 +; NONEON-NOSVE-NEXT: mov v2.b[15], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = sdiv <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ 
-178,6 +312,163 @@ define void @sdiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b ; CHECK-NEXT: stp q3, q2, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x27, [sp, #-80]! // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -80 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: smov w8, v1.b[1] +; NONEON-NOSVE-NEXT: smov w9, v0.b[1] +; NONEON-NOSVE-NEXT: smov w10, v0.b[0] +; NONEON-NOSVE-NEXT: smov w11, v0.b[2] +; NONEON-NOSVE-NEXT: smov w12, v0.b[3] +; NONEON-NOSVE-NEXT: smov w13, v0.b[4] +; NONEON-NOSVE-NEXT: smov w14, v0.b[5] +; NONEON-NOSVE-NEXT: smov w15, v0.b[6] +; NONEON-NOSVE-NEXT: smov w17, v0.b[8] +; NONEON-NOSVE-NEXT: smov w2, v0.b[10] +; NONEON-NOSVE-NEXT: smov w3, v0.b[11] +; NONEON-NOSVE-NEXT: smov w4, v0.b[12] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v1.b[0] +; NONEON-NOSVE-NEXT: smov w5, v0.b[13] +; NONEON-NOSVE-NEXT: smov w6, v0.b[14] +; NONEON-NOSVE-NEXT: smov w1, v3.b[1] +; NONEON-NOSVE-NEXT: smov w7, v2.b[0] +; NONEON-NOSVE-NEXT: smov w19, v2.b[2] +; NONEON-NOSVE-NEXT: smov w20, v2.b[3] +; NONEON-NOSVE-NEXT: smov w21, v2.b[4] +; NONEON-NOSVE-NEXT: 
smov w22, v2.b[5] +; NONEON-NOSVE-NEXT: smov w23, v2.b[6] +; NONEON-NOSVE-NEXT: smov w24, v2.b[7] +; NONEON-NOSVE-NEXT: smov w25, v2.b[8] +; NONEON-NOSVE-NEXT: smov w26, v2.b[9] +; NONEON-NOSVE-NEXT: smov w27, v2.b[10] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[2] +; NONEON-NOSVE-NEXT: sdiv w11, w11, w10 +; NONEON-NOSVE-NEXT: smov w10, v1.b[3] +; NONEON-NOSVE-NEXT: fmov s5, w9 +; NONEON-NOSVE-NEXT: smov w9, v3.b[11] +; NONEON-NOSVE-NEXT: mov v5.b[1], w8 +; NONEON-NOSVE-NEXT: sdiv w10, w12, w10 +; NONEON-NOSVE-NEXT: smov w12, v1.b[4] +; NONEON-NOSVE-NEXT: mov v5.b[2], w11 +; NONEON-NOSVE-NEXT: smov w11, v2.b[11] +; NONEON-NOSVE-NEXT: sdiv w13, w13, w12 +; NONEON-NOSVE-NEXT: smov w12, v1.b[5] +; NONEON-NOSVE-NEXT: mov v5.b[3], w10 +; NONEON-NOSVE-NEXT: smov w10, v3.b[12] +; NONEON-NOSVE-NEXT: sdiv w12, w14, w12 +; NONEON-NOSVE-NEXT: smov w14, v1.b[6] +; NONEON-NOSVE-NEXT: mov v5.b[4], w13 +; NONEON-NOSVE-NEXT: smov w13, v2.b[14] +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: smov w14, v1.b[7] +; NONEON-NOSVE-NEXT: smov w15, v0.b[7] +; NONEON-NOSVE-NEXT: mov v5.b[5], w12 +; NONEON-NOSVE-NEXT: smov w12, v2.b[13] +; NONEON-NOSVE-NEXT: sdiv w14, w15, w14 +; NONEON-NOSVE-NEXT: smov w15, v1.b[8] +; NONEON-NOSVE-NEXT: mov v5.b[6], w16 +; NONEON-NOSVE-NEXT: sdiv w18, w17, w15 +; NONEON-NOSVE-NEXT: smov w15, v1.b[9] +; NONEON-NOSVE-NEXT: smov w17, v0.b[9] +; NONEON-NOSVE-NEXT: mov v5.b[7], w14 +; NONEON-NOSVE-NEXT: sdiv w17, w17, w15 +; NONEON-NOSVE-NEXT: smov w15, v1.b[10] +; NONEON-NOSVE-NEXT: mov v5.b[8], w18 +; NONEON-NOSVE-NEXT: sdiv w15, w2, w15 +; NONEON-NOSVE-NEXT: smov w2, v1.b[11] +; NONEON-NOSVE-NEXT: mov v5.b[9], w17 +; NONEON-NOSVE-NEXT: sdiv w2, w3, w2 +; NONEON-NOSVE-NEXT: smov w3, v1.b[12] +; NONEON-NOSVE-NEXT: mov v5.b[10], w15 +; NONEON-NOSVE-NEXT: sdiv w3, w4, w3 +; NONEON-NOSVE-NEXT: smov w4, v1.b[13] +; NONEON-NOSVE-NEXT: mov v5.b[11], w2 +; NONEON-NOSVE-NEXT: sdiv w4, w5, w4 +; NONEON-NOSVE-NEXT: smov w5, 
v1.b[14] +; NONEON-NOSVE-NEXT: mov v5.b[12], w3 +; NONEON-NOSVE-NEXT: sdiv w5, w6, w5 +; NONEON-NOSVE-NEXT: smov w6, v2.b[1] +; NONEON-NOSVE-NEXT: mov v5.b[13], w4 +; NONEON-NOSVE-NEXT: sdiv w1, w6, w1 +; NONEON-NOSVE-NEXT: smov w6, v3.b[0] +; NONEON-NOSVE-NEXT: mov v5.b[14], w5 +; NONEON-NOSVE-NEXT: sdiv w6, w7, w6 +; NONEON-NOSVE-NEXT: smov w7, v3.b[2] +; NONEON-NOSVE-NEXT: sdiv w7, w19, w7 +; NONEON-NOSVE-NEXT: smov w19, v3.b[3] +; NONEON-NOSVE-NEXT: fmov s4, w6 +; NONEON-NOSVE-NEXT: mov v4.b[1], w1 +; NONEON-NOSVE-NEXT: sdiv w19, w20, w19 +; NONEON-NOSVE-NEXT: smov w20, v3.b[4] +; NONEON-NOSVE-NEXT: mov v4.b[2], w7 +; NONEON-NOSVE-NEXT: sdiv w20, w21, w20 +; NONEON-NOSVE-NEXT: smov w21, v3.b[5] +; NONEON-NOSVE-NEXT: mov v4.b[3], w19 +; NONEON-NOSVE-NEXT: sdiv w21, w22, w21 +; NONEON-NOSVE-NEXT: smov w22, v3.b[6] +; NONEON-NOSVE-NEXT: mov v4.b[4], w20 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w22, w23, w22 +; NONEON-NOSVE-NEXT: smov w23, v3.b[7] +; NONEON-NOSVE-NEXT: mov v4.b[5], w21 +; NONEON-NOSVE-NEXT: sdiv w23, w24, w23 +; NONEON-NOSVE-NEXT: smov w24, v3.b[8] +; NONEON-NOSVE-NEXT: mov v4.b[6], w22 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w24, w25, w24 +; NONEON-NOSVE-NEXT: smov w25, v3.b[9] +; NONEON-NOSVE-NEXT: mov v4.b[7], w23 +; NONEON-NOSVE-NEXT: sdiv w25, w26, w25 +; NONEON-NOSVE-NEXT: smov w26, v3.b[10] +; NONEON-NOSVE-NEXT: mov v4.b[8], w24 +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w8, w27, w26 +; NONEON-NOSVE-NEXT: mov v4.b[9], w25 +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w9, w11, w9 +; NONEON-NOSVE-NEXT: smov w11, v2.b[12] +; NONEON-NOSVE-NEXT: mov v4.b[10], w8 +; NONEON-NOSVE-NEXT: smov w8, v3.b[15] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v3.b[13] +; NONEON-NOSVE-NEXT: mov v4.b[11], w9 +; 
NONEON-NOSVE-NEXT: smov w9, v1.b[15] +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: smov w12, v3.b[14] +; NONEON-NOSVE-NEXT: mov v4.b[12], w10 +; NONEON-NOSVE-NEXT: smov w10, v0.b[15] +; NONEON-NOSVE-NEXT: sdiv w12, w13, w12 +; NONEON-NOSVE-NEXT: smov w13, v2.b[15] +; NONEON-NOSVE-NEXT: mov v4.b[13], w11 +; NONEON-NOSVE-NEXT: sdiv w8, w13, w8 +; NONEON-NOSVE-NEXT: mov v4.b[14], w12 +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: mov v4.b[15], w8 +; NONEON-NOSVE-NEXT: mov v5.b[15], w9 +; NONEON-NOSVE-NEXT: stp q4, q5, [x0] +; NONEON-NOSVE-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = sdiv <32 x i8> %op1, %op2 @@ -196,6 +487,23 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: mov w10, v0.s[1] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: mov w9, v1.s[1] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: mov v0.s[1], w9 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -212,6 +520,29 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; 
NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w10, v0.h[0] +; NONEON-NOSVE-NEXT: smov w11, v0.h[2] +; NONEON-NOSVE-NEXT: smov w12, v0.h[3] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v1.h[0] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.h[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s0, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w8, w12, w11 +; NONEON-NOSVE-NEXT: mov v0.h[2], w10 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -238,6 +569,43 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w10, v0.h[0] +; NONEON-NOSVE-NEXT: smov w11, v0.h[2] +; NONEON-NOSVE-NEXT: smov w12, v0.h[3] +; NONEON-NOSVE-NEXT: smov w13, v0.h[4] +; NONEON-NOSVE-NEXT: smov w14, v0.h[5] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v1.h[0] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.h[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: smov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: smov w9, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: smov w12, v1.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[2], w10 +; NONEON-NOSVE-NEXT: smov w10, v0.h[6] +; NONEON-NOSVE-NEXT: sdiv w12, w13, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.h[5] +; NONEON-NOSVE-NEXT: mov v2.h[3], w11 +; NONEON-NOSVE-NEXT: smov w11, v0.h[7] +; NONEON-NOSVE-NEXT: sdiv 
w8, w14, w13 +; NONEON-NOSVE-NEXT: mov v2.h[4], w12 +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: sdiv w8, w11, w10 +; NONEON-NOSVE-NEXT: mov v2.h[6], w9 +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -278,6 +646,79 @@ define void @sdiv_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h ; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w10, v0.h[0] +; NONEON-NOSVE-NEXT: smov w11, v0.h[2] +; NONEON-NOSVE-NEXT: smov w12, v0.h[3] +; NONEON-NOSVE-NEXT: smov w13, v0.h[4] +; NONEON-NOSVE-NEXT: smov w14, v0.h[5] +; NONEON-NOSVE-NEXT: smov w15, v0.h[6] +; NONEON-NOSVE-NEXT: smov w16, v2.h[1] +; NONEON-NOSVE-NEXT: smov w17, v2.h[0] +; NONEON-NOSVE-NEXT: smov w18, v2.h[2] +; NONEON-NOSVE-NEXT: smov w1, v2.h[3] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v1.h[0] +; NONEON-NOSVE-NEXT: smov w2, v2.h[4] +; NONEON-NOSVE-NEXT: smov w3, v2.h[5] +; NONEON-NOSVE-NEXT: smov w4, v2.h[6] +; NONEON-NOSVE-NEXT: sdiv w10, w10, w9 +; NONEON-NOSVE-NEXT: smov w9, v1.h[2] +; NONEON-NOSVE-NEXT: sdiv w9, w11, w9 +; NONEON-NOSVE-NEXT: smov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s5, w10 +; NONEON-NOSVE-NEXT: smov w10, v3.h[7] +; NONEON-NOSVE-NEXT: mov v5.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: smov w12, v1.h[4] +; NONEON-NOSVE-NEXT: mov v5.h[2], w9 +; NONEON-NOSVE-NEXT: smov w9, v2.h[7] +; NONEON-NOSVE-NEXT: sdiv w12, w13, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.h[5] +; NONEON-NOSVE-NEXT: mov v5.h[3], w11 +; 
NONEON-NOSVE-NEXT: smov w11, v0.h[7] +; NONEON-NOSVE-NEXT: sdiv w13, w14, w13 +; NONEON-NOSVE-NEXT: smov w14, v1.h[6] +; NONEON-NOSVE-NEXT: mov v5.h[4], w12 +; NONEON-NOSVE-NEXT: sdiv w14, w15, w14 +; NONEON-NOSVE-NEXT: smov w15, v3.h[1] +; NONEON-NOSVE-NEXT: mov v5.h[5], w13 +; NONEON-NOSVE-NEXT: sdiv w15, w16, w15 +; NONEON-NOSVE-NEXT: smov w16, v3.h[0] +; NONEON-NOSVE-NEXT: mov v5.h[6], w14 +; NONEON-NOSVE-NEXT: sdiv w16, w17, w16 +; NONEON-NOSVE-NEXT: smov w17, v3.h[2] +; NONEON-NOSVE-NEXT: sdiv w17, w18, w17 +; NONEON-NOSVE-NEXT: smov w18, v3.h[3] +; NONEON-NOSVE-NEXT: fmov s4, w16 +; NONEON-NOSVE-NEXT: mov v4.h[1], w15 +; NONEON-NOSVE-NEXT: sdiv w18, w1, w18 +; NONEON-NOSVE-NEXT: smov w1, v3.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[2], w17 +; NONEON-NOSVE-NEXT: sdiv w1, w2, w1 +; NONEON-NOSVE-NEXT: smov w2, v3.h[5] +; NONEON-NOSVE-NEXT: mov v4.h[3], w18 +; NONEON-NOSVE-NEXT: sdiv w2, w3, w2 +; NONEON-NOSVE-NEXT: smov w3, v3.h[6] +; NONEON-NOSVE-NEXT: mov v4.h[4], w1 +; NONEON-NOSVE-NEXT: sdiv w8, w4, w3 +; NONEON-NOSVE-NEXT: mov v4.h[5], w2 +; NONEON-NOSVE-NEXT: sdiv w9, w9, w10 +; NONEON-NOSVE-NEXT: smov w10, v1.h[7] +; NONEON-NOSVE-NEXT: mov v4.h[6], w8 +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: mov v4.h[7], w9 +; NONEON-NOSVE-NEXT: mov v5.h[7], w10 +; NONEON-NOSVE-NEXT: stp q4, q5, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = sdiv <16 x i16> %op1, %op2 @@ -294,6 +735,21 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: mov w10, v0.s[1] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: 
mov w9, v1.s[1] +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: mov v0.s[1], w9 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -307,6 +763,26 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w10, s0 +; NONEON-NOSVE-NEXT: mov w11, v0.s[2] +; NONEON-NOSVE-NEXT: mov w12, v0.s[3] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: fmov w9, s1 +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: mov w10, v1.s[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: mov w11, v1.s[3] +; NONEON-NOSVE-NEXT: fmov s0, w9 +; NONEON-NOSVE-NEXT: mov v0.s[1], w8 +; NONEON-NOSVE-NEXT: sdiv w8, w12, w11 +; NONEON-NOSVE-NEXT: mov v0.s[2], w10 +; NONEON-NOSVE-NEXT: mov v0.s[3], w8 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -322,6 +798,45 @@ define void @sdiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w10, s0 +; NONEON-NOSVE-NEXT: mov w11, v0.s[2] +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w12, v2.s[1] +; NONEON-NOSVE-NEXT: fmov w13, s2 +; NONEON-NOSVE-NEXT: mov w14, v2.s[2] +; NONEON-NOSVE-NEXT: mov w15, v2.s[3] +; NONEON-NOSVE-NEXT: mov w16, v0.s[3] +; NONEON-NOSVE-NEXT: sdiv w8, w9, w8 +; NONEON-NOSVE-NEXT: fmov w9, s1 +; NONEON-NOSVE-NEXT: sdiv w9, w10, w9 +; NONEON-NOSVE-NEXT: mov w10, 
v1.s[2] +; NONEON-NOSVE-NEXT: sdiv w10, w11, w10 +; NONEON-NOSVE-NEXT: mov w11, v3.s[1] +; NONEON-NOSVE-NEXT: sdiv w11, w12, w11 +; NONEON-NOSVE-NEXT: fmov w12, s3 +; NONEON-NOSVE-NEXT: sdiv w12, w13, w12 +; NONEON-NOSVE-NEXT: mov w13, v3.s[2] +; NONEON-NOSVE-NEXT: sdiv w13, w14, w13 +; NONEON-NOSVE-NEXT: mov w14, v3.s[3] +; NONEON-NOSVE-NEXT: fmov s0, w12 +; NONEON-NOSVE-NEXT: mov v0.s[1], w11 +; NONEON-NOSVE-NEXT: sdiv w14, w15, w14 +; NONEON-NOSVE-NEXT: mov w15, v1.s[3] +; NONEON-NOSVE-NEXT: fmov s1, w9 +; NONEON-NOSVE-NEXT: mov v0.s[2], w13 +; NONEON-NOSVE-NEXT: mov v1.s[1], w8 +; NONEON-NOSVE-NEXT: mov v1.s[2], w10 +; NONEON-NOSVE-NEXT: sdiv w8, w16, w15 +; NONEON-NOSVE-NEXT: mov v0.s[3], w14 +; NONEON-NOSVE-NEXT: mov v1.s[3], w8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = sdiv <8 x i32> %op1, %op2 @@ -338,6 +853,16 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = sdiv <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -351,6 +876,18 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x10, v0.d[1] +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: mov x9, v1.d[1] +; NONEON-NOSVE-NEXT: sdiv x9, x10, x9 +; 
NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -366,6 +903,29 @@ define void @sdiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: sdiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x10, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x11, d2 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: mov x12, v0.d[1] +; NONEON-NOSVE-NEXT: sdiv x8, x9, x8 +; NONEON-NOSVE-NEXT: mov x9, v3.d[1] +; NONEON-NOSVE-NEXT: sdiv x9, x10, x9 +; NONEON-NOSVE-NEXT: fmov x10, d3 +; NONEON-NOSVE-NEXT: sdiv x10, x11, x10 +; NONEON-NOSVE-NEXT: mov x11, v1.d[1] +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: sdiv x11, x12, x11 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = sdiv <4 x i64> %op1, %op2 @@ -391,6 +951,37 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w10, v0.h[0] +; NONEON-NOSVE-NEXT: umov w11, v0.h[2] +; NONEON-NOSVE-NEXT: umov w12, v0.h[3] +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: and w10, w10, #0xff +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.h[0] +; NONEON-NOSVE-NEXT: and w11, 
w11, #0xff +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.h[2] +; NONEON-NOSVE-NEXT: and w10, w10, #0xff +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s0, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: and w9, w11, #0xff +; NONEON-NOSVE-NEXT: and w11, w12, #0xff +; NONEON-NOSVE-NEXT: udiv w8, w11, w9 +; NONEON-NOSVE-NEXT: mov v0.h[2], w10 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = udiv <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -418,6 +1009,45 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: umov w8, v1.b[1] +; NONEON-NOSVE-NEXT: umov w9, v0.b[1] +; NONEON-NOSVE-NEXT: umov w10, v0.b[0] +; NONEON-NOSVE-NEXT: umov w11, v0.b[2] +; NONEON-NOSVE-NEXT: umov w12, v0.b[3] +; NONEON-NOSVE-NEXT: umov w13, v0.b[4] +; NONEON-NOSVE-NEXT: umov w14, v0.b[5] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.b[0] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.b[2] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v1.b[3] +; NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: umov w9, v1.b[6] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: umov w12, v1.b[4] +; NONEON-NOSVE-NEXT: mov v2.b[2], w10 +; NONEON-NOSVE-NEXT: umov w10, v0.b[6] +; NONEON-NOSVE-NEXT: udiv w12, w13, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.b[5] +; NONEON-NOSVE-NEXT: mov v2.b[3], w11 +; NONEON-NOSVE-NEXT: umov w11, v0.b[7] +; NONEON-NOSVE-NEXT: 
udiv w8, w14, w13 +; NONEON-NOSVE-NEXT: mov v2.b[4], w12 +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.b[7] +; NONEON-NOSVE-NEXT: mov v2.b[5], w8 +; NONEON-NOSVE-NEXT: udiv w8, w11, w10 +; NONEON-NOSVE-NEXT: mov v2.b[6], w9 +; NONEON-NOSVE-NEXT: mov v2.b[7], w8 +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %res = udiv <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -465,6 +1095,75 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umov w8, v1.b[1] +; NONEON-NOSVE-NEXT: umov w9, v0.b[1] +; NONEON-NOSVE-NEXT: umov w10, v0.b[0] +; NONEON-NOSVE-NEXT: umov w11, v0.b[2] +; NONEON-NOSVE-NEXT: umov w12, v0.b[3] +; NONEON-NOSVE-NEXT: umov w13, v0.b[4] +; NONEON-NOSVE-NEXT: umov w14, v0.b[5] +; NONEON-NOSVE-NEXT: umov w15, v0.b[6] +; NONEON-NOSVE-NEXT: umov w16, v0.b[7] +; NONEON-NOSVE-NEXT: umov w17, v0.b[8] +; NONEON-NOSVE-NEXT: umov w18, v0.b[9] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.b[0] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.b[2] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v1.b[3] +; NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: umov w9, v1.b[10] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: umov w12, v1.b[4] +; NONEON-NOSVE-NEXT: mov v2.b[2], w10 +; NONEON-NOSVE-NEXT: umov w10, v0.b[10] +; NONEON-NOSVE-NEXT: udiv w12, w13, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.b[5] +; NONEON-NOSVE-NEXT: mov v2.b[3], w11 +; NONEON-NOSVE-NEXT: umov w11, v0.b[11] +; NONEON-NOSVE-NEXT: udiv w13, w14, w13 +; NONEON-NOSVE-NEXT: umov w14, v1.b[6] +; NONEON-NOSVE-NEXT: mov v2.b[4], w12 +; NONEON-NOSVE-NEXT: umov w12, v0.b[12] +; NONEON-NOSVE-NEXT: udiv w14, w15, w14 +; NONEON-NOSVE-NEXT: 
umov w15, v1.b[7] +; NONEON-NOSVE-NEXT: mov v2.b[5], w13 +; NONEON-NOSVE-NEXT: umov w13, v0.b[13] +; NONEON-NOSVE-NEXT: udiv w15, w16, w15 +; NONEON-NOSVE-NEXT: umov w16, v1.b[8] +; NONEON-NOSVE-NEXT: mov v2.b[6], w14 +; NONEON-NOSVE-NEXT: udiv w16, w17, w16 +; NONEON-NOSVE-NEXT: umov w17, v1.b[9] +; NONEON-NOSVE-NEXT: mov v2.b[7], w15 +; NONEON-NOSVE-NEXT: udiv w8, w18, w17 +; NONEON-NOSVE-NEXT: mov v2.b[8], w16 +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.b[11] +; NONEON-NOSVE-NEXT: mov v2.b[9], w8 +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v1.b[12] +; NONEON-NOSVE-NEXT: mov v2.b[10], w9 +; NONEON-NOSVE-NEXT: umov w9, v1.b[14] +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: umov w12, v1.b[13] +; NONEON-NOSVE-NEXT: mov v2.b[11], w10 +; NONEON-NOSVE-NEXT: umov w10, v1.b[15] +; NONEON-NOSVE-NEXT: udiv w8, w13, w12 +; NONEON-NOSVE-NEXT: umov w12, v0.b[14] +; NONEON-NOSVE-NEXT: mov v2.b[12], w11 +; NONEON-NOSVE-NEXT: umov w11, v0.b[15] +; NONEON-NOSVE-NEXT: udiv w9, w12, w9 +; NONEON-NOSVE-NEXT: mov v2.b[13], w8 +; NONEON-NOSVE-NEXT: udiv w8, w11, w10 +; NONEON-NOSVE-NEXT: mov v2.b[14], w9 +; NONEON-NOSVE-NEXT: mov v2.b[15], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = udiv <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -545,6 +1244,163 @@ define void @udiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b ; CHECK-NEXT: stp q3, q2, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x27, [sp, #-80]! 
// 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -80 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: umov w8, v1.b[1] +; NONEON-NOSVE-NEXT: umov w9, v0.b[1] +; NONEON-NOSVE-NEXT: umov w10, v0.b[0] +; NONEON-NOSVE-NEXT: umov w11, v0.b[2] +; NONEON-NOSVE-NEXT: umov w12, v0.b[3] +; NONEON-NOSVE-NEXT: umov w13, v0.b[4] +; NONEON-NOSVE-NEXT: umov w14, v0.b[5] +; NONEON-NOSVE-NEXT: umov w15, v0.b[6] +; NONEON-NOSVE-NEXT: umov w17, v0.b[8] +; NONEON-NOSVE-NEXT: umov w2, v0.b[10] +; NONEON-NOSVE-NEXT: umov w3, v0.b[11] +; NONEON-NOSVE-NEXT: umov w4, v0.b[12] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.b[0] +; NONEON-NOSVE-NEXT: umov w5, v0.b[13] +; NONEON-NOSVE-NEXT: umov w6, v0.b[14] +; NONEON-NOSVE-NEXT: umov w1, v3.b[1] +; NONEON-NOSVE-NEXT: umov w7, v2.b[0] +; NONEON-NOSVE-NEXT: umov w19, v2.b[2] +; NONEON-NOSVE-NEXT: umov w20, v2.b[3] +; NONEON-NOSVE-NEXT: umov w21, v2.b[4] +; NONEON-NOSVE-NEXT: umov w22, v2.b[5] +; NONEON-NOSVE-NEXT: umov w23, v2.b[6] +; NONEON-NOSVE-NEXT: umov w24, v2.b[7] +; NONEON-NOSVE-NEXT: umov w25, v2.b[8] +; NONEON-NOSVE-NEXT: umov w26, v2.b[9] +; NONEON-NOSVE-NEXT: umov w27, v2.b[10] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; 
NONEON-NOSVE-NEXT: umov w10, v1.b[2] +; NONEON-NOSVE-NEXT: udiv w11, w11, w10 +; NONEON-NOSVE-NEXT: umov w10, v1.b[3] +; NONEON-NOSVE-NEXT: fmov s5, w9 +; NONEON-NOSVE-NEXT: umov w9, v3.b[11] +; NONEON-NOSVE-NEXT: mov v5.b[1], w8 +; NONEON-NOSVE-NEXT: udiv w10, w12, w10 +; NONEON-NOSVE-NEXT: umov w12, v1.b[4] +; NONEON-NOSVE-NEXT: mov v5.b[2], w11 +; NONEON-NOSVE-NEXT: umov w11, v2.b[11] +; NONEON-NOSVE-NEXT: udiv w13, w13, w12 +; NONEON-NOSVE-NEXT: umov w12, v1.b[5] +; NONEON-NOSVE-NEXT: mov v5.b[3], w10 +; NONEON-NOSVE-NEXT: umov w10, v3.b[12] +; NONEON-NOSVE-NEXT: udiv w12, w14, w12 +; NONEON-NOSVE-NEXT: umov w14, v1.b[6] +; NONEON-NOSVE-NEXT: mov v5.b[4], w13 +; NONEON-NOSVE-NEXT: umov w13, v2.b[14] +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: umov w14, v1.b[7] +; NONEON-NOSVE-NEXT: umov w15, v0.b[7] +; NONEON-NOSVE-NEXT: mov v5.b[5], w12 +; NONEON-NOSVE-NEXT: umov w12, v2.b[13] +; NONEON-NOSVE-NEXT: udiv w14, w15, w14 +; NONEON-NOSVE-NEXT: umov w15, v1.b[8] +; NONEON-NOSVE-NEXT: mov v5.b[6], w16 +; NONEON-NOSVE-NEXT: udiv w18, w17, w15 +; NONEON-NOSVE-NEXT: umov w15, v1.b[9] +; NONEON-NOSVE-NEXT: umov w17, v0.b[9] +; NONEON-NOSVE-NEXT: mov v5.b[7], w14 +; NONEON-NOSVE-NEXT: udiv w17, w17, w15 +; NONEON-NOSVE-NEXT: umov w15, v1.b[10] +; NONEON-NOSVE-NEXT: mov v5.b[8], w18 +; NONEON-NOSVE-NEXT: udiv w15, w2, w15 +; NONEON-NOSVE-NEXT: umov w2, v1.b[11] +; NONEON-NOSVE-NEXT: mov v5.b[9], w17 +; NONEON-NOSVE-NEXT: udiv w2, w3, w2 +; NONEON-NOSVE-NEXT: umov w3, v1.b[12] +; NONEON-NOSVE-NEXT: mov v5.b[10], w15 +; NONEON-NOSVE-NEXT: udiv w3, w4, w3 +; NONEON-NOSVE-NEXT: umov w4, v1.b[13] +; NONEON-NOSVE-NEXT: mov v5.b[11], w2 +; NONEON-NOSVE-NEXT: udiv w4, w5, w4 +; NONEON-NOSVE-NEXT: umov w5, v1.b[14] +; NONEON-NOSVE-NEXT: mov v5.b[12], w3 +; NONEON-NOSVE-NEXT: udiv w5, w6, w5 +; NONEON-NOSVE-NEXT: umov w6, v2.b[1] +; NONEON-NOSVE-NEXT: mov v5.b[13], w4 +; NONEON-NOSVE-NEXT: udiv w1, w6, w1 +; NONEON-NOSVE-NEXT: umov w6, v3.b[0] +; 
NONEON-NOSVE-NEXT: mov v5.b[14], w5 +; NONEON-NOSVE-NEXT: udiv w6, w7, w6 +; NONEON-NOSVE-NEXT: umov w7, v3.b[2] +; NONEON-NOSVE-NEXT: udiv w7, w19, w7 +; NONEON-NOSVE-NEXT: umov w19, v3.b[3] +; NONEON-NOSVE-NEXT: fmov s4, w6 +; NONEON-NOSVE-NEXT: mov v4.b[1], w1 +; NONEON-NOSVE-NEXT: udiv w19, w20, w19 +; NONEON-NOSVE-NEXT: umov w20, v3.b[4] +; NONEON-NOSVE-NEXT: mov v4.b[2], w7 +; NONEON-NOSVE-NEXT: udiv w20, w21, w20 +; NONEON-NOSVE-NEXT: umov w21, v3.b[5] +; NONEON-NOSVE-NEXT: mov v4.b[3], w19 +; NONEON-NOSVE-NEXT: udiv w21, w22, w21 +; NONEON-NOSVE-NEXT: umov w22, v3.b[6] +; NONEON-NOSVE-NEXT: mov v4.b[4], w20 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w22, w23, w22 +; NONEON-NOSVE-NEXT: umov w23, v3.b[7] +; NONEON-NOSVE-NEXT: mov v4.b[5], w21 +; NONEON-NOSVE-NEXT: udiv w23, w24, w23 +; NONEON-NOSVE-NEXT: umov w24, v3.b[8] +; NONEON-NOSVE-NEXT: mov v4.b[6], w22 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w24, w25, w24 +; NONEON-NOSVE-NEXT: umov w25, v3.b[9] +; NONEON-NOSVE-NEXT: mov v4.b[7], w23 +; NONEON-NOSVE-NEXT: udiv w25, w26, w25 +; NONEON-NOSVE-NEXT: umov w26, v3.b[10] +; NONEON-NOSVE-NEXT: mov v4.b[8], w24 +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w8, w27, w26 +; NONEON-NOSVE-NEXT: mov v4.b[9], w25 +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w9, w11, w9 +; NONEON-NOSVE-NEXT: umov w11, v2.b[12] +; NONEON-NOSVE-NEXT: mov v4.b[10], w8 +; NONEON-NOSVE-NEXT: umov w8, v3.b[15] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v3.b[13] +; NONEON-NOSVE-NEXT: mov v4.b[11], w9 +; NONEON-NOSVE-NEXT: umov w9, v1.b[15] +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: umov w12, v3.b[14] +; NONEON-NOSVE-NEXT: mov v4.b[12], w10 +; NONEON-NOSVE-NEXT: umov w10, v0.b[15] +; NONEON-NOSVE-NEXT: udiv w12, w13, w12 +; 
NONEON-NOSVE-NEXT: umov w13, v2.b[15] +; NONEON-NOSVE-NEXT: mov v4.b[13], w11 +; NONEON-NOSVE-NEXT: udiv w8, w13, w8 +; NONEON-NOSVE-NEXT: mov v4.b[14], w12 +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: mov v4.b[15], w8 +; NONEON-NOSVE-NEXT: mov v5.b[15], w9 +; NONEON-NOSVE-NEXT: stp q4, q5, [x0] +; NONEON-NOSVE-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = udiv <32 x i8> %op1, %op2 @@ -563,6 +1419,22 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: mov w10, v0.s[1] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: mov w9, v1.s[1] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: mov v0.s[1], w9 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = udiv <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -579,6 +1451,29 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w10, v0.h[0] +; NONEON-NOSVE-NEXT: umov w11, v0.h[2] +; NONEON-NOSVE-NEXT: umov w12, v0.h[3] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.h[0] +; 
NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.h[2] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s0, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: udiv w8, w12, w11 +; NONEON-NOSVE-NEXT: mov v0.h[2], w10 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = udiv <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -605,6 +1500,43 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w10, v0.h[0] +; NONEON-NOSVE-NEXT: umov w11, v0.h[2] +; NONEON-NOSVE-NEXT: umov w12, v0.h[3] +; NONEON-NOSVE-NEXT: umov w13, v0.h[4] +; NONEON-NOSVE-NEXT: umov w14, v0.h[5] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.h[0] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.h[2] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: umov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s2, w9 +; NONEON-NOSVE-NEXT: umov w9, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: umov w12, v1.h[4] +; NONEON-NOSVE-NEXT: mov v2.h[2], w10 +; NONEON-NOSVE-NEXT: umov w10, v0.h[6] +; NONEON-NOSVE-NEXT: udiv w12, w13, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.h[5] +; NONEON-NOSVE-NEXT: mov v2.h[3], w11 +; NONEON-NOSVE-NEXT: umov w11, v0.h[7] +; NONEON-NOSVE-NEXT: udiv w8, w14, w13 +; NONEON-NOSVE-NEXT: mov v2.h[4], w12 +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: udiv w8, w11, w10 +; NONEON-NOSVE-NEXT: mov v2.h[6], w9 +; NONEON-NOSVE-NEXT: mov 
v2.h[7], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = udiv <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -645,6 +1577,79 @@ define void @udiv_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h ; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w10, v0.h[0] +; NONEON-NOSVE-NEXT: umov w11, v0.h[2] +; NONEON-NOSVE-NEXT: umov w12, v0.h[3] +; NONEON-NOSVE-NEXT: umov w13, v0.h[4] +; NONEON-NOSVE-NEXT: umov w14, v0.h[5] +; NONEON-NOSVE-NEXT: umov w15, v0.h[6] +; NONEON-NOSVE-NEXT: umov w16, v2.h[1] +; NONEON-NOSVE-NEXT: umov w17, v2.h[0] +; NONEON-NOSVE-NEXT: umov w18, v2.h[2] +; NONEON-NOSVE-NEXT: umov w1, v2.h[3] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v1.h[0] +; NONEON-NOSVE-NEXT: umov w2, v2.h[4] +; NONEON-NOSVE-NEXT: umov w3, v2.h[5] +; NONEON-NOSVE-NEXT: umov w4, v2.h[6] +; NONEON-NOSVE-NEXT: udiv w10, w10, w9 +; NONEON-NOSVE-NEXT: umov w9, v1.h[2] +; NONEON-NOSVE-NEXT: udiv w9, w11, w9 +; NONEON-NOSVE-NEXT: umov w11, v1.h[3] +; NONEON-NOSVE-NEXT: fmov s5, w10 +; NONEON-NOSVE-NEXT: umov w10, v3.h[7] +; NONEON-NOSVE-NEXT: mov v5.h[1], w8 +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: umov w12, v1.h[4] +; NONEON-NOSVE-NEXT: mov v5.h[2], w9 +; NONEON-NOSVE-NEXT: umov w9, v2.h[7] +; NONEON-NOSVE-NEXT: udiv w12, w13, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.h[5] +; NONEON-NOSVE-NEXT: mov v5.h[3], w11 +; NONEON-NOSVE-NEXT: umov w11, v0.h[7] +; NONEON-NOSVE-NEXT: udiv w13, w14, w13 +; NONEON-NOSVE-NEXT: umov w14, v1.h[6] +; NONEON-NOSVE-NEXT: mov v5.h[4], w12 +; NONEON-NOSVE-NEXT: udiv w14, w15, w14 +; NONEON-NOSVE-NEXT: umov w15, v3.h[1] +; NONEON-NOSVE-NEXT: mov 
v5.h[5], w13 +; NONEON-NOSVE-NEXT: udiv w15, w16, w15 +; NONEON-NOSVE-NEXT: umov w16, v3.h[0] +; NONEON-NOSVE-NEXT: mov v5.h[6], w14 +; NONEON-NOSVE-NEXT: udiv w16, w17, w16 +; NONEON-NOSVE-NEXT: umov w17, v3.h[2] +; NONEON-NOSVE-NEXT: udiv w17, w18, w17 +; NONEON-NOSVE-NEXT: umov w18, v3.h[3] +; NONEON-NOSVE-NEXT: fmov s4, w16 +; NONEON-NOSVE-NEXT: mov v4.h[1], w15 +; NONEON-NOSVE-NEXT: udiv w18, w1, w18 +; NONEON-NOSVE-NEXT: umov w1, v3.h[4] +; NONEON-NOSVE-NEXT: mov v4.h[2], w17 +; NONEON-NOSVE-NEXT: udiv w1, w2, w1 +; NONEON-NOSVE-NEXT: umov w2, v3.h[5] +; NONEON-NOSVE-NEXT: mov v4.h[3], w18 +; NONEON-NOSVE-NEXT: udiv w2, w3, w2 +; NONEON-NOSVE-NEXT: umov w3, v3.h[6] +; NONEON-NOSVE-NEXT: mov v4.h[4], w1 +; NONEON-NOSVE-NEXT: udiv w8, w4, w3 +; NONEON-NOSVE-NEXT: mov v4.h[5], w2 +; NONEON-NOSVE-NEXT: udiv w9, w9, w10 +; NONEON-NOSVE-NEXT: umov w10, v1.h[7] +; NONEON-NOSVE-NEXT: mov v4.h[6], w8 +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: mov v4.h[7], w9 +; NONEON-NOSVE-NEXT: mov v5.h[7], w10 +; NONEON-NOSVE-NEXT: stp q4, q5, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = udiv <16 x i16> %op1, %op2 @@ -661,6 +1666,21 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: mov w10, v0.s[1] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: mov w9, v1.s[1] +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: mov v0.s[1], w9 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = udiv <2 x i32> %op1, %op2 ret <2 x i32> 
%res } @@ -674,6 +1694,26 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w10, s0 +; NONEON-NOSVE-NEXT: mov w11, v0.s[2] +; NONEON-NOSVE-NEXT: mov w12, v0.s[3] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: fmov w9, s1 +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: mov w10, v1.s[2] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: mov w11, v1.s[3] +; NONEON-NOSVE-NEXT: fmov s0, w9 +; NONEON-NOSVE-NEXT: mov v0.s[1], w8 +; NONEON-NOSVE-NEXT: udiv w8, w12, w11 +; NONEON-NOSVE-NEXT: mov v0.s[2], w10 +; NONEON-NOSVE-NEXT: mov v0.s[3], w8 +; NONEON-NOSVE-NEXT: ret %res = udiv <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -689,6 +1729,45 @@ define void @udiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w10, s0 +; NONEON-NOSVE-NEXT: mov w11, v0.s[2] +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w12, v2.s[1] +; NONEON-NOSVE-NEXT: fmov w13, s2 +; NONEON-NOSVE-NEXT: mov w14, v2.s[2] +; NONEON-NOSVE-NEXT: mov w15, v2.s[3] +; NONEON-NOSVE-NEXT: mov w16, v0.s[3] +; NONEON-NOSVE-NEXT: udiv w8, w9, w8 +; NONEON-NOSVE-NEXT: fmov w9, s1 +; NONEON-NOSVE-NEXT: udiv w9, w10, w9 +; NONEON-NOSVE-NEXT: mov w10, v1.s[2] +; NONEON-NOSVE-NEXT: udiv w10, w11, w10 +; NONEON-NOSVE-NEXT: mov w11, v3.s[1] +; NONEON-NOSVE-NEXT: udiv w11, w12, w11 +; NONEON-NOSVE-NEXT: fmov w12, s3 +; NONEON-NOSVE-NEXT: udiv w12, w13, w12 +; NONEON-NOSVE-NEXT: mov w13, v3.s[2] +; 
NONEON-NOSVE-NEXT: udiv w13, w14, w13 +; NONEON-NOSVE-NEXT: mov w14, v3.s[3] +; NONEON-NOSVE-NEXT: fmov s0, w12 +; NONEON-NOSVE-NEXT: mov v0.s[1], w11 +; NONEON-NOSVE-NEXT: udiv w14, w15, w14 +; NONEON-NOSVE-NEXT: mov w15, v1.s[3] +; NONEON-NOSVE-NEXT: fmov s1, w9 +; NONEON-NOSVE-NEXT: mov v0.s[2], w13 +; NONEON-NOSVE-NEXT: mov v1.s[1], w8 +; NONEON-NOSVE-NEXT: mov v1.s[2], w10 +; NONEON-NOSVE-NEXT: udiv w8, w16, w15 +; NONEON-NOSVE-NEXT: mov v0.s[3], w14 +; NONEON-NOSVE-NEXT: mov v1.s[3], w8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = udiv <8 x i32> %op1, %op2 @@ -705,6 +1784,16 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = udiv <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -718,6 +1807,18 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x10, v0.d[1] +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: mov x9, v1.d[1] +; NONEON-NOSVE-NEXT: udiv x9, x10, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: ret %res = udiv <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -733,6 +1834,29 @@ define void @udiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: udiv z1.d, p0/m, z1.d, z3.d ; 
CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x10, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x11, d2 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: mov x12, v0.d[1] +; NONEON-NOSVE-NEXT: udiv x8, x9, x8 +; NONEON-NOSVE-NEXT: mov x9, v3.d[1] +; NONEON-NOSVE-NEXT: udiv x9, x10, x9 +; NONEON-NOSVE-NEXT: fmov x10, d3 +; NONEON-NOSVE-NEXT: udiv x10, x11, x10 +; NONEON-NOSVE-NEXT: mov x11, v1.d[1] +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: udiv x11, x12, x11 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = udiv <4 x i64> %op1, %op2 @@ -778,6 +1902,27 @@ define void @udiv_constantsplat_v8i32(ptr %a) { ; SVE2-NEXT: lsr z0.s, z0.s, #6 ; SVE2-NEXT: stp q1, q0, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #8969 // =0x2309 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: movk w8, #22765, lsl #16 +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: umull2 v3.2d, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umull v4.2d, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: umull2 v5.2d, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: umull v0.2d, v2.2s, v0.2s +; NONEON-NOSVE-NEXT: uzp2 v3.4s, v4.4s, v3.4s +; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v5.4s +; NONEON-NOSVE-NEXT: sub v1.4s, v1.4s, v3.4s +; NONEON-NOSVE-NEXT: sub v2.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: usra v3.4s, v1.4s, #1 +; NONEON-NOSVE-NEXT: usra v0.4s, v2.4s, #1 +; NONEON-NOSVE-NEXT: ushr v1.4s, v3.4s, #6 +; NONEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #6 +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res 
= udiv <8 x i32> %op1, store <8 x i32> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll index e40668a8696ee2..9f8511b00c6ed1 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -26,6 +27,22 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) { ; CHECK-NEXT: asr z0.s, z0.s, #31 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i1_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: shl v0.4s, v0.4s, #31 +; NONEON-NOSVE-NEXT: shl v1.4s, v1.4s, #31 +; NONEON-NOSVE-NEXT: cmlt v0.4s, v0.4s, #0 +; NONEON-NOSVE-NEXT: cmlt v1.4s, v1.4s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i1> %a to <8 x i32> store <8 x i32> %b, ptr %out ret void @@ -52,6 +69,22 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) { ; CHECK-NEXT: asr z0.d, z0.d, #61 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i3_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: shl v0.2d, v0.2d, #61 +; NONEON-NOSVE-NEXT: shl v1.2d, v1.2d, #61 +; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #61 +; NONEON-NOSVE-NEXT: sshr v1.2d, v1.2d, #61 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i3> %a to <4 x i64> store <4 x i64> %b, ptr %out ret void @@ -70,6 +103,17 @@ define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i8_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i16> store <16 x i16>%b, ptr %out ret void @@ -91,6 +135,24 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v32i8_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0 +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = sext <32 x i8> %b to <32 x i16> @@ -112,6 +174,18 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i8_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i8> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -133,6 +207,25 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i8_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out ret void @@ -167,6 +260,40 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v32i8_v32i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0 +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #72] +; NONEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0 +; NONEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: sshll v0.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v2.4s, v6.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: sshll v1.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v7.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = sext <32 x i8> %b to <32 x i32> @@ -194,6 +321,22 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) { ; CHECK-NEXT: sxtb z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i8_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: shl v0.2d, v0.2d, #56 +; NONEON-NOSVE-NEXT: shl v1.2d, v1.2d, #56 +; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #56 +; NONEON-NOSVE-NEXT: sshr v1.2d, v1.2d, #56 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i8> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -216,6 +359,26 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i8_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i8> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -253,6 +416,41 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q1, q4, [x0, #32] ; CHECK-NEXT: stp q0, q2, [x0, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i8_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-112]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 112 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #40] +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #48] +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #72] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #104] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #88] +; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q5, [x0, #64] +; NONEON-NOSVE-NEXT: sshll v1.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v2.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q4, [x0] +; NONEON-NOSVE-NEXT: sshll v0.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #96] +; NONEON-NOSVE-NEXT: stp q0, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #112 +; NONEON-NOSVE-NEXT: ret %b = sext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out ret void @@ -321,6 +519,73 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q0, q2, [x1, #224] ; CHECK-NEXT: stp q3, q1, [x1, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v32i8_v32i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #224 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 224 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: sshll v5.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: sshll v6.8h, v1.8b, #0 +; 
NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v3.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v4.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: stp q3, q5, [sp, #32] +; NONEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #64] +; NONEON-NOSVE-NEXT: sshll v6.4s, v6.4h, #0 +; NONEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0 +; NONEON-NOSVE-NEXT: ldr d7, [sp, #88] +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #72] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v7.4s, v7.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q5, [sp, #128] +; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ldr d19, [sp, #152] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #96] +; NONEON-NOSVE-NEXT: ldr d20, [sp, #136] +; NONEON-NOSVE-NEXT: stp q1, q4, [sp, #160] +; NONEON-NOSVE-NEXT: ldr d17, [sp, #104] +; NONEON-NOSVE-NEXT: ldr d21, [sp, #120] +; NONEON-NOSVE-NEXT: stp q7, q6, [sp, #192] +; NONEON-NOSVE-NEXT: sshll v6.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: sshll v19.2d, v19.2s, #0 +; NONEON-NOSVE-NEXT: ldr d16, [sp, #216] +; NONEON-NOSVE-NEXT: ldr d22, [sp, #200] +; NONEON-NOSVE-NEXT: ldr d23, [sp, #184] +; NONEON-NOSVE-NEXT: ldr d18, [sp, #168] +; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: sshll v16.2d, v16.2s, #0 +; NONEON-NOSVE-NEXT: stp q5, q19, [x1] +; NONEON-NOSVE-NEXT: sshll v5.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: sshll v7.2d, v22.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: stp q6, q16, [x1, #128] +; NONEON-NOSVE-NEXT: sshll v6.2d, v23.2s, #0 +; NONEON-NOSVE-NEXT: stp q5, q7, [x1, #160] +; NONEON-NOSVE-NEXT: sshll 
v5.2d, v20.2s, #0 +; NONEON-NOSVE-NEXT: stp q4, q6, [x1, #192] +; NONEON-NOSVE-NEXT: sshll v4.2d, v21.2s, #0 +; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #32] +; NONEON-NOSVE-NEXT: sshll v2.2d, v17.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #64] +; NONEON-NOSVE-NEXT: sshll v3.2d, v18.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #96] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #224] +; NONEON-NOSVE-NEXT: add sp, sp, #224 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = sext <32 x i8> %b to <32 x i64> @@ -341,6 +606,17 @@ define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i16> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -361,6 +637,24 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = sext <16 x i16> %b to <16 x i32> @@ -382,6 +676,18 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i16> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -403,6 +709,25 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %b = sext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -437,6 +762,40 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v16i16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #72] +; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: sshll v0.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v2.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: sshll v1.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = sext <16 x i16> %b to <16 x i64> @@ -457,6 +816,17 @@ define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) { ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v4i32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = sext <4 x i32> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -477,6 +847,24 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sext_v8i32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a %c = sext <8 x i32> %b to <8 x i64> @@ -497,6 +885,17 @@ define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i8_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i16> store <16 x i16>%b, ptr %out ret void @@ -518,6 +917,24 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v32i8_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0 +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = zext <32 x i8> %b to <32 x i16> @@ -539,6 +956,18 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i8_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i8> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -560,6 +989,25 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i8_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out ret void @@ -594,6 +1042,40 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v32i8_v32i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0 +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #72] +; NONEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0 +; NONEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: ushll v0.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v2.4s, v6.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: ushll v1.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v7.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = zext <32 x i8> %b to <32 x i32> @@ -619,6 +1101,20 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v4i8_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = zext <4 x i8> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -641,6 +1137,26 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i8_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i8> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -678,6 +1194,41 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) { ; CHECK-NEXT: stp q1, q4, [x0, #32] ; CHECK-NEXT: stp q0, q2, [x0, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i8_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-112]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 112 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #40] +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #48] +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #80] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #72] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #104] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #88] +; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q5, [x0, #64] +; NONEON-NOSVE-NEXT: ushll v1.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v2.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q4, [x0] +; NONEON-NOSVE-NEXT: ushll v0.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #96] +; NONEON-NOSVE-NEXT: stp q0, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #112 +; NONEON-NOSVE-NEXT: ret %b = zext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out ret void @@ -746,6 +1297,73 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q0, q2, [x1, #224] ; CHECK-NEXT: stp q3, q1, [x1, #96] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v32i8_v32i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #224 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 224 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [sp] +; NONEON-NOSVE-NEXT: ushll v5.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: ushll v6.8h, v1.8b, #0 +; 
NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v3.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v4.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: stp q3, q5, [sp, #32] +; NONEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #56] +; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] +; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #64] +; NONEON-NOSVE-NEXT: ushll v6.4s, v6.4h, #0 +; NONEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0 +; NONEON-NOSVE-NEXT: ldr d7, [sp, #88] +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #72] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v7.4s, v7.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q5, [sp, #128] +; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ldr d19, [sp, #152] +; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #96] +; NONEON-NOSVE-NEXT: ldr d20, [sp, #136] +; NONEON-NOSVE-NEXT: stp q1, q4, [sp, #160] +; NONEON-NOSVE-NEXT: ldr d17, [sp, #104] +; NONEON-NOSVE-NEXT: ldr d21, [sp, #120] +; NONEON-NOSVE-NEXT: stp q7, q6, [sp, #192] +; NONEON-NOSVE-NEXT: ushll v6.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: ushll v19.2d, v19.2s, #0 +; NONEON-NOSVE-NEXT: ldr d16, [sp, #216] +; NONEON-NOSVE-NEXT: ldr d22, [sp, #200] +; NONEON-NOSVE-NEXT: ldr d23, [sp, #184] +; NONEON-NOSVE-NEXT: ldr d18, [sp, #168] +; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ushll v16.2d, v16.2s, #0 +; NONEON-NOSVE-NEXT: stp q5, q19, [x1] +; NONEON-NOSVE-NEXT: ushll v5.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: ushll v7.2d, v22.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: stp q6, q16, [x1, #128] +; NONEON-NOSVE-NEXT: ushll v6.2d, v23.2s, #0 +; NONEON-NOSVE-NEXT: stp q5, q7, [x1, #160] +; NONEON-NOSVE-NEXT: ushll 
v5.2d, v20.2s, #0 +; NONEON-NOSVE-NEXT: stp q4, q6, [x1, #192] +; NONEON-NOSVE-NEXT: ushll v4.2d, v21.2s, #0 +; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #32] +; NONEON-NOSVE-NEXT: ushll v2.2d, v17.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #64] +; NONEON-NOSVE-NEXT: ushll v3.2d, v18.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #96] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #224] +; NONEON-NOSVE-NEXT: add sp, sp, #224 +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a %c = zext <32 x i8> %b to <32 x i64> @@ -766,6 +1384,17 @@ define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i16_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i16> %a to <8 x i32> store <8 x i32>%b, ptr %out ret void @@ -786,6 +1415,24 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i16_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = zext <16 x i16> %b to <16 x i32> @@ -807,6 +1454,18 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v4i16_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = zext <4 x i16> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -828,6 +1487,25 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) { ; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i16_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %b = zext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out ret void @@ -862,6 +1540,40 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q6, q0, [x1, #96] ; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v16i16_v16i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #72] +; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: ushll v0.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v2.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: ushll v1.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a %c = zext <16 x i16> %b to <16 x i64> @@ -882,6 +1594,17 @@ define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) { ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v4i32_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %b = zext <4 x i32> %a to <4 x i64> store <4 x i64>%b, ptr %out ret void @@ -902,6 +1625,24 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zext_v8i32_v8i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a %c = zext <8 x i32> %b to <8 x i64> @@ -928,6 +1669,21 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) { ; SVE2-NEXT: mul z0.d, z1.d, z0.d ; SVE2-NEXT: str q0, [x1] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: extend_and_mul: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v1.2s, w0 +; NONEON-NOSVE-NEXT: fmov x10, d0 +; NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: fmov x11, d1 +; NONEON-NOSVE-NEXT: mov x9, v1.d[1] +; NONEON-NOSVE-NEXT: mul x10, x11, x10 +; NONEON-NOSVE-NEXT: mul x8, x9, x8 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: mov v0.d[1], x8 +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %broadcast.splatinsert2 = 
insertelement <2 x i32> poison, i32 %0, i64 0 %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer %4 = zext <2 x i32> %broadcast.splat3 to <2 x i64> @@ -943,6 +1699,13 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) { ; CHECK-NEXT: mov z0.d, x8 ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: extend_no_mul: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: dup v0.2s, w0 +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret entry: %broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0 %broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll index 54276bb4ba01d2..ade60b07150ce2 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -22,6 +23,15 @@ define void @add_v32i8(ptr %a) { ; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: add v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins 
= insertelement <32 x i8> undef, i8 7, i32 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -38,6 +48,16 @@ define void @add_v16i16(ptr %a) { ; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: add v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -54,6 +74,16 @@ define void @add_v8i32(ptr %a) { ; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -70,6 +100,16 @@ define void @add_v4i64(ptr %a) { ; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: add v1.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: add v0.2d, v2.2d, v0.2d +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = 
shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -90,6 +130,15 @@ define void @and_v32i8(ptr %a) { ; CHECK-NEXT: and z1.b, z1.b, #0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -106,6 +155,16 @@ define void @and_v16i16(ptr %a) { ; CHECK-NEXT: and z1.h, z1.h, #0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -122,6 +181,16 @@ define void @and_v8i32(ptr %a) { ; CHECK-NEXT: and z1.s, z1.s, #0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -138,6 +207,16 @@ define 
void @and_v4i64(ptr %a) { ; CHECK-NEXT: and z1.d, z1.d, #0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -158,6 +237,14 @@ define void @ashr_v32i8(ptr %a) { ; CHECK-NEXT: asr z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: cmlt v1.16b, v1.16b, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i32 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -174,6 +261,14 @@ define void @ashr_v16i16(ptr %a) { ; CHECK-NEXT: asr z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmlt v0.8h, v0.8h, #0 +; NONEON-NOSVE-NEXT: cmlt v1.8h, v1.8h, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -190,6 +285,14 @@ define void @ashr_v8i32(ptr %a) { ; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; 
NONEON-NOSVE-NEXT: cmlt v0.4s, v0.4s, #0 +; NONEON-NOSVE-NEXT: cmlt v1.4s, v1.4s, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -206,6 +309,14 @@ define void @ashr_v4i64(ptr %a) { ; CHECK-NEXT: asr z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmlt v0.2d, v0.2d, #0 +; NONEON-NOSVE-NEXT: cmlt v1.2d, v1.2d, #0 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -229,6 +340,15 @@ define void @icmp_eq_v32i8(ptr %a) { ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_eq_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmeq v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: cmeq v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -249,6 +369,16 @@ define void @icmp_sge_v16i16(ptr %a) { ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sge_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: cmge v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: cmge v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; 
NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -269,6 +399,16 @@ define void @icmp_sgt_v8i32(ptr %a) { ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_sgt_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #-8 // =0xfffffff8 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: cmgt v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: cmgt v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 -8, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -289,6 +429,16 @@ define void @icmp_ult_v4i64(ptr %a) { ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: icmp_ult_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: cmhi v1.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: cmhi v0.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -310,6 +460,14 @@ define void @lshr_v32i8(ptr %a) { ; CHECK-NEXT: lsr z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ushr v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: ushr v1.16b, v1.16b, #7 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x 
i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -326,6 +484,14 @@ define void @lshr_v16i16(ptr %a) { ; CHECK-NEXT: lsr z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ushr v0.8h, v0.8h, #15 +; NONEON-NOSVE-NEXT: ushr v1.8h, v1.8h, #15 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -342,6 +508,14 @@ define void @lshr_v8i32(ptr %a) { ; CHECK-NEXT: lsr z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #31 +; NONEON-NOSVE-NEXT: ushr v1.4s, v1.4s, #31 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -358,6 +532,14 @@ define void @lshr_v4i64(ptr %a) { ; CHECK-NEXT: lsr z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ushr v0.2d, v0.2d, #63 +; NONEON-NOSVE-NEXT: ushr v1.2d, v1.2d, #63 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -378,6 +560,15 @@ define void @mul_v32i8(ptr %a) { ; CHECK-NEXT: mul z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v32i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: mul v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: mul v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -394,6 +585,16 @@ define void @mul_v16i16(ptr %a) { ; CHECK-NEXT: mul z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: mul v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: mul v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -410,6 +611,16 @@ define void @mul_v8i32(ptr %a) { ; CHECK-NEXT: mul z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: mul v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: mul v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -426,6 +637,28 @@ define void @mul_v4i64(ptr %a) { ; CHECK-NEXT: mul z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: mul_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: fmov x10, d0 +; NONEON-NOSVE-NEXT: fmov x11, d1 +; 
NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: mov x9, v1.d[1] +; NONEON-NOSVE-NEXT: lsl x12, x10, #6 +; NONEON-NOSVE-NEXT: lsl x13, x11, #6 +; NONEON-NOSVE-NEXT: lsl x14, x8, #6 +; NONEON-NOSVE-NEXT: sub x10, x12, x10 +; NONEON-NOSVE-NEXT: sub x11, x13, x11 +; NONEON-NOSVE-NEXT: lsl x12, x9, #6 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: fmov d1, x11 +; NONEON-NOSVE-NEXT: sub x8, x14, x8 +; NONEON-NOSVE-NEXT: sub x9, x12, x9 +; NONEON-NOSVE-NEXT: mov v0.d[1], x8 +; NONEON-NOSVE-NEXT: mov v1.d[1], x9 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -446,6 +679,15 @@ define void @or_v32i8(ptr %a) { ; CHECK-NEXT: orr z1.b, z1.b, #0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -462,6 +704,16 @@ define void @or_v16i16(ptr %a) { ; CHECK-NEXT: orr z1.h, z1.h, #0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> 
zeroinitializer @@ -478,6 +730,16 @@ define void @or_v8i32(ptr %a) { ; CHECK-NEXT: orr z1.s, z1.s, #0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -494,6 +756,16 @@ define void @or_v4i64(ptr %a) { ; CHECK-NEXT: orr z1.d, z1.d, #0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -514,6 +786,14 @@ define void @shl_v32i8(ptr %a) { ; CHECK-NEXT: lsl z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: shl v1.16b, v1.16b, #7 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -530,6 +810,14 @@ define void @shl_v16i16(ptr %a) { ; CHECK-NEXT: lsl z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: shl_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: shl v0.8h, v0.8h, #15 +; NONEON-NOSVE-NEXT: shl v1.8h, v1.8h, #15 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -546,6 +834,14 @@ define void @shl_v8i32(ptr %a) { ; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: shl v0.4s, v0.4s, #31 +; NONEON-NOSVE-NEXT: shl v1.4s, v1.4s, #31 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -562,6 +858,14 @@ define void @shl_v4i64(ptr %a) { ; CHECK-NEXT: lsl z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: shl v0.2d, v0.2d, #63 +; NONEON-NOSVE-NEXT: shl v1.2d, v1.2d, #63 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -582,6 +886,15 @@ define void @smax_v32i8(ptr %a) { ; CHECK-NEXT: smax z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smax v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: smax v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, 
ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -598,6 +911,16 @@ define void @smax_v16i16(ptr %a) { ; CHECK-NEXT: smax z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: smax v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: smax v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -614,6 +937,16 @@ define void @smax_v8i32(ptr %a) { ; CHECK-NEXT: smax z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: smax v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: smax v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -630,6 +963,18 @@ define void @smax_v4i64(ptr %a) { ; CHECK-NEXT: smax z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: cmgt v3.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: cmgt v4.2d, v2.2d, v0.2d +; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b +; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; 
NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -650,6 +995,15 @@ define void @smin_v32i8(ptr %a) { ; CHECK-NEXT: smin z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smin v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: smin v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -666,6 +1020,16 @@ define void @smin_v16i16(ptr %a) { ; CHECK-NEXT: smin z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: smin v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: smin v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -682,6 +1046,16 @@ define void @smin_v8i32(ptr %a) { ; CHECK-NEXT: smin z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: smin v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: smin v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, 
i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -698,6 +1072,18 @@ define void @smin_v4i64(ptr %a) { ; CHECK-NEXT: smin z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: cmgt v3.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: cmgt v4.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b +; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -718,6 +1104,15 @@ define void @sub_v32i8(ptr %a) { ; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: sub v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: sub v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -734,6 +1129,16 @@ define void @sub_v16i16(ptr %a) { ; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: sub v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: sub v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 
15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -750,6 +1155,16 @@ define void @sub_v8i32(ptr %a) { ; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: sub v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: sub v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -766,6 +1181,16 @@ define void @sub_v4i64(ptr %a) { ; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sub_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: sub v1.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: sub v0.2d, v2.2d, v0.2d +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -786,6 +1211,15 @@ define void @umax_v32i8(ptr %a) { ; CHECK-NEXT: umax z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umax v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: umax v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ 
-802,6 +1236,16 @@ define void @umax_v16i16(ptr %a) { ; CHECK-NEXT: umax z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: umax v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: umax v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -818,6 +1262,16 @@ define void @umax_v8i32(ptr %a) { ; CHECK-NEXT: umax z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: umax v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umax v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -834,6 +1288,18 @@ define void @umax_v4i64(ptr %a) { ; CHECK-NEXT: umax z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: cmhi v3.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: cmhi v4.2d, v2.2d, v0.2d +; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b +; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> 
undef, <4 x i32> zeroinitializer @@ -854,6 +1320,15 @@ define void @umin_v32i8(ptr %a) { ; CHECK-NEXT: umin z1.b, z1.b, #7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umin v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: umin v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -870,6 +1345,16 @@ define void @umin_v16i16(ptr %a) { ; CHECK-NEXT: umin z1.h, z1.h, #15 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: umin v1.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: umin v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -886,6 +1371,16 @@ define void @umin_v8i32(ptr %a) { ; CHECK-NEXT: umin z1.s, z1.s, #31 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: umin v1.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umin v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -902,6 +1397,18 @@ define void @umin_v4i64(ptr %a) { ; 
CHECK-NEXT: umin z1.d, z1.d, #63 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: cmhi v3.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: cmhi v4.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b +; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer @@ -922,6 +1429,15 @@ define void @xor_v32i8(ptr %a) { ; CHECK-NEXT: eor z1.b, z1.b, #0x7 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #7 +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %ins = insertelement <32 x i8> undef, i8 7, i64 0 %op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer @@ -938,6 +1454,16 @@ define void @xor_v16i16(ptr %a) { ; CHECK-NEXT: eor z1.h, z1.h, #0xf ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %ins = insertelement <16 x i16> undef, i16 15, i64 0 %op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer @@ -954,6 +1480,16 @@ define void @xor_v8i32(ptr %a) { ; 
CHECK-NEXT: eor z1.s, z1.s, #0x1f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %ins = insertelement <8 x i32> undef, i32 31, i64 0 %op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer @@ -970,6 +1506,16 @@ define void @xor_v4i64(ptr %a) { ; CHECK-NEXT: eor z1.d, z1.d, #0x3f ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %ins = insertelement <4 x i64> undef, i64 63, i64 0 %op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll index 40824ba9ae9c5f..4fc7ec3a8439df 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,11 @@ define <8 x i8> @and_v8i8(<8 
x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = and <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -28,6 +34,11 @@ define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = and <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -41,6 +52,15 @@ define void @and_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = and <32 x i8> %op1, %op2 @@ -56,6 +76,11 @@ define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = and <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -68,6 +93,11 @@ define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = and <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ 
-81,6 +111,15 @@ define void @and_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = and <16 x i16> %op1, %op2 @@ -96,6 +135,11 @@ define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = and <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -108,6 +152,11 @@ define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = and <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -121,6 +170,15 @@ define void @and_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = and <8 x i32> %op1, %op2 @@ -136,6 +194,11 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 
killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = and <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -148,6 +211,11 @@ define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = and <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -161,6 +229,15 @@ define void @and_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: and_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = and <4 x i64> %op1, %op2 @@ -180,6 +257,11 @@ define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = or <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -192,6 +274,11 @@ define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = or <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -205,6 +292,15 @@ define void @or_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; 
CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = or <32 x i8> %op1, %op2 @@ -220,6 +316,11 @@ define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = or <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -232,6 +333,11 @@ define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = or <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -245,6 +351,15 @@ define void @or_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = or <16 x i16> %op1, %op2 @@ -260,6 +375,11 @@ define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v2i32: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = or <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -272,6 +392,11 @@ define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = or <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -285,6 +410,15 @@ define void @or_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = or <8 x i32> %op1, %op2 @@ -300,6 +434,11 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = or <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -312,6 +451,11 @@ define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = or <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -325,6 +469,15 @@ define void @or_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: or_v4i64: +; NONEON-NOSVE: // %bb.0: 
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = or <4 x i64> %op1, %op2 @@ -344,6 +497,11 @@ define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = xor <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -356,6 +514,11 @@ define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = xor <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -369,6 +532,15 @@ define void @xor_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = xor <32 x i8> %op1, %op2 @@ -384,6 +556,11 @@ define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = xor <4 x i16> %op1, %op2 ret <4 x i16> 
%res } @@ -396,6 +573,11 @@ define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = xor <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -409,6 +591,15 @@ define void @xor_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = xor <16 x i16> %op1, %op2 @@ -424,6 +615,11 @@ define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = xor <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -436,6 +632,11 @@ define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = xor <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -449,6 +650,15 @@ define void @xor_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; 
NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = xor <8 x i32> %op1, %op2 @@ -464,6 +674,11 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = xor <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -476,6 +691,11 @@ define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = xor <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -489,6 +709,15 @@ define void @xor_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: xor_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = xor <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll index 74ee5482a60c41..b9c859a58611e8 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,11 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smax v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -30,6 +36,11 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smax v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -45,6 +56,15 @@ define void @smax_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smax v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: smax v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -61,6 +81,11 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i16: +; NONEON-NOSVE: // %bb.0: 
+; NONEON-NOSVE-NEXT: smax v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -74,6 +99,11 @@ define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smax v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -89,6 +119,15 @@ define void @smax_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smax v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: smax v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -105,6 +144,11 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smax v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -118,6 +162,11 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smax v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.smax.v4i32(<4 x i32> 
%op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -133,6 +182,15 @@ define void @smax_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smax v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: smax v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -150,6 +208,12 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmgt d2, d0, d1 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -164,6 +228,12 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmgt v2.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -179,6 +249,18 @@ define void @smax_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmgt v4.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: cmgt v5.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: bit v0.16b, 
v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2) @@ -199,6 +281,11 @@ define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smin v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -212,6 +299,11 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smin v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -227,6 +319,15 @@ define void @smin_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smin v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: smin v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -243,6 +344,11 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: smin v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -256,6 +362,11 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smin v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -271,6 +382,15 @@ define void @smin_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smin v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: smin v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -287,6 +407,11 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smin v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -300,6 +425,11 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smin v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.smin.v4i32(<4 x 
i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -315,6 +445,15 @@ define void @smin_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smin v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: smin v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -332,6 +471,12 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmgt d2, d1, d0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -346,6 +491,12 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -361,6 +512,18 @@ define void @smin_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: smin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmgt v4.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: cmgt v5.2d, v3.2d, v2.2d +; NONEON-NOSVE-NEXT: bit v0.16b, 
v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2) @@ -381,6 +544,11 @@ define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umax v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -394,6 +562,11 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umax v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -409,6 +582,15 @@ define void @umax_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umax v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: umax v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -425,6 +607,11 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: umax v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -438,6 +625,11 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umax v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -453,6 +645,15 @@ define void @umax_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umax v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: umax v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -469,6 +670,11 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umax v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -482,6 +688,11 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umax v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.umax.v4i32(<4 x 
i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -497,6 +708,15 @@ define void @umax_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umax v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umax v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -514,6 +734,12 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmhi d2, d0, d1 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -528,6 +754,12 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmhi v2.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -543,6 +775,18 @@ define void @umax_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: umax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umax_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmhi v4.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: cmhi v5.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: bit v0.16b, 
v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2) @@ -563,6 +807,11 @@ define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umin v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2) ret <8 x i8> %res } @@ -576,6 +825,11 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umin v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2) ret <16 x i8> %res } @@ -591,6 +845,15 @@ define void @umin_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umin v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: umin v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2) @@ -607,6 +870,11 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: umin v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2) ret <4 x i16> %res } @@ -620,6 +888,11 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umin v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2) ret <8 x i16> %res } @@ -635,6 +908,15 @@ define void @umin_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umin v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: umin v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2) @@ -651,6 +933,11 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umin v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2) ret <2 x i32> %res } @@ -664,6 +951,11 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umin v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.umin.v4i32(<4 x 
i32> %op1, <4 x i32> %op2) ret <4 x i32> %res } @@ -679,6 +971,15 @@ define void @umin_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umin v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umin v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2) @@ -696,6 +997,12 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmhi d2, d1, d0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2) ret <1 x i64> %res } @@ -710,6 +1017,12 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2) ret <2 x i64> %res } @@ -725,6 +1038,18 @@ define void @umin_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: umin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umin_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: cmhi v4.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: cmhi v5.2d, v3.2d, v2.2d +; NONEON-NOSVE-NEXT: bit 
v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll index 3ff6983210a0a3..3a03de3442d581 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -20,6 +21,12 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { ; NO-FA64-NEXT: mad z0.b, p0/m, z1.b, z2.b ; NO-FA64-NEXT: // kill: def $d0 killed $d0 killed $z0 ; NO-FA64-NEXT: ret +; +; NONEON-NOSVE-LABEL: mla8xi8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mla v2.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = add <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll index 8917f43002daf9..1ed3d8fa39d8da 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -2,6 +2,7 @@ ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s 
--check-prefixes=CHECK,SVE ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE ; This test only tests the legal types for a given vector width, as mulh nodes ; do not get generated for non-legal types. @@ -36,6 +37,16 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE2-NEXT: lsr z0.h, z0.h, #4 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ushr v0.4h, v0.4h, #4 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i16> undef, i16 4, i64 0 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer %1 = sext <4 x i8> %op1 to <4 x i16> @@ -63,6 +74,12 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE2-NEXT: smulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smull v0.8h, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: shrn v0.8b, v0.8h, #8 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i16> undef, i16 8, i64 0 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer %1 = sext <8 x i8> %op1 to <8 x i16> @@ -90,6 +107,13 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE2-NEXT: smulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smull2 v2.8h, v0.16b, v1.16b +; 
NONEON-NOSVE-NEXT: smull v0.8h, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %1 = sext <16 x i8> %op1 to <16 x i16> %2 = sext <16 x i8> %op2 to <16 x i16> %mul = mul <16 x i16> %1, %2 @@ -118,6 +142,19 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smull2 v4.8h, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: smull v0.8h, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: smull2 v1.8h, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: smull v2.8h, v2.8b, v3.8b +; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v4.16b +; NONEON-NOSVE-NEXT: uzp2 v1.16b, v2.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %1 = sext <32 x i8> %op1 to <32 x i16> @@ -153,6 +190,16 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE2-NEXT: lsr z0.s, z0.s, #16 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16 +; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: ret %1 = sext <2 x i16> %op1 to <2 x i32> %2 = sext <2 x i16> %op2 to <2 x i32> %mul = mul <2 x i32> %1, %2 @@ -178,6 +225,12 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE2-NEXT: smulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smull v0.4s, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: shrn v0.4h, v0.4s, #16 +; 
NONEON-NOSVE-NEXT: ret %1 = sext <4 x i16> %op1 to <4 x i32> %2 = sext <4 x i16> %op2 to <4 x i32> %mul = mul <4 x i32> %1, %2 @@ -203,6 +256,13 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE2-NEXT: smulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smull2 v2.4s, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: smull v0.4s, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: ret %1 = sext <8 x i16> %op1 to <8 x i32> %2 = sext <8 x i16> %op2 to <8 x i32> %mul = mul <8 x i32> %1, %2 @@ -231,6 +291,19 @@ define void @smulh_v16i16(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smull2 v4.4s, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: smull v0.4s, v1.4h, v0.4h +; NONEON-NOSVE-NEXT: smull2 v1.4s, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: smull v2.4s, v2.4h, v3.4h +; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp2 v1.8h, v2.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %1 = sext <16 x i16> %op1 to <16 x i32> @@ -259,6 +332,12 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE2-NEXT: smulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smull v0.2d, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: shrn v0.2s, v0.2d, #32 +; NONEON-NOSVE-NEXT: ret %1 = sext <2 x i32> %op1 to <2 x i64> %2 = sext <2 x i32> %op2 to <2 x i64> %mul = mul <2 x i64> %1, %2 @@ -284,6 +363,13 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE2-NEXT: smulh z0.s, z0.s, 
z1.s ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smull2 v2.2d, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: smull v0.2d, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; NONEON-NOSVE-NEXT: ret %1 = sext <4 x i32> %op1 to <4 x i64> %2 = sext <4 x i32> %op2 to <4 x i64> %mul = mul <4 x i64> %1, %2 @@ -312,6 +398,19 @@ define void @smulh_v8i32(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: smull2 v4.2d, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: smull v0.2d, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: smull2 v1.2d, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: smull v2.2d, v2.2s, v3.2s +; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp2 v1.4s, v2.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %1 = sext <8 x i32> %op1 to <8 x i64> @@ -340,6 +439,16 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE2-NEXT: smulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: fmov x9, d1 +; NONEON-NOSVE-NEXT: smulh x8, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <1 x i128> undef, i128 64, i128 0 %splat = shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer %1 = sext <1 x i64> %op1 to <1 x i128> @@ -367,6 +476,19 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE2-NEXT: smulh z0.d, z0.d, z1.d 
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: mov x9, v1.d[1] +; NONEON-NOSVE-NEXT: fmov x10, d0 +; NONEON-NOSVE-NEXT: fmov x11, d1 +; NONEON-NOSVE-NEXT: smulh x10, x10, x11 +; NONEON-NOSVE-NEXT: smulh x8, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %1 = sext <2 x i64> %op1 to <2 x i128> %2 = sext <2 x i64> %op2 to <2 x i128> %mul = mul <2 x i128> %1, %2 @@ -395,6 +517,31 @@ define void @smulh_v4i64(ptr %a, ptr %b) { ; SVE2-NEXT: smulh z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: smulh_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x11, v0.d[1] +; NONEON-NOSVE-NEXT: mov x14, v3.d[1] +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: mov x10, v1.d[1] +; NONEON-NOSVE-NEXT: mov x13, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x12, d3 +; NONEON-NOSVE-NEXT: smulh x8, x8, x9 +; NONEON-NOSVE-NEXT: fmov x9, d2 +; NONEON-NOSVE-NEXT: smulh x10, x10, x11 +; NONEON-NOSVE-NEXT: smulh x9, x9, x12 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: smulh x11, x13, x14 +; NONEON-NOSVE-NEXT: fmov d1, x10 +; NONEON-NOSVE-NEXT: fmov d2, x9 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: fmov d3, x11 +; NONEON-NOSVE-NEXT: mov v2.d[1], v3.d[0] +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %1 = sext <4 x i64> %op1 to <4 x i128> @@ -433,6 +580,15 @@ define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE2-NEXT: lsr z0.h, z0.h, #4 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ushr v0.4h, v0.4h, #4 +; NONEON-NOSVE-NEXT: ret %1 = zext <4 x i8> %op1 to <4 x i16> %2 = zext <4 x i8> %op2 to <4 x i16> %mul = mul <4 x i16> %1, %2 @@ -458,6 +614,12 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE2-NEXT: umulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umull v0.8h, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: shrn v0.8b, v0.8h, #8 +; NONEON-NOSVE-NEXT: ret %1 = zext <8 x i8> %op1 to <8 x i16> %2 = zext <8 x i8> %op2 to <8 x i16> %mul = mul <8 x i16> %1, %2 @@ -483,6 +645,13 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE2-NEXT: umulh z0.b, z0.b, z1.b ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umull2 v2.8h, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: umull v0.8h, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %1 = zext <16 x i8> %op1 to <16 x i16> %2 = zext <16 x i8> %op2 to <16 x i16> %mul = mul <16 x i16> %1, %2 @@ -511,6 +680,19 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umull2 v4.8h, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: umull v0.8h, v1.8b, v0.8b +; NONEON-NOSVE-NEXT: umull2 v1.8h, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: umull v2.8h, v2.8b, v3.8b +; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v4.16b +; NONEON-NOSVE-NEXT: uzp2 v1.16b, v2.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; 
NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %1 = zext <32 x i8> %op1 to <32 x i16> @@ -545,6 +727,15 @@ define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE2-NEXT: lsr z0.s, z0.s, #16 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: ret %1 = zext <2 x i16> %op1 to <2 x i32> %2 = zext <2 x i16> %op2 to <2 x i32> %mul = mul <2 x i32> %1, %2 @@ -570,6 +761,12 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE2-NEXT: umulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umull v0.4s, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: shrn v0.4h, v0.4s, #16 +; NONEON-NOSVE-NEXT: ret %1 = zext <4 x i16> %op1 to <4 x i32> %2 = zext <4 x i16> %op2 to <4 x i32> %mul = mul <4 x i32> %1, %2 @@ -595,6 +792,13 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE2-NEXT: umulh z0.h, z0.h, z1.h ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umull2 v2.4s, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: umull v0.4s, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: ret %1 = zext <8 x i16> %op1 to <8 x i32> %2 = zext <8 x i16> %op2 to <8 x i32> %mul = mul <8 x i32> %1, %2 @@ -623,6 +827,19 @@ define void @umulh_v16i16(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] 
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umull2 v4.4s, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: umull v0.4s, v1.4h, v0.4h +; NONEON-NOSVE-NEXT: umull2 v1.4s, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: umull v2.4s, v2.4h, v3.4h +; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp2 v1.8h, v2.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %1 = zext <16 x i16> %op1 to <16 x i32> @@ -651,6 +868,12 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE2-NEXT: umulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umull v0.2d, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: shrn v0.2s, v0.2d, #32 +; NONEON-NOSVE-NEXT: ret %1 = zext <2 x i32> %op1 to <2 x i64> %2 = zext <2 x i32> %op2 to <2 x i64> %mul = mul <2 x i64> %1, %2 @@ -676,6 +899,13 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE2-NEXT: umulh z0.s, z0.s, z1.s ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umull2 v2.2d, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: umull v0.2d, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v2.4s +; NONEON-NOSVE-NEXT: ret %1 = zext <4 x i32> %op1 to <4 x i64> %2 = zext <4 x i32> %op2 to <4 x i64> %mul = mul <4 x i64> %1, %2 @@ -704,6 +934,19 @@ define void @umulh_v8i32(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: umull2 v4.2d, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umull v0.2d, v1.2s, v0.2s +; NONEON-NOSVE-NEXT: umull2 v1.2d, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: umull v2.2d, v2.2s, v3.2s +; 
NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp2 v1.4s, v2.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %insert = insertelement <8 x i64> undef, i64 32, i64 0 @@ -734,6 +977,16 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE2-NEXT: umulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: fmov x9, d1 +; NONEON-NOSVE-NEXT: umulh x8, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %1 = zext <1 x i64> %op1 to <1 x i128> %2 = zext <1 x i64> %op2 to <1 x i128> %mul = mul <1 x i128> %1, %2 @@ -759,6 +1012,19 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE2-NEXT: umulh z0.d, z0.d, z1.d ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: mov x9, v1.d[1] +; NONEON-NOSVE-NEXT: fmov x10, d0 +; NONEON-NOSVE-NEXT: fmov x11, d1 +; NONEON-NOSVE-NEXT: umulh x10, x10, x11 +; NONEON-NOSVE-NEXT: umulh x8, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: ret %1 = zext <2 x i64> %op1 to <2 x i128> %2 = zext <2 x i64> %op2 to <2 x i128> %mul = mul <2 x i128> %1, %2 @@ -787,6 +1053,31 @@ define void @umulh_v4i64(ptr %a, ptr %b) { ; SVE2-NEXT: umulh z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret +; +; NONEON-NOSVE-LABEL: umulh_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov 
x11, v0.d[1] +; NONEON-NOSVE-NEXT: mov x14, v3.d[1] +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: mov x10, v1.d[1] +; NONEON-NOSVE-NEXT: mov x13, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x12, d3 +; NONEON-NOSVE-NEXT: umulh x8, x8, x9 +; NONEON-NOSVE-NEXT: fmov x9, d2 +; NONEON-NOSVE-NEXT: umulh x10, x10, x11 +; NONEON-NOSVE-NEXT: umulh x9, x9, x12 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: umulh x11, x13, x14 +; NONEON-NOSVE-NEXT: fmov d1, x10 +; NONEON-NOSVE-NEXT: fmov d2, x9 +; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0] +; NONEON-NOSVE-NEXT: fmov d3, x11 +; NONEON-NOSVE-NEXT: mov v2.d[1], v3.d[0] +; NONEON-NOSVE-NEXT: stp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %1 = zext <4 x i64> %op1 to <4 x i128> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll index 1123907f338993..ad75ba62e17cf8 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,12 @@ define i8 @uaddv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a) ret i8 %res } @@ -30,6 +37,12 @@ define i8 @uaddv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // 
kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a) ret i8 %res } @@ -44,6 +57,14 @@ define i8 @uaddv_v32i8(ptr %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: addv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op) ret i8 %res @@ -58,6 +79,12 @@ define i16 @uaddv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a) ret i16 %res } @@ -71,6 +98,12 @@ define i16 @uaddv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a) ret i16 %res } @@ -85,6 +118,14 @@ define i16 @uaddv_v16i16(ptr %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: addv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x 
i16>, ptr %a %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op) ret i16 %res @@ -99,6 +140,12 @@ define i32 @uaddv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a) ret i32 %res } @@ -112,6 +159,12 @@ define i32 @uaddv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a) ret i32 %res } @@ -126,6 +179,14 @@ define i32 @uaddv_v8i32(ptr %a) { ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: add v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: addv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op) ret i32 %res @@ -139,6 +200,12 @@ define i64 @uaddv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: addp d0, v0.2d +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a) ret i64 %res } @@ -152,6 +219,14 @@ define i64 @uaddv_v4i64(ptr %a) { ; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uaddv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, 
q0, [x0] +; NONEON-NOSVE-NEXT: add v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: addp d0, v0.2d +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op) ret i64 %res @@ -169,6 +244,12 @@ define i8 @smaxv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smaxv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a) ret i8 %res } @@ -181,6 +262,12 @@ define i8 @smaxv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smaxv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a) ret i8 %res } @@ -194,6 +281,14 @@ define i8 @smaxv_v32i8(ptr %a) { ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: smax v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: smaxv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op) ret i8 %res @@ -207,6 +302,12 @@ define i16 @smaxv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smaxv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a) ret i16 %res } @@ -219,6 +320,12 @@ define i16 @smaxv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: 
ret +; +; NONEON-NOSVE-LABEL: smaxv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smaxv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a) ret i16 %res } @@ -232,6 +339,14 @@ define i16 @smaxv_v16i16(ptr %a) { ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: smax v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: smaxv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op) ret i16 %res @@ -245,6 +360,12 @@ define i32 @smaxv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smaxp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a) ret i32 %res } @@ -257,6 +378,12 @@ define i32 @smaxv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smaxv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a) ret i32 %res } @@ -270,6 +397,14 @@ define i32 @smaxv_v8i32(ptr %a) { ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: smax v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: smaxv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op) ret i32 %res @@ -284,6 
+419,17 @@ define i64 @smaxv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmgt d2, d0, d1 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a) ret i64 %res } @@ -297,6 +443,20 @@ define i64 @smaxv_v4i64(ptr %a) { ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: smaxv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmgt d2, d0, d1 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op) ret i64 %res @@ -314,6 +474,12 @@ define i8 @sminv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sminv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a) ret i8 %res } @@ -326,6 +492,12 @@ define i8 @sminv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sminv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res 
= call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a) ret i8 %res } @@ -339,6 +511,14 @@ define i8 @sminv_v32i8(ptr %a) { ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: smin v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: sminv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op) ret i8 %res @@ -352,6 +532,12 @@ define i16 @sminv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sminv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a) ret i16 %res } @@ -364,6 +550,12 @@ define i16 @sminv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sminv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a) ret i16 %res } @@ -377,6 +569,14 @@ define i16 @sminv_v16i16(ptr %a) { ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: smin v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: sminv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op) ret i16 %res @@ -390,6 +590,12 @@ define i32 @sminv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v2i32: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: sminp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a) ret i32 %res } @@ -402,6 +608,12 @@ define i32 @sminv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sminv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a) ret i32 %res } @@ -415,6 +627,14 @@ define i32 @sminv_v8i32(ptr %a) { ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: smin v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: sminv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op) ret i32 %res @@ -429,6 +649,17 @@ define i64 @sminv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmgt d2, d1, d0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a) ret i64 %res } @@ -442,6 +673,20 @@ define i64 @sminv_v4i64(ptr %a) { ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sminv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmgt d2, d1, d0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op) ret i64 %res @@ -459,6 +704,12 @@ define i8 @umaxv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umaxv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a) ret i8 %res } @@ -471,6 +722,12 @@ define i8 @umaxv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umaxv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a) ret i8 %res } @@ -484,6 +741,14 @@ define i8 @umaxv_v32i8(ptr %a) { ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
umaxv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: umax v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: umaxv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op) ret i8 %res @@ -497,6 +762,12 @@ define i16 @umaxv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umaxv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a) ret i16 %res } @@ -509,6 +780,12 @@ define i16 @umaxv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umaxv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a) ret i16 %res } @@ -522,6 +799,14 @@ define i16 @umaxv_v16i16(ptr %a) { ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: umax v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: umaxv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op) ret i16 %res @@ -535,6 +820,12 @@ define i32 @umaxv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umaxp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a) ret i32 %res } @@ -547,6 +838,12 @@ define i32 
@umaxv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umaxv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a) ret i32 %res } @@ -560,6 +857,14 @@ define i32 @umaxv_v8i32(ptr %a) { ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: umax v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: umaxv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op) ret i32 %res @@ -574,6 +879,17 @@ define i64 @umaxv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmhi d2, d0, d1 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a) ret i64 %res } @@ -587,6 +903,20 @@ define i64 @umaxv_v4i64(ptr %a) { ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: umaxv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmhi d2, d0, d1 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op) ret i64 %res @@ -604,6 +934,12 @@ define i8 @uminv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uminv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a) ret i8 %res } @@ -616,6 +952,12 @@ define i8 @uminv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uminv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a) ret i8 %res } @@ -629,6 +971,14 @@ define i8 @uminv_v32i8(ptr %a) { ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: umin v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uminv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op) ret i8 %res @@ -642,6 +992,12 @@ define i16 @uminv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uminv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a) ret i16 %res } @@ -654,6 +1010,12 
@@ define i16 @uminv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uminv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a) ret i16 %res } @@ -667,6 +1029,14 @@ define i16 @uminv_v16i16(ptr %a) { ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: umin v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uminv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op) ret i16 %res @@ -680,6 +1050,12 @@ define i32 @uminv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uminp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a) ret i32 %res } @@ -692,6 +1068,12 @@ define i32 @uminv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: uminv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a) ret i32 %res } @@ -705,6 +1087,14 @@ define i32 @uminv_v8i32(ptr %a) { ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: umin v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uminv s0, v0.4s +; NONEON-NOSVE-NEXT: fmov w0, s0 +; NONEON-NOSVE-NEXT: 
ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op) ret i32 %res @@ -719,6 +1109,17 @@ define i64 @uminv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmhi d2, d1, d0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a) ret i64 %res } @@ -732,6 +1133,20 @@ define i64 @uminv_v4i64(ptr %a) { ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uminv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: cmhi d2, d1, d0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op) ret i64 %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll index 4ae7586fca1692..99f8aef9f2b22d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -24,6 +25,35 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: smov w11, v1.h[0] +; NONEON-NOSVE-NEXT: smov w12, v0.h[0] +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w14, v1.h[2] +; NONEON-NOSVE-NEXT: smov w15, v0.h[2] +; NONEON-NOSVE-NEXT: smov w17, v1.h[3] +; NONEON-NOSVE-NEXT: smov w18, v0.h[3] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s0, w11 +; 
NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v0.h[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = srem <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -53,6 +83,53 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: smov w11, v1.b[0] +; NONEON-NOSVE-NEXT: smov w12, v0.b[0] +; NONEON-NOSVE-NEXT: smov w8, v1.b[1] +; NONEON-NOSVE-NEXT: smov w9, v0.b[1] +; NONEON-NOSVE-NEXT: smov w14, v1.b[2] +; NONEON-NOSVE-NEXT: smov w15, v0.b[2] +; NONEON-NOSVE-NEXT: smov w17, v1.b[3] +; NONEON-NOSVE-NEXT: smov w18, v0.b[3] +; NONEON-NOSVE-NEXT: smov w1, v1.b[4] +; NONEON-NOSVE-NEXT: smov w2, v0.b[4] +; NONEON-NOSVE-NEXT: smov w4, v1.b[5] +; NONEON-NOSVE-NEXT: smov w5, v0.b[5] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.b[7] +; NONEON-NOSVE-NEXT: fmov s2, w11 +; NONEON-NOSVE-NEXT: smov w11, v0.b[6] +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[6] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: sdiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: smov w14, v0.b[7] +; NONEON-NOSVE-NEXT: mov v2.b[2], w8 +; NONEON-NOSVE-NEXT: sdiv w3, w2, w1 +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: mov v2.b[3], w8 +; 
NONEON-NOSVE-NEXT: sdiv w9, w5, w4 +; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2 +; NONEON-NOSVE-NEXT: mov v2.b[4], w8 +; NONEON-NOSVE-NEXT: sdiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5 +; NONEON-NOSVE-NEXT: mov v2.b[5], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: mov v2.b[6], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14 +; NONEON-NOSVE-NEXT: mov v2.b[7], w8 +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %res = srem <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -102,6 +179,112 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: smov w11, v1.b[0] +; NONEON-NOSVE-NEXT: smov w12, v0.b[0] +; NONEON-NOSVE-NEXT: smov w8, v1.b[1] +; NONEON-NOSVE-NEXT: smov w9, v0.b[1] +; NONEON-NOSVE-NEXT: smov w14, v1.b[2] +; NONEON-NOSVE-NEXT: smov w15, v0.b[2] +; NONEON-NOSVE-NEXT: smov w17, v1.b[3] +; NONEON-NOSVE-NEXT: smov w18, v0.b[3] +; NONEON-NOSVE-NEXT: smov w1, v1.b[4] 
+; NONEON-NOSVE-NEXT: smov w2, v0.b[4] +; NONEON-NOSVE-NEXT: smov w4, v1.b[5] +; NONEON-NOSVE-NEXT: smov w5, v0.b[5] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: smov w7, v1.b[6] +; NONEON-NOSVE-NEXT: smov w19, v0.b[6] +; NONEON-NOSVE-NEXT: smov w21, v1.b[7] +; NONEON-NOSVE-NEXT: smov w22, v0.b[7] +; NONEON-NOSVE-NEXT: smov w24, v1.b[8] +; NONEON-NOSVE-NEXT: smov w25, v0.b[8] +; NONEON-NOSVE-NEXT: smov w27, v1.b[9] +; NONEON-NOSVE-NEXT: smov w28, v0.b[9] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.b[11] +; NONEON-NOSVE-NEXT: fmov s2, w11 +; NONEON-NOSVE-NEXT: smov w11, v0.b[10] +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.b[10] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: sdiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: smov w14, v0.b[11] +; NONEON-NOSVE-NEXT: smov w16, v1.b[12] +; NONEON-NOSVE-NEXT: mov v2.b[2], w8 +; NONEON-NOSVE-NEXT: sdiv w3, w2, w1 +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: smov w17, v0.b[12] +; NONEON-NOSVE-NEXT: smov w0, v1.b[13] +; NONEON-NOSVE-NEXT: mov v2.b[3], w8 +; NONEON-NOSVE-NEXT: sdiv w6, w5, w4 +; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2 +; NONEON-NOSVE-NEXT: smov w1, v0.b[13] +; NONEON-NOSVE-NEXT: mov v2.b[4], w8 +; NONEON-NOSVE-NEXT: sdiv w20, w19, w7 +; NONEON-NOSVE-NEXT: msub w8, w6, w4, w5 +; NONEON-NOSVE-NEXT: mov v2.b[5], w8 +; NONEON-NOSVE-NEXT: sdiv w23, w22, w21 +; NONEON-NOSVE-NEXT: msub w8, w20, w7, w19 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v2.b[6], w8 +; NONEON-NOSVE-NEXT: sdiv w26, w25, w24 +; NONEON-NOSVE-NEXT: msub w8, w23, w21, w22 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v2.b[7], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w28, w27 +; NONEON-NOSVE-NEXT: msub w8, w26, w24, w25 
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v2.b[8], w8 +; NONEON-NOSVE-NEXT: sdiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w27, w28 +; NONEON-NOSVE-NEXT: mov v2.b[9], w8 +; NONEON-NOSVE-NEXT: sdiv w15, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: smov w10, v1.b[14] +; NONEON-NOSVE-NEXT: smov w11, v0.b[14] +; NONEON-NOSVE-NEXT: mov v2.b[10], w8 +; NONEON-NOSVE-NEXT: sdiv w18, w17, w16 +; NONEON-NOSVE-NEXT: msub w8, w15, w13, w14 +; NONEON-NOSVE-NEXT: smov w13, v1.b[15] +; NONEON-NOSVE-NEXT: smov w14, v0.b[15] +; NONEON-NOSVE-NEXT: mov v2.b[11], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w1, w0 +; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17 +; NONEON-NOSVE-NEXT: mov v2.b[12], w8 +; NONEON-NOSVE-NEXT: sdiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w0, w1 +; NONEON-NOSVE-NEXT: mov v2.b[13], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: mov v2.b[14], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14 +; NONEON-NOSVE-NEXT: mov v2.b[15], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %res = srem <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -189,6 +372,279 @@ define void @srem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mls z2.b, p0/m, z7.b, z4.b ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #320 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #272] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #288] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp 
x20, x19, [sp, #304] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 320 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: str x0, [sp, #216] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w8, v1.b[1] +; NONEON-NOSVE-NEXT: smov w9, v0.b[1] +; NONEON-NOSVE-NEXT: smov w4, v3.b[1] +; NONEON-NOSVE-NEXT: smov w1, v2.b[1] +; NONEON-NOSVE-NEXT: smov w7, v3.b[7] +; NONEON-NOSVE-NEXT: smov w5, v2.b[7] +; NONEON-NOSVE-NEXT: smov w6, v3.b[8] +; NONEON-NOSVE-NEXT: smov w3, v2.b[8] +; NONEON-NOSVE-NEXT: smov w22, v3.b[9] +; NONEON-NOSVE-NEXT: smov w20, v2.b[9] +; NONEON-NOSVE-NEXT: smov w13, v3.b[0] +; NONEON-NOSVE-NEXT: smov w17, v3.b[3] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #100] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w8, v1.b[0] +; NONEON-NOSVE-NEXT: str w9, [sp, #108] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w9, v0.b[0] +; NONEON-NOSVE-NEXT: smov w14, v2.b[3] +; NONEON-NOSVE-NEXT: smov w15, v3.b[4] +; NONEON-NOSVE-NEXT: smov w12, v2.b[4] +; NONEON-NOSVE-NEXT: smov w2, v3.b[5] +; NONEON-NOSVE-NEXT: smov w18, v2.b[5] +; NONEON-NOSVE-NEXT: smov w0, v3.b[6] +; NONEON-NOSVE-NEXT: smov w16, v2.b[6] +; NONEON-NOSVE-NEXT: smov w21, v3.b[10] +; NONEON-NOSVE-NEXT: smov w19, v2.b[10] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w30, [sp, 
#36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: str w10, [sp, #116] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[2] +; NONEON-NOSVE-NEXT: smov w9, v0.b[2] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #44] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[3] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #52] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w9, v0.b[3] +; NONEON-NOSVE-NEXT: sdiv w26, w14, w17 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w11, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[4] +; NONEON-NOSVE-NEXT: smov w9, v0.b[4] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #60] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[5] +; NONEON-NOSVE-NEXT: smov w9, v0.b[5] +; NONEON-NOSVE-NEXT: str w8, [sp, #96] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #104] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #68] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[6] +; NONEON-NOSVE-NEXT: smov w9, v0.b[6] +; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #112] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[7] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w9, v0.b[7] +; NONEON-NOSVE-NEXT: sdiv w25, w12, w15 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[8] +; NONEON-NOSVE-NEXT: smov w9, v0.b[8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #140] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[9] +; NONEON-NOSVE-NEXT: smov w9, v0.b[9] +; NONEON-NOSVE-NEXT: 
str w8, [sp, #148] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #156] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w11, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[10] +; NONEON-NOSVE-NEXT: smov w9, v0.b[10] +; NONEON-NOSVE-NEXT: str w10, [sp, #128] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[11] +; NONEON-NOSVE-NEXT: smov w9, v0.b[11] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #192] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #212] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[12] +; NONEON-NOSVE-NEXT: smov w9, v0.b[12] +; NONEON-NOSVE-NEXT: str w8, [sp, #172] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #180] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #200] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[13] +; NONEON-NOSVE-NEXT: smov w9, v0.b[13] +; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #164] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w11, v3.b[2] +; NONEON-NOSVE-NEXT: str w9, [sp, #176] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #188] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.b[14] +; NONEON-NOSVE-NEXT: smov w9, v0.b[14] +; NONEON-NOSVE-NEXT: str w8, [sp, #144] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #152] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #184] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w9, v2.b[2] +; NONEON-NOSVE-NEXT: sdiv w8, w1, w4 +; NONEON-NOSVE-NEXT: str w10, [sp, #160] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w10, v2.b[0] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w8, w5, w7 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: 
sdiv w8, w3, w6 +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w8, w20, w22 +; NONEON-NOSVE-NEXT: sdiv w24, w10, w13 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w29, w8, [sp, #40] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w8, w30, w29 +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: fmov s4, w8 +; NONEON-NOSVE-NEXT: sdiv w23, w9, w11 +; NONEON-NOSVE-NEXT: msub w10, w24, w13, w10 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w24, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w13, w13, w4, w1 +; NONEON-NOSVE-NEXT: ldr w1, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w4, [sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: fmov s5, w10 +; NONEON-NOSVE-NEXT: msub w1, w1, w24, w4 +; NONEON-NOSVE-NEXT: mov v5.b[1], w13 +; NONEON-NOSVE-NEXT: mov v4.b[1], w1 +; NONEON-NOSVE-NEXT: ldr w1, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w23, w11, w9 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w28, w18, w2 +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #52] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #272] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w26, w17, w14 +; NONEON-NOSVE-NEXT: ldr w14, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w11, w10 +; NONEON-NOSVE-NEXT: ldr w17, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: smov w10, v3.b[11] +; NONEON-NOSVE-NEXT: smov w11, v2.b[11] +; NONEON-NOSVE-NEXT: mov v4.b[2], w9 +; NONEON-NOSVE-NEXT: mov v5.b[3], w8 +; NONEON-NOSVE-NEXT: msub w8, w25, w15, w12 +; NONEON-NOSVE-NEXT: ldp w13, w9, [sp, #76] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w27, w16, w0 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: 
ldp x26, x25, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w14, w13 +; NONEON-NOSVE-NEXT: ldr w14, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[4], w8 +; NONEON-NOSVE-NEXT: msub w8, w28, w2, w18 +; NONEON-NOSVE-NEXT: ldr w2, [sp, #156] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[3], w9 +; NONEON-NOSVE-NEXT: ldp w12, w9, [sp, #64] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[5], w8 +; NONEON-NOSVE-NEXT: msub w8, w27, w0, w16 +; NONEON-NOSVE-NEXT: ldr w0, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w4, w19, w21 +; NONEON-NOSVE-NEXT: msub w9, w9, w14, w12 +; NONEON-NOSVE-NEXT: smov w12, v3.b[12] +; NONEON-NOSVE-NEXT: smov w14, v2.b[12] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #240] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[6], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[4], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w8, w7, w5 +; NONEON-NOSVE-NEXT: ldr w5, [sp, #204] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w17, w15 +; NONEON-NOSVE-NEXT: ldr w17, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[7], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w13, w11, w10 +; NONEON-NOSVE-NEXT: mov v4.b[5], w9 +; NONEON-NOSVE-NEXT: ldp w16, w9, [sp, #88] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w8, w6, w3 +; NONEON-NOSVE-NEXT: ldr w3, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w17, w16 +; NONEON-NOSVE-NEXT: smov w16, v3.b[13] +; NONEON-NOSVE-NEXT: smov w17, v2.b[13] +; NONEON-NOSVE-NEXT: mov v5.b[8], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[6], w9 +; NONEON-NOSVE-NEXT: msub w8, w8, w22, w20 +; NONEON-NOSVE-NEXT: sdiv w15, w14, w12 +; NONEON-NOSVE-NEXT: ldp w18, w9, [sp, #136] // 8-byte Folded Reload +; 
NONEON-NOSVE-NEXT: mov v5.b[9], w8 +; NONEON-NOSVE-NEXT: msub w8, w4, w21, w19 +; NONEON-NOSVE-NEXT: msub w9, w9, w0, w18 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #304] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #288] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[7], w9 +; NONEON-NOSVE-NEXT: mov v5.b[10], w8 +; NONEON-NOSVE-NEXT: msub w8, w13, w10, w11 +; NONEON-NOSVE-NEXT: ldp w0, w9, [sp, #124] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #196] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #192] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w18, w17, w16 +; NONEON-NOSVE-NEXT: msub w9, w9, w1, w0 +; NONEON-NOSVE-NEXT: mov v5.b[11], w8 +; NONEON-NOSVE-NEXT: smov w0, v3.b[14] +; NONEON-NOSVE-NEXT: msub w10, w10, w13, w11 +; NONEON-NOSVE-NEXT: smov w1, v2.b[14] +; NONEON-NOSVE-NEXT: msub w8, w15, w12, w14 +; NONEON-NOSVE-NEXT: mov v4.b[8], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w15, w13, [sp, #168] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w3, w2 +; NONEON-NOSVE-NEXT: mov v5.b[12], w8 +; NONEON-NOSVE-NEXT: ldp w4, w3, [sp, #208] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w14, w12, [sp, #176] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[9], w9 +; NONEON-NOSVE-NEXT: sdiv w2, w1, w0 +; NONEON-NOSVE-NEXT: smov w9, v3.b[15] +; NONEON-NOSVE-NEXT: msub w3, w3, w5, w4 +; NONEON-NOSVE-NEXT: smov w4, v2.b[15] +; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17 +; NONEON-NOSVE-NEXT: ldr w16, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[10], w3 +; NONEON-NOSVE-NEXT: mov v5.b[13], w8 +; NONEON-NOSVE-NEXT: mov v4.b[11], w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #188] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w11, w4, w9 +; NONEON-NOSVE-NEXT: msub w8, w2, w0, w1 +; NONEON-NOSVE-NEXT: msub w10, w10, w13, w12 +; NONEON-NOSVE-NEXT: smov w12, v1.b[15] +; NONEON-NOSVE-NEXT: smov w13, v0.b[15] +; 
NONEON-NOSVE-NEXT: mov v5.b[14], w8 +; NONEON-NOSVE-NEXT: mov v4.b[12], w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #184] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w10, w10, w15, w14 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #152] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w14, w13, w12 +; NONEON-NOSVE-NEXT: msub w8, w11, w9, w4 +; NONEON-NOSVE-NEXT: mov v4.b[13], w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #160] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[15], w8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #216] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w10, w10, w16, w15 +; NONEON-NOSVE-NEXT: mov v4.b[14], w10 +; NONEON-NOSVE-NEXT: msub w9, w14, w12, w13 +; NONEON-NOSVE-NEXT: mov v4.b[15], w9 +; NONEON-NOSVE-NEXT: stp q5, q4, [x8] +; NONEON-NOSVE-NEXT: add sp, sp, #320 +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = srem <32 x i8> %op1, %op2 @@ -210,6 +666,33 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: smov w11, v1.h[0] +; NONEON-NOSVE-NEXT: smov w12, v0.h[0] +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w14, v1.h[2] +; NONEON-NOSVE-NEXT: smov w15, v0.h[2] +; NONEON-NOSVE-NEXT: smov w17, v1.h[3] +; NONEON-NOSVE-NEXT: smov w18, v0.h[3] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s0, w11 +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: 
mov v0.h[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = srem <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -238,6 +721,51 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: smov w11, v1.h[0] +; NONEON-NOSVE-NEXT: smov w12, v0.h[0] +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w14, v1.h[2] +; NONEON-NOSVE-NEXT: smov w15, v0.h[2] +; NONEON-NOSVE-NEXT: smov w17, v1.h[3] +; NONEON-NOSVE-NEXT: smov w18, v0.h[3] +; NONEON-NOSVE-NEXT: smov w1, v1.h[4] +; NONEON-NOSVE-NEXT: smov w2, v0.h[4] +; NONEON-NOSVE-NEXT: smov w4, v1.h[5] +; NONEON-NOSVE-NEXT: smov w5, v0.h[5] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: smov w13, v1.h[7] +; NONEON-NOSVE-NEXT: fmov s2, w11 +; NONEON-NOSVE-NEXT: smov w11, v0.h[6] +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: smov w10, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: sdiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: smov w14, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: sdiv w3, w2, w1 +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w5, w4 +; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: sdiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5 +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; 
NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14 +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = srem <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -282,6 +810,139 @@ define void @srem_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mls z0.h, p0/m, z7.h, z1.h ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: smov w8, v1.h[1] +; NONEON-NOSVE-NEXT: smov w9, v0.h[1] +; NONEON-NOSVE-NEXT: smov w20, v1.h[0] +; NONEON-NOSVE-NEXT: smov w21, v0.h[0] +; NONEON-NOSVE-NEXT: smov w19, v0.h[3] +; NONEON-NOSVE-NEXT: smov w5, v1.h[4] +; NONEON-NOSVE-NEXT: smov w2, v0.h[4] +; NONEON-NOSVE-NEXT: smov w1, v3.h[1] +; NONEON-NOSVE-NEXT: smov w23, v2.h[1] +; 
NONEON-NOSVE-NEXT: smov w25, v3.h[0] +; NONEON-NOSVE-NEXT: smov w26, v2.h[0] +; NONEON-NOSVE-NEXT: smov w6, v1.h[5] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w8, v1.h[2] +; NONEON-NOSVE-NEXT: smov w9, v0.h[2] +; NONEON-NOSVE-NEXT: smov w3, v0.h[5] +; NONEON-NOSVE-NEXT: smov w4, v1.h[6] +; NONEON-NOSVE-NEXT: smov w7, v0.h[6] +; NONEON-NOSVE-NEXT: smov w28, v3.h[2] +; NONEON-NOSVE-NEXT: smov w29, v2.h[2] +; NONEON-NOSVE-NEXT: smov w15, v3.h[3] +; NONEON-NOSVE-NEXT: smov w13, v2.h[3] +; NONEON-NOSVE-NEXT: smov w12, v3.h[4] +; NONEON-NOSVE-NEXT: smov w14, v3.h[5] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w11, w21, w20 +; NONEON-NOSVE-NEXT: str w10, [sp, #44] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: smov w8, v1.h[3] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w11, v2.h[4] +; NONEON-NOSVE-NEXT: ldr w22, [sp, #4] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w20, w22, w20, w21 +; NONEON-NOSVE-NEXT: sdiv w9, w19, w8 +; NONEON-NOSVE-NEXT: str w10, [sp, #32] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w10, v3.h[6] +; NONEON-NOSVE-NEXT: fmov s5, w20 +; NONEON-NOSVE-NEXT: smov w20, v3.h[7] +; NONEON-NOSVE-NEXT: sdiv w8, w2, w5 +; NONEON-NOSVE-NEXT: sdiv w24, w23, w1 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: sdiv w27, w26, w25 +; NONEON-NOSVE-NEXT: msub w1, w24, w1, w23 +; NONEON-NOSVE-NEXT: ldp w24, w23, [sp, #40] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w9, w3, w6 +; NONEON-NOSVE-NEXT: msub w21, w27, w25, w26 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w23, w23, w25, w24 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: fmov s4, w21 +; NONEON-NOSVE-NEXT: mov v5.h[1], w23 +; NONEON-NOSVE-NEXT: ldp 
w23, w21, [sp, #28] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.h[1], w1 +; NONEON-NOSVE-NEXT: sdiv w8, w7, w4 +; NONEON-NOSVE-NEXT: msub w21, w21, w25, w23 +; NONEON-NOSVE-NEXT: smov w23, v2.h[7] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.h[2], w21 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: sdiv w30, w29, w28 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: smov w9, v2.h[5] +; NONEON-NOSVE-NEXT: smov w8, v2.h[6] +; NONEON-NOSVE-NEXT: sdiv w18, w13, w15 +; NONEON-NOSVE-NEXT: msub w1, w30, w28, w29 +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.h[2], w1 +; NONEON-NOSVE-NEXT: sdiv w16, w11, w12 +; NONEON-NOSVE-NEXT: msub w13, w18, w15, w13 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w18, [sp] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w15, w15, w18, w19 +; NONEON-NOSVE-NEXT: mov v4.h[3], w13 +; NONEON-NOSVE-NEXT: smov w13, v1.h[7] +; NONEON-NOSVE-NEXT: mov v5.h[3], w15 +; NONEON-NOSVE-NEXT: smov w15, v0.h[7] +; NONEON-NOSVE-NEXT: sdiv w17, w9, w14 +; NONEON-NOSVE-NEXT: msub w11, w16, w12, w11 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #16] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w12, w12, w5, w2 +; NONEON-NOSVE-NEXT: mov v4.h[4], w11 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.h[4], w12 +; NONEON-NOSVE-NEXT: msub w11, w11, w6, w3 +; NONEON-NOSVE-NEXT: sdiv w24, w8, w10 +; NONEON-NOSVE-NEXT: msub w9, w17, w14, w9 +; NONEON-NOSVE-NEXT: mov v5.h[5], w11 +; NONEON-NOSVE-NEXT: mov v4.h[5], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w4, w7 +; NONEON-NOSVE-NEXT: sdiv w18, w23, w20 +; NONEON-NOSVE-NEXT: msub w8, w24, w10, w8 +; 
NONEON-NOSVE-NEXT: mov v5.h[6], w9 +; NONEON-NOSVE-NEXT: mov v4.h[6], w8 +; NONEON-NOSVE-NEXT: sdiv w12, w15, w13 +; NONEON-NOSVE-NEXT: msub w8, w18, w20, w23 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.h[7], w8 +; NONEON-NOSVE-NEXT: msub w9, w12, w13, w15 +; NONEON-NOSVE-NEXT: mov v5.h[7], w9 +; NONEON-NOSVE-NEXT: stp q4, q5, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = srem <16 x i16> %op1, %op2 @@ -300,6 +961,23 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: mov w11, v1.s[1] +; NONEON-NOSVE-NEXT: mov w12, v0.s[1] +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12 +; NONEON-NOSVE-NEXT: mov v0.s[1], w9 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = srem <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -315,6 +993,30 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov w11, s1 +; NONEON-NOSVE-NEXT: fmov w12, s0 +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: mov w14, v1.s[2] +; NONEON-NOSVE-NEXT: mov 
w15, v0.s[2] +; NONEON-NOSVE-NEXT: mov w17, v1.s[3] +; NONEON-NOSVE-NEXT: mov w18, v0.s[3] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s0, w11 +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v0.s[1], w8 +; NONEON-NOSVE-NEXT: sdiv w9, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v0.s[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.s[3], w8 +; NONEON-NOSVE-NEXT: ret %res = srem <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -334,6 +1036,65 @@ define void @srem_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -48 +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov w12, s0 +; NONEON-NOSVE-NEXT: fmov w3, s2 +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w11, s1 +; NONEON-NOSVE-NEXT: fmov w2, s3 +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w17, v3.s[1] +; NONEON-NOSVE-NEXT: mov w18, v2.s[1] +; NONEON-NOSVE-NEXT: mov w14, v1.s[2] +; NONEON-NOSVE-NEXT: mov w15, v0.s[2] +; NONEON-NOSVE-NEXT: mov w5, v3.s[2] +; NONEON-NOSVE-NEXT: mov w6, v2.s[2] +; NONEON-NOSVE-NEXT: sdiv w13, w12, w11 +; NONEON-NOSVE-NEXT: mov w19, v3.s[3] +; NONEON-NOSVE-NEXT: mov w20, v2.s[3] +; 
NONEON-NOSVE-NEXT: mov w22, v1.s[3] +; NONEON-NOSVE-NEXT: mov w23, v0.s[3] +; NONEON-NOSVE-NEXT: sdiv w4, w3, w2 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s1, w11 +; NONEON-NOSVE-NEXT: sdiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w12, w4, w2, w3 +; NONEON-NOSVE-NEXT: fmov s0, w12 +; NONEON-NOSVE-NEXT: sdiv w1, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v1.s[1], w8 +; NONEON-NOSVE-NEXT: sdiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w13, w1, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.s[1], w13 +; NONEON-NOSVE-NEXT: sdiv w7, w6, w5 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v1.s[2], w8 +; NONEON-NOSVE-NEXT: sdiv w21, w20, w19 +; NONEON-NOSVE-NEXT: msub w10, w7, w5, w6 +; NONEON-NOSVE-NEXT: mov v0.s[2], w10 +; NONEON-NOSVE-NEXT: sdiv w9, w23, w22 +; NONEON-NOSVE-NEXT: msub w10, w21, w19, w20 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v0.s[3], w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w22, w23 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v1.s[3], w8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = srem <8 x i32> %op1, %op2 @@ -352,6 +1113,17 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = srem <1 x i64> %op1, %op2 ret 
<1 x i64> %res } @@ -367,6 +1139,20 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x11, v1.d[1] +; NONEON-NOSVE-NEXT: mov x12, v0.d[1] +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: sdiv x13, x12, x11 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: ret %res = srem <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -386,6 +1172,33 @@ define void @srem_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: srem_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: fmov x15, d2 +; NONEON-NOSVE-NEXT: mov x12, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x14, d3 +; NONEON-NOSVE-NEXT: mov x11, v3.d[1] +; NONEON-NOSVE-NEXT: mov x17, v1.d[1] +; NONEON-NOSVE-NEXT: mov x18, v0.d[1] +; NONEON-NOSVE-NEXT: sdiv x10, x9, x8 +; NONEON-NOSVE-NEXT: sdiv x16, x15, x14 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: sdiv x13, x12, x11 +; NONEON-NOSVE-NEXT: msub x10, x16, x14, x15 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: sdiv x1, x18, x17 +; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: msub x11, x1, x17, x18 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = srem <4 x i64> %op1, %op2 @@ -413,6 +1226,41 
@@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: umov w11, v1.h[0] +; NONEON-NOSVE-NEXT: umov w12, v0.h[0] +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w14, v1.h[2] +; NONEON-NOSVE-NEXT: umov w15, v0.h[2] +; NONEON-NOSVE-NEXT: umov w17, v1.h[3] +; NONEON-NOSVE-NEXT: umov w18, v0.h[3] +; NONEON-NOSVE-NEXT: and w11, w11, #0xff +; NONEON-NOSVE-NEXT: and w12, w12, #0xff +; NONEON-NOSVE-NEXT: and w8, w8, #0xff +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: and w9, w9, #0xff +; NONEON-NOSVE-NEXT: and w14, w14, #0xff +; NONEON-NOSVE-NEXT: and w15, w15, #0xff +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: and w12, w17, #0xff +; NONEON-NOSVE-NEXT: and w13, w18, #0xff +; NONEON-NOSVE-NEXT: fmov s0, w11 +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: udiv w9, w13, w12 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v0.h[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w12, w13 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = urem <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -442,6 +1290,53 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def 
$q0 +; NONEON-NOSVE-NEXT: umov w11, v1.b[0] +; NONEON-NOSVE-NEXT: umov w12, v0.b[0] +; NONEON-NOSVE-NEXT: umov w8, v1.b[1] +; NONEON-NOSVE-NEXT: umov w9, v0.b[1] +; NONEON-NOSVE-NEXT: umov w14, v1.b[2] +; NONEON-NOSVE-NEXT: umov w15, v0.b[2] +; NONEON-NOSVE-NEXT: umov w17, v1.b[3] +; NONEON-NOSVE-NEXT: umov w18, v0.b[3] +; NONEON-NOSVE-NEXT: umov w1, v1.b[4] +; NONEON-NOSVE-NEXT: umov w2, v0.b[4] +; NONEON-NOSVE-NEXT: umov w4, v1.b[5] +; NONEON-NOSVE-NEXT: umov w5, v0.b[5] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.b[7] +; NONEON-NOSVE-NEXT: fmov s2, w11 +; NONEON-NOSVE-NEXT: umov w11, v0.b[6] +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.b[6] +; NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: udiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: umov w14, v0.b[7] +; NONEON-NOSVE-NEXT: mov v2.b[2], w8 +; NONEON-NOSVE-NEXT: udiv w3, w2, w1 +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: mov v2.b[3], w8 +; NONEON-NOSVE-NEXT: udiv w9, w5, w4 +; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2 +; NONEON-NOSVE-NEXT: mov v2.b[4], w8 +; NONEON-NOSVE-NEXT: udiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5 +; NONEON-NOSVE-NEXT: mov v2.b[5], w8 +; NONEON-NOSVE-NEXT: udiv w9, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: mov v2.b[6], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14 +; NONEON-NOSVE-NEXT: mov v2.b[7], w8 +; NONEON-NOSVE-NEXT: fmov d0, d2 +; NONEON-NOSVE-NEXT: ret %res = urem <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -491,6 +1386,112 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v16i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: umov w11, v1.b[0] +; NONEON-NOSVE-NEXT: umov w12, v0.b[0] +; NONEON-NOSVE-NEXT: umov w8, v1.b[1] +; NONEON-NOSVE-NEXT: umov w9, v0.b[1] +; NONEON-NOSVE-NEXT: umov w14, v1.b[2] +; NONEON-NOSVE-NEXT: umov w15, v0.b[2] +; NONEON-NOSVE-NEXT: umov w17, v1.b[3] +; NONEON-NOSVE-NEXT: umov w18, v0.b[3] +; NONEON-NOSVE-NEXT: umov w1, v1.b[4] +; NONEON-NOSVE-NEXT: umov w2, v0.b[4] +; NONEON-NOSVE-NEXT: umov w4, v1.b[5] +; NONEON-NOSVE-NEXT: umov w5, v0.b[5] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: umov w7, v1.b[6] +; NONEON-NOSVE-NEXT: umov w19, v0.b[6] +; NONEON-NOSVE-NEXT: umov w21, v1.b[7] +; NONEON-NOSVE-NEXT: umov w22, v0.b[7] +; NONEON-NOSVE-NEXT: umov w24, v1.b[8] +; NONEON-NOSVE-NEXT: umov w25, v0.b[8] +; NONEON-NOSVE-NEXT: umov w27, v1.b[9] +; NONEON-NOSVE-NEXT: umov w28, v0.b[9] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.b[11] +; NONEON-NOSVE-NEXT: fmov s2, w11 +; NONEON-NOSVE-NEXT: umov w11, v0.b[10] +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.b[10] +; 
NONEON-NOSVE-NEXT: mov v2.b[1], w8 +; NONEON-NOSVE-NEXT: udiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: umov w14, v0.b[11] +; NONEON-NOSVE-NEXT: umov w16, v1.b[12] +; NONEON-NOSVE-NEXT: mov v2.b[2], w8 +; NONEON-NOSVE-NEXT: udiv w3, w2, w1 +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: umov w17, v0.b[12] +; NONEON-NOSVE-NEXT: umov w0, v1.b[13] +; NONEON-NOSVE-NEXT: mov v2.b[3], w8 +; NONEON-NOSVE-NEXT: udiv w6, w5, w4 +; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2 +; NONEON-NOSVE-NEXT: umov w1, v0.b[13] +; NONEON-NOSVE-NEXT: mov v2.b[4], w8 +; NONEON-NOSVE-NEXT: udiv w20, w19, w7 +; NONEON-NOSVE-NEXT: msub w8, w6, w4, w5 +; NONEON-NOSVE-NEXT: mov v2.b[5], w8 +; NONEON-NOSVE-NEXT: udiv w23, w22, w21 +; NONEON-NOSVE-NEXT: msub w8, w20, w7, w19 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v2.b[6], w8 +; NONEON-NOSVE-NEXT: udiv w26, w25, w24 +; NONEON-NOSVE-NEXT: msub w8, w23, w21, w22 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v2.b[7], w8 +; NONEON-NOSVE-NEXT: udiv w9, w28, w27 +; NONEON-NOSVE-NEXT: msub w8, w26, w24, w25 +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v2.b[8], w8 +; NONEON-NOSVE-NEXT: udiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w27, w28 +; NONEON-NOSVE-NEXT: mov v2.b[9], w8 +; NONEON-NOSVE-NEXT: udiv w15, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: umov w10, v1.b[14] +; NONEON-NOSVE-NEXT: umov w11, v0.b[14] +; NONEON-NOSVE-NEXT: mov v2.b[10], w8 +; NONEON-NOSVE-NEXT: udiv w18, w17, w16 +; NONEON-NOSVE-NEXT: msub w8, w15, w13, w14 +; NONEON-NOSVE-NEXT: umov w13, v1.b[15] +; NONEON-NOSVE-NEXT: umov w14, v0.b[15] +; NONEON-NOSVE-NEXT: mov v2.b[11], w8 +; NONEON-NOSVE-NEXT: udiv w9, w1, w0 +; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17 +; 
NONEON-NOSVE-NEXT: mov v2.b[12], w8 +; NONEON-NOSVE-NEXT: udiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w0, w1 +; NONEON-NOSVE-NEXT: mov v2.b[13], w8 +; NONEON-NOSVE-NEXT: udiv w9, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: mov v2.b[14], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14 +; NONEON-NOSVE-NEXT: mov v2.b[15], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %res = urem <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -578,6 +1579,279 @@ define void @urem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mls z2.b, p0/m, z7.b, z4.b ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #320 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #224] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #240] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #256] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #272] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #288] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #304] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 320 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: str x0, [sp, #216] // 
8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w8, v1.b[1] +; NONEON-NOSVE-NEXT: umov w9, v0.b[1] +; NONEON-NOSVE-NEXT: umov w4, v3.b[1] +; NONEON-NOSVE-NEXT: umov w1, v2.b[1] +; NONEON-NOSVE-NEXT: umov w7, v3.b[7] +; NONEON-NOSVE-NEXT: umov w5, v2.b[7] +; NONEON-NOSVE-NEXT: umov w6, v3.b[8] +; NONEON-NOSVE-NEXT: umov w3, v2.b[8] +; NONEON-NOSVE-NEXT: umov w22, v3.b[9] +; NONEON-NOSVE-NEXT: umov w20, v2.b[9] +; NONEON-NOSVE-NEXT: umov w13, v3.b[0] +; NONEON-NOSVE-NEXT: umov w17, v3.b[3] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: str w8, [sp, #100] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w8, v1.b[0] +; NONEON-NOSVE-NEXT: str w9, [sp, #108] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w9, v0.b[0] +; NONEON-NOSVE-NEXT: umov w14, v2.b[3] +; NONEON-NOSVE-NEXT: umov w15, v3.b[4] +; NONEON-NOSVE-NEXT: umov w12, v2.b[4] +; NONEON-NOSVE-NEXT: umov w2, v3.b[5] +; NONEON-NOSVE-NEXT: umov w18, v2.b[5] +; NONEON-NOSVE-NEXT: umov w0, v3.b[6] +; NONEON-NOSVE-NEXT: umov w16, v2.b[6] +; NONEON-NOSVE-NEXT: umov w21, v3.b[10] +; NONEON-NOSVE-NEXT: umov w19, v2.b[10] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: ldr w30, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: str w10, [sp, #116] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[2] +; NONEON-NOSVE-NEXT: umov w9, v0.b[2] +; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #44] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[3] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #52] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w9, v0.b[3] +; NONEON-NOSVE-NEXT: udiv w26, w14, w17 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w11, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[4] +; NONEON-NOSVE-NEXT: umov w9, v0.b[4] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #60] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; 
NONEON-NOSVE-NEXT: umov w8, v1.b[5] +; NONEON-NOSVE-NEXT: umov w9, v0.b[5] +; NONEON-NOSVE-NEXT: str w8, [sp, #96] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #104] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #68] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[6] +; NONEON-NOSVE-NEXT: umov w9, v0.b[6] +; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #80] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #112] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[7] +; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #88] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w9, v0.b[7] +; NONEON-NOSVE-NEXT: udiv w25, w12, w15 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[8] +; NONEON-NOSVE-NEXT: umov w9, v0.b[8] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #140] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[9] +; NONEON-NOSVE-NEXT: umov w9, v0.b[9] +; NONEON-NOSVE-NEXT: str w8, [sp, #148] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #156] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w11, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[10] +; NONEON-NOSVE-NEXT: umov w9, v0.b[10] +; NONEON-NOSVE-NEXT: str w10, [sp, #128] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[11] +; NONEON-NOSVE-NEXT: umov w9, v0.b[11] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #192] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #212] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[12] +; NONEON-NOSVE-NEXT: umov w9, v0.b[12] +; NONEON-NOSVE-NEXT: str w8, [sp, #172] // 4-byte Folded Spill +; 
NONEON-NOSVE-NEXT: str w9, [sp, #180] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #200] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[13] +; NONEON-NOSVE-NEXT: umov w9, v0.b[13] +; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #164] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w11, v3.b[2] +; NONEON-NOSVE-NEXT: str w9, [sp, #176] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #188] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.b[14] +; NONEON-NOSVE-NEXT: umov w9, v0.b[14] +; NONEON-NOSVE-NEXT: str w8, [sp, #144] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w9, [sp, #152] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: str w10, [sp, #184] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w9, v2.b[2] +; NONEON-NOSVE-NEXT: udiv w8, w1, w4 +; NONEON-NOSVE-NEXT: str w10, [sp, #160] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w10, v2.b[0] +; NONEON-NOSVE-NEXT: str w8, [sp, #24] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w8, w5, w7 +; NONEON-NOSVE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w8, w3, w6 +; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w8, w20, w22 +; NONEON-NOSVE-NEXT: udiv w24, w10, w13 +; NONEON-NOSVE-NEXT: str w8, [sp, #32] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: ldp w29, w8, [sp, #40] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w8, w30, w29 +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #224] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: fmov s4, w8 +; NONEON-NOSVE-NEXT: udiv w23, w9, w11 +; NONEON-NOSVE-NEXT: msub w10, w24, w13, w10 +; NONEON-NOSVE-NEXT: ldr w13, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w24, [sp, #100] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w13, w13, w4, w1 +; NONEON-NOSVE-NEXT: ldr w1, [sp, #116] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w4, 
[sp, #108] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: fmov s5, w10 +; NONEON-NOSVE-NEXT: msub w1, w1, w24, w4 +; NONEON-NOSVE-NEXT: mov v5.b[1], w13 +; NONEON-NOSVE-NEXT: mov v4.b[1], w1 +; NONEON-NOSVE-NEXT: ldr w1, [sp, #120] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w23, w11, w9 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #48] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w28, w18, w2 +; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #52] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #272] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w26, w17, w14 +; NONEON-NOSVE-NEXT: ldr w14, [sp, #72] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w11, w10 +; NONEON-NOSVE-NEXT: ldr w17, [sp, #96] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: umov w10, v3.b[11] +; NONEON-NOSVE-NEXT: umov w11, v2.b[11] +; NONEON-NOSVE-NEXT: mov v4.b[2], w9 +; NONEON-NOSVE-NEXT: mov v5.b[3], w8 +; NONEON-NOSVE-NEXT: msub w8, w25, w15, w12 +; NONEON-NOSVE-NEXT: ldp w13, w9, [sp, #76] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w27, w16, w0 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #104] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #256] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w14, w13 +; NONEON-NOSVE-NEXT: ldr w14, [sp, #60] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[4], w8 +; NONEON-NOSVE-NEXT: msub w8, w28, w2, w18 +; NONEON-NOSVE-NEXT: ldr w2, [sp, #156] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[3], w9 +; NONEON-NOSVE-NEXT: ldp w12, w9, [sp, #64] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[5], w8 +; NONEON-NOSVE-NEXT: msub w8, w27, w0, w16 +; NONEON-NOSVE-NEXT: ldr w0, [sp, #132] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w4, w19, w21 +; NONEON-NOSVE-NEXT: msub w9, w9, w14, w12 +; NONEON-NOSVE-NEXT: umov w12, v3.b[12] +; NONEON-NOSVE-NEXT: umov w14, v2.b[12] +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #240] // 16-byte Folded Reload 
+; NONEON-NOSVE-NEXT: mov v5.b[6], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[4], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #112] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w8, w7, w5 +; NONEON-NOSVE-NEXT: ldr w5, [sp, #204] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w17, w15 +; NONEON-NOSVE-NEXT: ldr w17, [sp, #84] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[7], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w13, w11, w10 +; NONEON-NOSVE-NEXT: mov v4.b[5], w9 +; NONEON-NOSVE-NEXT: ldp w16, w9, [sp, #88] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w8, w8, w6, w3 +; NONEON-NOSVE-NEXT: ldr w3, [sp, #148] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w17, w16 +; NONEON-NOSVE-NEXT: umov w16, v3.b[13] +; NONEON-NOSVE-NEXT: umov w17, v2.b[13] +; NONEON-NOSVE-NEXT: mov v5.b[8], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[6], w9 +; NONEON-NOSVE-NEXT: msub w8, w8, w22, w20 +; NONEON-NOSVE-NEXT: udiv w15, w14, w12 +; NONEON-NOSVE-NEXT: ldp w18, w9, [sp, #136] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[9], w8 +; NONEON-NOSVE-NEXT: msub w8, w4, w21, w19 +; NONEON-NOSVE-NEXT: msub w9, w9, w0, w18 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #304] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #288] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[7], w9 +; NONEON-NOSVE-NEXT: mov v5.b[10], w8 +; NONEON-NOSVE-NEXT: msub w8, w13, w10, w11 +; NONEON-NOSVE-NEXT: ldp w0, w9, [sp, #124] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #196] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w13, [sp, #192] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w18, w17, w16 +; NONEON-NOSVE-NEXT: msub w9, w9, w1, w0 +; NONEON-NOSVE-NEXT: mov v5.b[11], w8 +; NONEON-NOSVE-NEXT: umov w0, v3.b[14] +; NONEON-NOSVE-NEXT: msub w10, w10, w13, 
w11 +; NONEON-NOSVE-NEXT: umov w1, v2.b[14] +; NONEON-NOSVE-NEXT: msub w8, w15, w12, w14 +; NONEON-NOSVE-NEXT: mov v4.b[8], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w15, w13, [sp, #168] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w3, w2 +; NONEON-NOSVE-NEXT: mov v5.b[12], w8 +; NONEON-NOSVE-NEXT: ldp w4, w3, [sp, #208] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp w14, w12, [sp, #176] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[9], w9 +; NONEON-NOSVE-NEXT: udiv w2, w1, w0 +; NONEON-NOSVE-NEXT: umov w9, v3.b[15] +; NONEON-NOSVE-NEXT: msub w3, w3, w5, w4 +; NONEON-NOSVE-NEXT: umov w4, v2.b[15] +; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17 +; NONEON-NOSVE-NEXT: ldr w16, [sp, #144] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.b[10], w3 +; NONEON-NOSVE-NEXT: mov v5.b[13], w8 +; NONEON-NOSVE-NEXT: mov v4.b[11], w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #188] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w11, w4, w9 +; NONEON-NOSVE-NEXT: msub w8, w2, w0, w1 +; NONEON-NOSVE-NEXT: msub w10, w10, w13, w12 +; NONEON-NOSVE-NEXT: umov w12, v1.b[15] +; NONEON-NOSVE-NEXT: umov w13, v0.b[15] +; NONEON-NOSVE-NEXT: mov v5.b[14], w8 +; NONEON-NOSVE-NEXT: mov v4.b[12], w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #184] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w10, w10, w15, w14 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #152] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w14, w13, w12 +; NONEON-NOSVE-NEXT: msub w8, w11, w9, w4 +; NONEON-NOSVE-NEXT: mov v4.b[13], w10 +; NONEON-NOSVE-NEXT: ldr w10, [sp, #160] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.b[15], w8 +; NONEON-NOSVE-NEXT: ldr x8, [sp, #216] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w10, w10, w16, w15 +; NONEON-NOSVE-NEXT: mov v4.b[14], w10 +; NONEON-NOSVE-NEXT: msub w9, w14, w12, w13 +; NONEON-NOSVE-NEXT: mov v4.b[15], w9 +; NONEON-NOSVE-NEXT: stp q5, q4, [x8] +; NONEON-NOSVE-NEXT: add sp, sp, #320 +; 
NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = urem <32 x i8> %op1, %op2 @@ -599,6 +1873,33 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: umov w11, v1.h[0] +; NONEON-NOSVE-NEXT: umov w12, v0.h[0] +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w14, v1.h[2] +; NONEON-NOSVE-NEXT: umov w15, v0.h[2] +; NONEON-NOSVE-NEXT: umov w17, v1.h[3] +; NONEON-NOSVE-NEXT: umov w18, v0.h[3] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s0, w11 +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v0.h[1], w8 +; NONEON-NOSVE-NEXT: udiv w9, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v0.h[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.h[3], w8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = urem <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -627,6 +1928,51 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: umov w11, v1.h[0] +; NONEON-NOSVE-NEXT: umov w12, v0.h[0] +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w14, v1.h[2] +; NONEON-NOSVE-NEXT: umov w15, v0.h[2] +; NONEON-NOSVE-NEXT: umov w17, v1.h[3] +; 
NONEON-NOSVE-NEXT: umov w18, v0.h[3] +; NONEON-NOSVE-NEXT: umov w1, v1.h[4] +; NONEON-NOSVE-NEXT: umov w2, v0.h[4] +; NONEON-NOSVE-NEXT: umov w4, v1.h[5] +; NONEON-NOSVE-NEXT: umov w5, v0.h[5] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: umov w13, v1.h[7] +; NONEON-NOSVE-NEXT: fmov s2, w11 +; NONEON-NOSVE-NEXT: umov w11, v0.h[6] +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: umov w10, v1.h[6] +; NONEON-NOSVE-NEXT: mov v2.h[1], w8 +; NONEON-NOSVE-NEXT: udiv w0, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: umov w14, v0.h[7] +; NONEON-NOSVE-NEXT: mov v2.h[2], w8 +; NONEON-NOSVE-NEXT: udiv w3, w2, w1 +; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18 +; NONEON-NOSVE-NEXT: mov v2.h[3], w8 +; NONEON-NOSVE-NEXT: udiv w9, w5, w4 +; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2 +; NONEON-NOSVE-NEXT: mov v2.h[4], w8 +; NONEON-NOSVE-NEXT: udiv w12, w11, w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5 +; NONEON-NOSVE-NEXT: mov v2.h[5], w8 +; NONEON-NOSVE-NEXT: udiv w9, w14, w13 +; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11 +; NONEON-NOSVE-NEXT: mov v2.h[6], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14 +; NONEON-NOSVE-NEXT: mov v2.h[7], w8 +; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %res = urem <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -671,6 +2017,139 @@ define void @urem_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: mls z0.h, p0/m, z7.h, z1.h ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #144 +; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #64] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #80] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill +; 
NONEON-NOSVE-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 +; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 +; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 +; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 +; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 +; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 +; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 +; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q2, [x0] +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: umov w8, v1.h[1] +; NONEON-NOSVE-NEXT: umov w9, v0.h[1] +; NONEON-NOSVE-NEXT: umov w20, v1.h[0] +; NONEON-NOSVE-NEXT: umov w21, v0.h[0] +; NONEON-NOSVE-NEXT: umov w19, v0.h[3] +; NONEON-NOSVE-NEXT: umov w5, v1.h[4] +; NONEON-NOSVE-NEXT: umov w2, v0.h[4] +; NONEON-NOSVE-NEXT: umov w1, v3.h[1] +; NONEON-NOSVE-NEXT: umov w23, v2.h[1] +; NONEON-NOSVE-NEXT: umov w25, v3.h[0] +; NONEON-NOSVE-NEXT: umov w26, v2.h[0] +; NONEON-NOSVE-NEXT: umov w6, v1.h[5] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w8, v1.h[2] +; NONEON-NOSVE-NEXT: umov w9, v0.h[2] +; NONEON-NOSVE-NEXT: umov w3, v0.h[5] +; NONEON-NOSVE-NEXT: umov w4, v1.h[6] +; NONEON-NOSVE-NEXT: umov w7, v0.h[6] +; NONEON-NOSVE-NEXT: umov w28, v3.h[2] +; NONEON-NOSVE-NEXT: umov w29, v2.h[2] +; NONEON-NOSVE-NEXT: umov w15, v3.h[3] +; NONEON-NOSVE-NEXT: umov w13, v2.h[3] +; NONEON-NOSVE-NEXT: umov w12, v3.h[4] +; NONEON-NOSVE-NEXT: umov w14, v3.h[5] +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w11, w21, w20 +; NONEON-NOSVE-NEXT: str 
w10, [sp, #44] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: umov w8, v1.h[3] +; NONEON-NOSVE-NEXT: stp w8, w11, [sp] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w11, v2.h[4] +; NONEON-NOSVE-NEXT: ldr w22, [sp, #4] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w20, w22, w20, w21 +; NONEON-NOSVE-NEXT: udiv w9, w19, w8 +; NONEON-NOSVE-NEXT: str w10, [sp, #32] // 4-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w10, v3.h[6] +; NONEON-NOSVE-NEXT: fmov s5, w20 +; NONEON-NOSVE-NEXT: umov w20, v3.h[7] +; NONEON-NOSVE-NEXT: udiv w8, w2, w5 +; NONEON-NOSVE-NEXT: udiv w24, w23, w1 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: udiv w27, w26, w25 +; NONEON-NOSVE-NEXT: msub w1, w24, w1, w23 +; NONEON-NOSVE-NEXT: ldp w24, w23, [sp, #40] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w9, w3, w6 +; NONEON-NOSVE-NEXT: msub w21, w27, w25, w26 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w23, w23, w25, w24 +; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: fmov s4, w21 +; NONEON-NOSVE-NEXT: mov v5.h[1], w23 +; NONEON-NOSVE-NEXT: ldp w23, w21, [sp, #28] // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.h[1], w1 +; NONEON-NOSVE-NEXT: udiv w8, w7, w4 +; NONEON-NOSVE-NEXT: msub w21, w21, w25, w23 +; NONEON-NOSVE-NEXT: umov w23, v2.h[7] +; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #80] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.h[2], w21 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: udiv w30, w29, w28 +; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: umov w9, v2.h[5] +; NONEON-NOSVE-NEXT: umov w8, v2.h[6] +; NONEON-NOSVE-NEXT: udiv w18, w13, w15 +; NONEON-NOSVE-NEXT: msub w1, w30, w28, w29 +; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #64] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #48] // 16-byte 
Folded Reload +; NONEON-NOSVE-NEXT: mov v4.h[2], w1 +; NONEON-NOSVE-NEXT: udiv w16, w11, w12 +; NONEON-NOSVE-NEXT: msub w13, w18, w15, w13 +; NONEON-NOSVE-NEXT: ldr w15, [sp, #20] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: ldr w18, [sp] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w15, w15, w18, w19 +; NONEON-NOSVE-NEXT: mov v4.h[3], w13 +; NONEON-NOSVE-NEXT: umov w13, v1.h[7] +; NONEON-NOSVE-NEXT: mov v5.h[3], w15 +; NONEON-NOSVE-NEXT: umov w15, v0.h[7] +; NONEON-NOSVE-NEXT: udiv w17, w9, w14 +; NONEON-NOSVE-NEXT: msub w11, w16, w12, w11 +; NONEON-NOSVE-NEXT: ldr w12, [sp, #16] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w12, w12, w5, w2 +; NONEON-NOSVE-NEXT: mov v4.h[4], w11 +; NONEON-NOSVE-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v5.h[4], w12 +; NONEON-NOSVE-NEXT: msub w11, w11, w6, w3 +; NONEON-NOSVE-NEXT: udiv w24, w8, w10 +; NONEON-NOSVE-NEXT: msub w9, w17, w14, w9 +; NONEON-NOSVE-NEXT: mov v5.h[5], w11 +; NONEON-NOSVE-NEXT: mov v4.h[5], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload +; NONEON-NOSVE-NEXT: msub w9, w9, w4, w7 +; NONEON-NOSVE-NEXT: udiv w18, w23, w20 +; NONEON-NOSVE-NEXT: msub w8, w24, w10, w8 +; NONEON-NOSVE-NEXT: mov v5.h[6], w9 +; NONEON-NOSVE-NEXT: mov v4.h[6], w8 +; NONEON-NOSVE-NEXT: udiv w12, w15, w13 +; NONEON-NOSVE-NEXT: msub w8, w18, w20, w23 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v4.h[7], w8 +; NONEON-NOSVE-NEXT: msub w9, w12, w13, w15 +; NONEON-NOSVE-NEXT: mov v5.h[7], w9 +; NONEON-NOSVE-NEXT: stp q4, q5, [x0] +; NONEON-NOSVE-NEXT: add sp, sp, #144 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = urem <16 x i16> %op1, %op2 @@ -689,6 +2168,23 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 
killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: mov w11, v1.s[1] +; NONEON-NOSVE-NEXT: mov w12, v0.s[1] +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12 +; NONEON-NOSVE-NEXT: mov v0.s[1], w9 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = urem <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -704,6 +2200,30 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov w11, s1 +; NONEON-NOSVE-NEXT: fmov w12, s0 +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: mov w14, v1.s[2] +; NONEON-NOSVE-NEXT: mov w15, v0.s[2] +; NONEON-NOSVE-NEXT: mov w17, v1.s[3] +; NONEON-NOSVE-NEXT: mov w18, v0.s[3] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s0, w11 +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v0.s[1], w8 +; NONEON-NOSVE-NEXT: udiv w9, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v0.s[2], w8 +; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.s[3], w8 +; NONEON-NOSVE-NEXT: ret %res = urem <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -723,6 +2243,65 @@ define void @urem_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s ; CHECK-NEXT: stp q0, q1, 
[x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 +; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 +; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 +; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 +; NONEON-NOSVE-NEXT: .cfi_offset w23, -48 +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov w12, s0 +; NONEON-NOSVE-NEXT: fmov w3, s2 +; NONEON-NOSVE-NEXT: mov w9, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w11, s1 +; NONEON-NOSVE-NEXT: fmov w2, s3 +; NONEON-NOSVE-NEXT: mov w8, v1.s[1] +; NONEON-NOSVE-NEXT: mov w17, v3.s[1] +; NONEON-NOSVE-NEXT: mov w18, v2.s[1] +; NONEON-NOSVE-NEXT: mov w14, v1.s[2] +; NONEON-NOSVE-NEXT: mov w15, v0.s[2] +; NONEON-NOSVE-NEXT: mov w5, v3.s[2] +; NONEON-NOSVE-NEXT: mov w6, v2.s[2] +; NONEON-NOSVE-NEXT: udiv w13, w12, w11 +; NONEON-NOSVE-NEXT: mov w19, v3.s[3] +; NONEON-NOSVE-NEXT: mov w20, v2.s[3] +; NONEON-NOSVE-NEXT: mov w22, v1.s[3] +; NONEON-NOSVE-NEXT: mov w23, v0.s[3] +; NONEON-NOSVE-NEXT: udiv w4, w3, w2 +; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12 +; NONEON-NOSVE-NEXT: fmov s1, w11 +; NONEON-NOSVE-NEXT: udiv w10, w9, w8 +; NONEON-NOSVE-NEXT: msub w12, w4, w2, w3 +; NONEON-NOSVE-NEXT: fmov s0, w12 +; NONEON-NOSVE-NEXT: udiv w1, w18, w17 +; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9 +; NONEON-NOSVE-NEXT: mov v1.s[1], w8 +; NONEON-NOSVE-NEXT: udiv w16, w15, w14 +; NONEON-NOSVE-NEXT: msub w13, w1, w17, w18 +; NONEON-NOSVE-NEXT: mov v0.s[1], w13 +; NONEON-NOSVE-NEXT: udiv w7, w6, w5 +; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15 +; NONEON-NOSVE-NEXT: mov v1.s[2], w8 +; NONEON-NOSVE-NEXT: udiv w21, w20, w19 +; NONEON-NOSVE-NEXT: msub w10, w7, w5, w6 +; NONEON-NOSVE-NEXT: mov v0.s[2], w10 
+; NONEON-NOSVE-NEXT: udiv w9, w23, w22 +; NONEON-NOSVE-NEXT: msub w10, w21, w19, w20 +; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v0.s[3], w10 +; NONEON-NOSVE-NEXT: msub w8, w9, w22, w23 +; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; NONEON-NOSVE-NEXT: mov v1.s[3], w8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = urem <8 x i32> %op1, %op2 @@ -741,6 +2320,17 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: ret %res = urem <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -756,6 +2346,20 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: mov x11, v1.d[1] +; NONEON-NOSVE-NEXT: mov x12, v0.d[1] +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: udiv x13, x12, x11 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: fmov d0, x8 +; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: ret %res = urem <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -775,6 +2379,33 @@ define void @urem_v4i64(ptr %a, ptr 
%b) { ; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: urem_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q1, [x1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: fmov x15, d2 +; NONEON-NOSVE-NEXT: mov x12, v2.d[1] +; NONEON-NOSVE-NEXT: fmov x8, d1 +; NONEON-NOSVE-NEXT: fmov x14, d3 +; NONEON-NOSVE-NEXT: mov x11, v3.d[1] +; NONEON-NOSVE-NEXT: mov x17, v1.d[1] +; NONEON-NOSVE-NEXT: mov x18, v0.d[1] +; NONEON-NOSVE-NEXT: udiv x10, x9, x8 +; NONEON-NOSVE-NEXT: udiv x16, x15, x14 +; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9 +; NONEON-NOSVE-NEXT: fmov d1, x8 +; NONEON-NOSVE-NEXT: udiv x13, x12, x11 +; NONEON-NOSVE-NEXT: msub x10, x16, x14, x15 +; NONEON-NOSVE-NEXT: fmov d0, x10 +; NONEON-NOSVE-NEXT: udiv x1, x18, x17 +; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12 +; NONEON-NOSVE-NEXT: mov v0.d[1], x9 +; NONEON-NOSVE-NEXT: msub x11, x1, x17, x18 +; NONEON-NOSVE-NEXT: mov v1.d[1], x11 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = urem <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll index bfffe4b6315d74..0108fb580b947b 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -16,6 +17,14 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) { ; 
CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.4h, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x i8> %op1, <4 x i8> %op2 ret <4 x i8> %sel } @@ -31,6 +40,14 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.8b, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2 ret <8 x i8> %sel } @@ -46,6 +63,14 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.16b, w8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2 ret <16 x i8> %sel } @@ -64,6 +89,20 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.b, p0, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; NONEON-NOSVE-NEXT: dup v0.16b, w8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, 
v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <32 x i8>, ptr %a %op2 = load volatile <32 x i8>, ptr %b %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2 @@ -83,6 +122,14 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.2s, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x i16> %op1, <2 x i16> %op2 ret <2 x i16> %sel } @@ -99,6 +146,14 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.4h, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2 ret <4 x i16> %sel } @@ -115,6 +170,14 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.8h, w8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2 ret <8 x i16> %sel } @@ -134,6 +197,20 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret 
+; +; NONEON-NOSVE-LABEL: select_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <16 x i16>, ptr %a %op2 = load volatile <16 x i16>, ptr %b %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2 @@ -153,6 +230,14 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.2s, w8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2 ret <2 x i32> %sel } @@ -169,6 +254,14 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: dup v2.4s, w8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2 ret <4 x i32> %sel } @@ -188,6 +281,20 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; 
NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm w8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <8 x i32>, ptr %a %op2 = load volatile <8 x i32>, ptr %b %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2 @@ -208,6 +315,14 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: fmov d2, x8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2 ret <1 x i64> %sel } @@ -225,6 +340,14 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: dup v2.2d, x8 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2 ret <2 x i64> %sel } @@ -245,6 +368,20 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) { ; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w2, #0x1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q2, [x0, #16] +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: ldr q3, [x1] +; NONEON-NOSVE-NEXT: ldr q4, [x1, #16] +; 
NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b +; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load volatile <4 x i64>, ptr %a %op2 = load volatile <4 x i64>, ptr %b %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll index 9319bd69c25fb6..f7198e3042ad53 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,16 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h +; NONEON-NOSVE-NEXT: sshl v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = ashr <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -32,6 +43,12 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.8b, v1.8b +; NONEON-NOSVE-NEXT: sshl v0.8b, v0.8b, v1.8b +; 
NONEON-NOSVE-NEXT: ret %res = ashr <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -45,6 +62,12 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b +; NONEON-NOSVE-NEXT: sshl v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = ashr <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -60,6 +83,17 @@ define void @ashr_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.16b, v0.16b +; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b +; NONEON-NOSVE-NEXT: sshl v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: sshl v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = ashr <32 x i8> %op1, %op2 @@ -78,6 +112,16 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s +; NONEON-NOSVE-NEXT: sshl v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = ashr <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -91,6 +135,12 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i16: +; NONEON-NOSVE: 
// %bb.0: +; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h +; NONEON-NOSVE-NEXT: sshl v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = ashr <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -104,6 +154,12 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h +; NONEON-NOSVE-NEXT: sshl v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = ashr <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -119,6 +175,17 @@ define void @ashr_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.8h, v0.8h +; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h +; NONEON-NOSVE-NEXT: sshl v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: sshl v1.8h, v3.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = ashr <16 x i16> %op1, %op2 @@ -135,6 +202,12 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s +; NONEON-NOSVE-NEXT: sshl v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = ashr <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -148,6 +221,12 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s +; NONEON-NOSVE-NEXT: sshl v0.4s, 
v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = ashr <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -163,6 +242,17 @@ define void @ashr_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s +; NONEON-NOSVE-NEXT: sshl v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: sshl v1.4s, v3.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = ashr <8 x i32> %op1, %op2 @@ -179,6 +269,12 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg d1, d1 +; NONEON-NOSVE-NEXT: sshl d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = ashr <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -192,6 +288,12 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d +; NONEON-NOSVE-NEXT: sshl v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = ashr <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -207,6 +309,17 @@ define void @ashr_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: asr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ashr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d +; NONEON-NOSVE-NEXT: sshl v0.2d, v2.2d, v0.2d +; 
NONEON-NOSVE-NEXT: sshl v1.2d, v3.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = ashr <4 x i64> %op1, %op2 @@ -229,6 +342,15 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b +; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h +; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = lshr <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -242,6 +364,12 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.8b, v1.8b +; NONEON-NOSVE-NEXT: ushl v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = lshr <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -255,6 +383,12 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b +; NONEON-NOSVE-NEXT: ushl v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = lshr <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -270,6 +404,17 @@ define void @lshr_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.16b, v0.16b +; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b +; 
NONEON-NOSVE-NEXT: ushl v0.16b, v2.16b, v0.16b +; NONEON-NOSVE-NEXT: ushl v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = lshr <32 x i8> %op1, %op2 @@ -288,6 +433,15 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b +; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s +; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = lshr <2 x i16> %op1, %op2 ret <2 x i16> %res } @@ -301,6 +455,12 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h +; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = lshr <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -314,6 +474,12 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h +; NONEON-NOSVE-NEXT: ushl v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = lshr <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -329,6 +495,17 @@ define void @lshr_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: 
neg v0.8h, v0.8h +; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h +; NONEON-NOSVE-NEXT: ushl v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: ushl v1.8h, v3.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = lshr <16 x i16> %op1, %op2 @@ -345,6 +522,12 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s +; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = lshr <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -358,6 +541,12 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ushl v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = lshr <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -373,6 +562,17 @@ define void @lshr_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.4s, v0.4s +; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ushl v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: ushl v1.4s, v3.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = lshr <8 x i32> %op1, %op2 @@ -389,6 +589,12 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: lshr_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg d1, d1 +; NONEON-NOSVE-NEXT: ushl d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = lshr <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -402,6 +608,12 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ushl v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = lshr <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -417,6 +629,17 @@ define void @lshr_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: lshr_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: neg v0.2d, v0.2d +; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ushl v0.2d, v2.2d, v0.2d +; NONEON-NOSVE-NEXT: ushl v1.2d, v3.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = lshr <4 x i64> %op1, %op2 @@ -438,6 +661,13 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) { ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v2i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d2, #0x0000ff000000ff +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = shl <2 x i8> %op1, %op2 ret <2 x i8> %res } @@ -452,6 +682,13 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = shl <4 x i8> %op1, %op2 ret <4 x i8> %res } @@ -465,6 +702,11 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ret %res = shl <8 x i8> %op1, %op2 ret <8 x i8> %res } @@ -478,6 +720,11 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %res = shl <16 x i8> %op1, %op2 ret <16 x i8> %res } @@ -493,6 +740,15 @@ define void @shl_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ushl v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: ushl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = shl <32 x i8> %op1, %op2 @@ -509,6 +765,11 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %res = shl <4 x i16> %op1, %op2 ret <4 x i16> %res } @@ -522,6 +783,11 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) 
{ ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: ret %res = shl <8 x i16> %op1, %op2 ret <8 x i16> %res } @@ -537,6 +803,15 @@ define void @shl_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ushl v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: ushl v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = shl <16 x i16> %op1, %op2 @@ -553,6 +828,11 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: ret %res = shl <2 x i32> %op1, %op2 ret <2 x i32> %res } @@ -566,6 +846,11 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: ret %res = shl <4 x i32> %op1, %op2 ret <4 x i32> %res } @@ -581,6 +866,15 @@ define void @shl_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ushl v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: ushl v1.4s, 
v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %res = shl <8 x i32> %op1, %op2 @@ -597,6 +891,11 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl d0, d0, d1 +; NONEON-NOSVE-NEXT: ret %res = shl <1 x i64> %op1, %op2 ret <1 x i64> %res } @@ -610,6 +909,11 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushl v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: ret %res = shl <2 x i64> %op1, %op2 ret <2 x i64> %res } @@ -625,6 +929,15 @@ define void @shl_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shl_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ushl v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: ushl v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %res = shl <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll index 27dbfc9a23a8de..42d3b9d8f71f86 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; 
RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,13 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i16> %op1 to <4 x half> ret <4 x half> %res } @@ -27,6 +35,22 @@ define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ushll v1.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v1.4s +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: str q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x half> store <8 x half> %res, ptr %b @@ -42,6 +66,29 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ushll v2.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v0.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ucvtf v2.4s, v2.4s +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ucvtf v3.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v1.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v3.4s +; NONEON-NOSVE-NEXT: stp q2, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x half> store <16 x half> %res, ptr %b @@ -61,6 +108,13 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ucvtf v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i16> %op1 to <2 x float> ret <2 x float> %res } @@ -74,6 +128,12 @@ define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i16> %op1 to <4 x float> ret <4 x float> %res } @@ -90,6 +150,20 @@ define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f32: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -114,6 +188,26 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ucvtf v2.4s, v2.4s +; NONEON-NOSVE-NEXT: ucvtf v3.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x float> store <16 x float> %res, ptr %b @@ -132,6 +226,13 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) { ; CHECK-NEXT: and w8, w8, #0xffff ; CHECK-NEXT: ucvtf d0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v1i16_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: umov w8, v0.h[0] +; NONEON-NOSVE-NEXT: ucvtf d0, w8 +; NONEON-NOSVE-NEXT: ret %res = uitofp <1 x i16> %op1 to <1 
x double> ret <1 x double> %res } @@ -146,6 +247,14 @@ define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i16> %op1 to <2 x double> ret <2 x double> %res } @@ -163,6 +272,21 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %res = uitofp <4 x i16> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -191,6 +315,30 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q3, [x1] ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q2, [x1] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -239,6 +387,46 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #40] +; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ushll v6.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: ushll v7.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: ucvtf v5.2d, v5.2d +; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: ucvtf v4.2d, v4.2d +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v7.2d +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v6.2d +; NONEON-NOSVE-NEXT: stp q2, q0, [x1, #32] +; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -258,6 +446,13 @@ define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; 
NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i32> %op1 to <2 x half> ret <2 x half> %res } @@ -271,6 +466,12 @@ define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i32> %op1 to <4 x half> ret <4 x half> %res } @@ -288,6 +489,15 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x half> ret <8 x half> %res @@ -312,6 +522,21 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v16i32_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ucvtf v2.4s, v2.4s +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ucvtf v3.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v1.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v3.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = uitofp <16 x i32> %op1 to <16 x half> store <16 x half> %res, ptr %b @@ -330,6 +555,11 @@ define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) { ; 
CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ucvtf v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i32> %op1 to <2 x float> ret <2 x float> %res } @@ -342,6 +572,11 @@ define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) { ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = uitofp <4 x i32> %op1 to <4 x float> ret <4 x float> %res } @@ -355,6 +590,14 @@ define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -374,6 +617,12 @@ define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i32> %op1 to <2 x double> ret <2 x double> %res } @@ -390,6 +639,20 @@ define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = uitofp <4 x i32> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -414,6 +677,26 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -440,6 +723,18 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: ucvtf s1, x9 +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h2, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s1 +; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0] +; 
NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i64> %op1 to <2 x half> ret <2 x half> %res } @@ -460,6 +755,16 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = uitofp <4 x i64> %op1 to <4 x half> ret <4 x half> %res @@ -493,6 +798,22 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32] +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn v2.2s, v2.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d +; NONEON-NOSVE-NEXT: fcvtn2 v2.4s, v3.2d +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v2.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i64>, ptr %a %res = uitofp <8 x i64> %op1 to <8 x half> ret <8 x half> %res @@ -511,6 +832,12 @@ define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = 
uitofp <2 x i64> %op1 to <2 x float> ret <2 x float> %res } @@ -528,6 +855,15 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = uitofp <4 x i64> %op1 to <4 x float> ret <4 x float> %res @@ -552,6 +888,21 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn v1.2s, v1.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v2.2d +; NONEON-NOSVE-NEXT: fcvtn2 v1.4s, v3.2d +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i64>, ptr %a %res = uitofp <8 x i64> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -570,6 +921,11 @@ define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) { ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = uitofp <2 x i64> %op1 to <2 x double> ret <2 x double> %res } @@ -583,6 +939,14 @@ define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = uitofp <4 x i64> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -601,6 +965,13 @@ define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) { ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i16> %op1 to <4 x half> ret <4 x half> %res } @@ -613,6 +984,22 @@ define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: sshll v1.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] +; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v1.4s +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: str q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x half> store <8 x half> %res, ptr %b @@ -628,6 +1015,29 @@ define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: sshll v2.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v0.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: scvtf v2.4s, v2.4s +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: scvtf v3.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v1.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v3.4s +; NONEON-NOSVE-NEXT: stp q2, q0, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x half> store <16 x half> %res, ptr %b @@ -646,6 +1056,13 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, 
v0.2s, #16 +; NONEON-NOSVE-NEXT: scvtf v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i16> %op1 to <2 x float> ret <2 x float> %res } @@ -659,6 +1076,12 @@ define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i16> %op1 to <4 x float> ret <4 x float> %res } @@ -675,6 +1098,20 @@ define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -699,6 +1136,26 @@ define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: scvtf v2.4s, v2.4s +; NONEON-NOSVE-NEXT: scvtf v3.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x float> store <16 x float> %res, ptr %b @@ -720,6 +1177,14 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i16> %op1 to <2 x double> ret <2 x double> %res } @@ -737,6 +1202,21 @@ define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, ptr %a %res = sitofp <4 x i16> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -765,6 +1245,30 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q3, [x1] ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-48]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #40] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q2, [x1] +; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #48 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -813,6 +1317,46 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #8] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 +; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 +; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64] +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: ldr d4, [sp, #88] +; NONEON-NOSVE-NEXT: ldr d6, [sp, #72] +; NONEON-NOSVE-NEXT: ldr d7, [sp, #40] +; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: sshll v6.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: sshll v7.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: scvtf v5.2d, v5.2d +; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: scvtf v4.2d, v4.2d +; NONEON-NOSVE-NEXT: stp q0, q5, [x1] +; NONEON-NOSVE-NEXT: scvtf v0.2d, v7.2d +; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64] +; NONEON-NOSVE-NEXT: scvtf v1.2d, v6.2d +; NONEON-NOSVE-NEXT: stp q2, q0, [x1, #32] +; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #96] +; NONEON-NOSVE-NEXT: add sp, sp, #96 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -832,6 +1376,13 @@ define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; 
NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i32> %op1 to <2 x half> ret <2 x half> %res } @@ -845,6 +1396,12 @@ define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i32> %op1 to <4 x half> ret <4 x half> %res } @@ -862,6 +1419,15 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x half> ret <8 x half> %res @@ -879,6 +1445,11 @@ define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: scvtf v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i32> %op1 to <2 x float> ret <2 x float> %res } @@ -891,6 +1462,11 @@ define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) { ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %res = sitofp <4 x i32> %op1 to <4 x float> ret <4 x float> %res } @@ -904,6 +1480,14 @@ define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z1.s, p0/m, z1.s ; CHECK-NEXT: 
stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s +; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x float> store <8 x float> %res, ptr %b @@ -923,6 +1507,12 @@ define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i32> %op1 to <2 x double> ret <2 x double> %res } @@ -939,6 +1529,20 @@ define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = sitofp <4 x i32> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -963,6 +1567,26 @@ define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q0, [x1, #32] ; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 +; NONEON-NOSVE-NEXT: ldr d2, [sp, #24] +; NONEON-NOSVE-NEXT: ldr d3, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add sp, sp, #32 +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x double> store <8 x double> %res, ptr %b @@ -1007,6 +1631,40 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q2, q1, [x1] ; CHECK-NEXT: stp q4, q0, [x1, #32] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v16i32_v16f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #-64]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 +; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #32] +; NONEON-NOSVE-NEXT: ldr d4, [sp, #24] +; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: ldr d5, [sp, #56] +; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0 +; NONEON-NOSVE-NEXT: ldr d6, [sp, #40] +; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0 +; NONEON-NOSVE-NEXT: ldr d7, [sp, #8] +; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0 +; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d +; NONEON-NOSVE-NEXT: sshll v6.2d, v6.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d +; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0 +; NONEON-NOSVE-NEXT: sshll v7.2d, v7.2s, #0 +; NONEON-NOSVE-NEXT: scvtf v4.2d, v4.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: scvtf v5.2d, v5.2d +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: stp q2, q4, [x1, #96] +; NONEON-NOSVE-NEXT: scvtf v2.2d, v6.2d +; NONEON-NOSVE-NEXT: stp q3, q5, [x1, #64] +; NONEON-NOSVE-NEXT: scvtf v3.2d, v7.2d +; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q0, q3, [x1] +; NONEON-NOSVE-NEXT: add sp, sp, #64 +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = sitofp <16 x i32> %op1 to <16 x double> store <16 x double> %res, ptr %b @@ -1033,6 +1691,18 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) { ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov x8, v0.d[1] +; NONEON-NOSVE-NEXT: fmov x9, d0 +; NONEON-NOSVE-NEXT: scvtf s1, x9 +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h2, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s1 +; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i64> %op1 to <2 x half> ret <2 x half> %res } @@ -1053,6 +1723,16 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) { ; CHECK-NEXT: 
uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sitofp <4 x i64> %op1 to <4 x half> ret <4 x half> %res @@ -1071,6 +1751,12 @@ define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) { ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i64> %op1 to <2 x float> ret <2 x float> %res } @@ -1088,6 +1774,15 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sitofp <4 x i64> %op1 to <4 x float> ret <4 x float> %res @@ -1105,6 +1800,11 @@ define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) { ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: ret %res = sitofp <2 x i64> %op1 to <2 x double> ret <2 x double> %res } @@ -1118,6 +1818,14 @@ define void @scvtf_v4i64_v4f64(ptr 
%a, ptr %b) { ; CHECK-NEXT: scvtf z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d +; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sitofp <4 x i64> %op1 to <4 x double> store <4 x double> %res, ptr %b @@ -1130,6 +1838,13 @@ define half @scvtf_i16_f16(ptr %0) { ; CHECK-NEXT: ldrsh w8, [x0] ; CHECK-NEXT: scvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i16_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrsh w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = sitofp i16 %2 to half ret half %3 @@ -1141,6 +1856,12 @@ define float @scvtf_i16_f32(ptr %0) { ; CHECK-NEXT: ldrsh w8, [x0] ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i16_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrsh w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = sitofp i16 %2 to float ret float %3 @@ -1152,6 +1873,12 @@ define double @scvtf_i16_f64(ptr %0) { ; CHECK-NEXT: ldrsh w8, [x0] ; CHECK-NEXT: scvtf d0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i16_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrsh w8, [x0] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = sitofp i16 %2 to double ret double %3 @@ -1163,6 +1890,13 @@ define half @scvtf_i32_f16(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: scvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i32_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = sitofp i32 
%2 to half ret half %3 @@ -1174,6 +1908,12 @@ define float @scvtf_i32_f32(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: scvtf s0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i32_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = sitofp i32 %2 to float ret float %3 @@ -1185,6 +1925,12 @@ define double @scvtf_i32_f64(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: scvtf d0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i32_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: scvtf d0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = sitofp i32 %2 to double ret double %3 @@ -1196,6 +1942,13 @@ define half @scvtf_i64_f16(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: scvtf h0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i64_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = sitofp i64 %2 to half ret half %3 @@ -1207,6 +1960,12 @@ define float @scvtf_i64_f32(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: scvtf s0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i64_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: scvtf s0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = sitofp i64 %2 to float ret float %3 @@ -1218,6 +1977,12 @@ define double @scvtf_i64_f64(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: scvtf d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: scvtf_i64_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: scvtf d0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = sitofp i64 %2 to double ret double %3 @@ -1229,6 +1994,13 @@ define half @ucvtf_i16_f16(ptr %0) { ; CHECK-NEXT: ldrh w8, [x0] ; 
CHECK-NEXT: ucvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i16_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to half ret half %3 @@ -1240,6 +2012,12 @@ define float @ucvtf_i16_f32(ptr %0) { ; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ucvtf s0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i16_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to float ret float %3 @@ -1251,6 +2029,12 @@ define double @ucvtf_i16_f64(ptr %0) { ; CHECK-NEXT: ldr h0, [x0] ; CHECK-NEXT: ucvtf d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i16_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ret %2 = load i16, ptr %0, align 64 %3 = uitofp i16 %2 to double ret double %3 @@ -1262,6 +2046,13 @@ define half @ucvtf_i32_f16(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: ucvtf h0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i32_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to half ret half %3 @@ -1273,6 +2064,12 @@ define float @ucvtf_i32_f32(ptr %0) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: ucvtf s0, w8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i32_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, w8 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to float ret float %3 @@ -1284,6 +2081,12 @@ define double @ucvtf_i32_f64(ptr %0) { ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ucvtf d0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
ucvtf_i32_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, d0 +; NONEON-NOSVE-NEXT: ret %2 = load i32, ptr %0, align 64 %3 = uitofp i32 %2 to double ret double %3 @@ -1295,6 +2098,13 @@ define half @ucvtf_i64_f16(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ucvtf h0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i64_f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: fcvt h0, s0 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = uitofp i64 %2 to half ret half %3 @@ -1306,6 +2116,12 @@ define float @ucvtf_i64_f32(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ucvtf s0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i64_f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: ucvtf s0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = uitofp i64 %2 to float ret float %3 @@ -1317,6 +2133,12 @@ define double @ucvtf_i64_f64(ptr %0) { ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ucvtf d0, x8 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ucvtf_i64_f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr x8, [x0] +; NONEON-NOSVE-NEXT: ucvtf d0, x8 +; NONEON-NOSVE-NEXT: ret %2 = load i64, ptr %0, align 64 %3 = uitofp i64 %2 to double ret double %3 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll index 3775a64a89a0cb..250929df6b3c35 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < 
%s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,13 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15 +; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x i8> %op1, <4 x i8> %op2 ret <4 x i8> %sel } @@ -36,6 +44,13 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) { ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.8b, v2.8b, #7 +; NONEON-NOSVE-NEXT: cmlt v2.8b, v2.8b, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2 ret <8 x i8> %sel } @@ -54,6 +69,13 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) ; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.16b, v2.16b, #7 +; NONEON-NOSVE-NEXT: cmlt v2.16b, v2.16b, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2 ret <16 x i8> %sel } @@ -70,6 +92,18 @@ define void @select_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.b, p0, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: cmeq v4.16b, v0.16b, v1.16b +; 
NONEON-NOSVE-NEXT: cmeq v5.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %mask = icmp eq <32 x i8> %op1, %op2 @@ -92,6 +126,13 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.2s, v2.2s, #31 +; NONEON-NOSVE-NEXT: cmlt v2.2s, v2.2s, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x i16> %op1, <2 x i16> %op2 ret <2 x i16> %sel } @@ -110,6 +151,13 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15 +; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2 ret <4 x i16> %sel } @@ -129,6 +177,14 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) { ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0 +; NONEON-NOSVE-NEXT: shl v2.8h, v2.8h, #15 +; NONEON-NOSVE-NEXT: cmlt v2.8h, v2.8h, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2 ret <8 x i16> %sel } @@ -145,6 +201,18 @@ define void 
@select_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: cmeq v4.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: cmeq v5.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %mask = icmp eq <16 x i16> %op1, %op2 @@ -167,6 +235,13 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v2.2s, v2.2s, #31 +; NONEON-NOSVE-NEXT: cmlt v2.2s, v2.2s, #0 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2 ret <2 x i32> %sel } @@ -186,6 +261,14 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) { ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 +; NONEON-NOSVE-NEXT: shl v2.4s, v2.4s, #31 +; NONEON-NOSVE-NEXT: cmlt v2.4s, v2.4s, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2 ret <4 x i32> %sel } @@ -202,6 +285,18 @@ define void @select_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: 
ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: cmeq v4.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: cmeq v5.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b +; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %mask = icmp eq <8 x i32> %op1, %op2 @@ -223,6 +318,14 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: tst w0, #0x1 +; NONEON-NOSVE-NEXT: csetm x8, ne +; NONEON-NOSVE-NEXT: fmov d2, x8 +; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: ret %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2 ret <1 x i64> %sel } @@ -242,6 +345,14 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) { ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0 +; NONEON-NOSVE-NEXT: shl v2.2d, v2.2d, #63 +; NONEON-NOSVE-NEXT: cmlt v2.2d, v2.2d, #0 +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2 ret <2 x i64> %sel } @@ -258,6 +369,18 @@ define void @select_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: select_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: cmeq v4.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: cmeq v5.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b 
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b +; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %mask = icmp eq <4 x i64> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll index 918f0ccc0cf6a0..42c439ca4b38d4 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,19 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) { ; CHECK-NEXT: stp q2, q5, [x0, #32] ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q3, q4, [x0] +; NONEON-NOSVE-NEXT: add v2.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: add v5.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: dup v0.4s, v1.s[2] +; NONEON-NOSVE-NEXT: add v1.4s, v3.4s, v3.4s +; NONEON-NOSVE-NEXT: add v3.4s, v4.4s, v4.4s +; NONEON-NOSVE-NEXT: stp q2, q5, [x0, #32] +; NONEON-NOSVE-NEXT: stp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ret entry: %0 = load <16 x i32>, ptr %arg1, align 256 %1 = load <16 x i32>, ptr %arg2, align 256 @@ -42,6 +56,19 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) { ; CHECK-NEXT: stp q3, q4, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test2: +; NONEON-NOSVE: // %bb.0: // %entry +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, 
#32] +; NONEON-NOSVE-NEXT: ldp q3, q4, [x0] +; NONEON-NOSVE-NEXT: add v2.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: dup v0.2s, v1.s[2] +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: add v3.4s, v3.4s, v3.4s +; NONEON-NOSVE-NEXT: add v4.4s, v4.4s, v4.4s +; NONEON-NOSVE-NEXT: stp q2, q1, [x0, #32] +; NONEON-NOSVE-NEXT: stp q3, q4, [x0] +; NONEON-NOSVE-NEXT: ret entry: %0 = load <16 x i32>, ptr %arg1, align 256 %1 = load <16 x i32>, ptr %arg2, align 256 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll index 8c69d5b0bb375d..992b667a2eafe1 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,13 @@ define <4 x i8> @load_v4i8(ptr %a) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = load <4 x i8>, ptr %a ret <4 x i8> %load } @@ -20,6 +28,11 @@ define <8 x i8> @load_v8i8(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x i8>, ptr %a ret <8 x i8> %load } @@ -29,6 +42,11 @@ define <16 x i8> @load_v16i8(ptr %a) { ; CHECK: // %bb.0: 
; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <16 x i8>, ptr %a ret <16 x i8> %load } @@ -38,6 +56,11 @@ define <32 x i8> @load_v32i8(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <32 x i8>, ptr %a ret <32 x i8> %load } @@ -49,6 +72,15 @@ define <2 x i16> @load_v2i16(ptr %a) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: add x8, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = load <2 x i16>, ptr %a ret <2 x i16> %load } @@ -58,6 +90,11 @@ define <2 x half> @load_v2f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x half>, ptr %a ret <2 x half> %load } @@ -67,6 +104,11 @@ define <4 x i16> @load_v4i16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x i16>, ptr %a ret <4 x i16> %load } @@ -76,6 +118,11 @@ define <4 x half> @load_v4f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x half>, ptr %a ret <4 x half> %load } @@ -85,6 +132,11 @@ define <8 
x i16> @load_v8i16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x i16>, ptr %a ret <8 x i16> %load } @@ -94,6 +146,11 @@ define <8 x half> @load_v8f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x half>, ptr %a ret <8 x half> %load } @@ -103,6 +160,11 @@ define <16 x i16> @load_v16i16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <16 x i16>, ptr %a ret <16 x i16> %load } @@ -112,6 +174,11 @@ define <16 x half> @load_v16f16(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <16 x half>, ptr %a ret <16 x half> %load } @@ -121,6 +188,11 @@ define <2 x i32> @load_v2i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x i32>, ptr %a ret <2 x i32> %load } @@ -130,6 +202,11 @@ define <2 x float> @load_v2f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x float>, ptr %a ret <2 x float> %load } @@ -139,6 +216,11 @@ define <4 x i32> @load_v4i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i32: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x i32>, ptr %a ret <4 x i32> %load } @@ -148,6 +230,11 @@ define <4 x float> @load_v4f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x float>, ptr %a ret <4 x float> %load } @@ -157,6 +244,11 @@ define <8 x i32> @load_v8i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x i32>, ptr %a ret <8 x i32> %load } @@ -166,6 +258,11 @@ define <8 x float> @load_v8f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <8 x float>, ptr %a ret <8 x float> %load } @@ -175,6 +272,11 @@ define <1 x i64> @load_v1i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <1 x i64>, ptr %a ret <1 x i64> %load } @@ -184,6 +286,11 @@ define <1 x double> @load_v1f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <1 x double>, ptr %a ret <1 x double> %load } @@ -193,6 +300,11 @@ define <2 x i64> @load_v2i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x i64>, ptr %a ret <2 x i64> %load } @@ 
-202,6 +314,11 @@ define <2 x double> @load_v2f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <2 x double>, ptr %a ret <2 x double> %load } @@ -211,6 +328,11 @@ define <4 x i64> @load_v4i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x i64>, ptr %a ret <4 x i64> %load } @@ -220,6 +342,11 @@ define <4 x double> @load_v4f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: load_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %load = load <4 x double>, ptr %a ret <4 x double> %load } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll index ef52eadc5d3b09..7abe73f08dfd65 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -17,6 +18,14 @@ define i8 @andv_v4i8(<4 x i8> %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; 
NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a) ret i8 %res } @@ -29,6 +38,15 @@ define i8 @andv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a) ret i8 %res } @@ -41,6 +59,20 @@ define i8 @andv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a) ret i8 %res } @@ -54,6 +86,22 @@ define i8 @andv_v32i8(ptr %a) { ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op) ret i8 %res @@ -67,6 +115,13 @@ define i16 @andv_v2i16(<2 x i16> %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a) ret i16 %res } @@ -79,6 +134,14 @@ define i16 @andv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a) ret i16 %res } @@ -91,6 +154,19 @@ define i16 @andv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a) ret i16 %res } @@ -104,6 +180,21 @@ define i16 @andv_v16i16(ptr %a) { ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op) ret i16 %res @@ -117,6 +208,13 @@ define i32 @andv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a) ret i32 %res } @@ -129,6 +227,18 @@ define i32 @andv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a) ret i32 %res } @@ -142,6 +252,20 @@ define i32 @andv_v8i32(ptr %a) { ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: and w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op) ret i32 %res @@ -155,6 +279,16 @@ define i64 @andv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a) ret i64 %res } @@ -168,6 +302,18 @@ define i64 @andv_v4i64(ptr %a) { ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: andv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op) ret i64 %res @@ -185,6 +331,14 @@ define i8 @eorv_v4i8(<4 x i8> %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a) ret i8 %res } @@ -197,6 +351,15 @@ define i8 @eorv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a) ret i8 %res } @@ -209,6 +372,20 @@ define i8 @eorv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a) ret i8 %res } @@ -222,6 +399,22 @@ define i8 @eorv_v32i8(ptr %a) { ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op) ret i8 %res @@ -235,6 +428,13 @@ define i16 @eorv_v2i16(<2 x i16> %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a) ret i16 %res } @@ -247,6 +447,14 @@ define i16 @eorv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: 
lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a) ret i16 %res } @@ -259,6 +467,19 @@ define i16 @eorv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a) ret i16 %res } @@ -272,6 +493,21 @@ define i16 @eorv_v16i16(ptr %a) { ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op) ret i16 %res @@ -285,6 +521,13 @@ define i32 @eorv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a) ret i32 %res } @@ -297,6 +540,18 @@ define i32 @eorv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a) ret i32 %res } @@ -310,6 +565,20 @@ define i32 @eorv_v8i32(ptr %a) { ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: eor w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op) ret i32 %res @@ -323,6 +592,16 @@ define i64 @eorv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a) ret i64 %res } @@ -336,6 +615,18 @@ define i64 @eorv_v4i64(ptr %a) { ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: eorv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op) ret i64 %res @@ -353,6 +644,14 @@ define i8 @orv_v4i8(<4 x i8> %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a) ret i8 %res } @@ -365,6 +664,15 @@ define i8 @orv_v8i8(<8 x i8> %a) { ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) ret i8 %res } @@ -377,6 +685,20 @@ define i8 @orv_v16i8(<16 x i8> %a) { ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a) ret i8 %res } @@ -390,6 +712,22 @@ define i8 @orv_v32i8(ptr %a) { ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #16 +; NONEON-NOSVE-NEXT: lsr x9, x8, #8 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op) ret i8 %res @@ -403,6 +741,13 @@ define i16 @orv_v2i16(<2 x i16> %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a) ret i16 %res } @@ -415,6 +760,14 @@ define i16 @orv_v4i16(<4 x i16> %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, 
#16 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a) ret i16 %res } @@ -427,6 +780,19 @@ define i16 @orv_v8i16(<8 x i16> %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a) ret i16 %res } @@ -440,6 +806,21 @@ define i16 @orv_v16i16(ptr %a) { ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32 +; NONEON-NOSVE-NEXT: lsr x9, x8, #16 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op) ret i16 %res @@ -453,6 +834,13 @@ define i32 @orv_v2i32(<2 x i32> %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a) ret i32 %res } @@ -465,6 +853,18 @@ define i32 @orv_v4i32(<4 x i32> %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a) ret i32 %res } @@ -478,6 +878,20 @@ define i32 @orv_v8i32(ptr %a) { ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x8, d0 +; NONEON-NOSVE-NEXT: lsr x9, x8, #32 +; NONEON-NOSVE-NEXT: orr w0, w8, w9 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op) ret i32 %res @@ -491,6 +905,16 @@ define i64 @orv_v2i64(<2 x i64> %a) { ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a) ret i64 %res } @@ -504,6 +928,18 @@ define i64 @orv_v4i64(ptr %a) { ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: orv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: ldr d1, [sp, #8] +; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: fmov x0, d0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op) ret i64 %res diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll index 4f8f8c2e4b244a..6c33613f8e757d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,44 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI0_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB0_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[0], [x0] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_3 +; NONEON-NOSVE-NEXT: b .LBB0_4 +; NONEON-NOSVE-NEXT: .LBB0_2: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_4 +; NONEON-NOSVE-NEXT: .LBB0_3: // %cond.load1 +; 
NONEON-NOSVE-NEXT: add x9, x0, #1 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9] +; NONEON-NOSVE-NEXT: .LBB0_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7 +; NONEON-NOSVE-NEXT: // %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8 +; NONEON-NOSVE-NEXT: .LBB0_6: // %else8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_6 +; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.load7 +; NONEON-NOSVE-NEXT: add x8, x0, #3 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = call <4 x i8> @llvm.masked.load.v4i8(ptr %src, i32 8, <4 x i1> %mask, <4 x i8> zeroinitializer) ret <4 x i8> %load } @@ -34,6 +73,67 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI1_0] +; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB1_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr b0, [x0] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_3 +; NONEON-NOSVE-NEXT: b .LBB1_4 +; NONEON-NOSVE-NEXT: .LBB1_2: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_4 +; NONEON-NOSVE-NEXT: .LBB1_3: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #1 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[1], [x9] +; NONEON-NOSVE-NEXT: .LBB1_4: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_11 +; NONEON-NOSVE-NEXT: 
// %bb.5: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_12 +; NONEON-NOSVE-NEXT: .LBB1_6: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_13 +; NONEON-NOSVE-NEXT: .LBB1_7: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_14 +; NONEON-NOSVE-NEXT: .LBB1_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_15 +; NONEON-NOSVE-NEXT: .LBB1_9: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_16 +; NONEON-NOSVE-NEXT: .LBB1_10: // %else20 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB1_11: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_6 +; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.load7 +; NONEON-NOSVE-NEXT: add x9, x0, #3 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_7 +; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.load10 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_8 +; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.load13 +; NONEON-NOSVE-NEXT: add x9, x0, #5 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_9 +; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.load16 +; NONEON-NOSVE-NEXT: add x9, x0, #6 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_10 +; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.load19 +; NONEON-NOSVE-NEXT: add x8, x0, #7 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[7], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %src, i32 8, <8 x i1> %mask, <8 x i8> zeroinitializer) ret <8 x i8> %load } @@ -49,6 +149,115 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) { ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v16i8: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0 +; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: addv h1, v0.8h +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB2_17 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_18 +; NONEON-NOSVE-NEXT: .LBB2_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_19 +; NONEON-NOSVE-NEXT: .LBB2_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_20 +; NONEON-NOSVE-NEXT: .LBB2_4: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_21 +; NONEON-NOSVE-NEXT: .LBB2_5: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_22 +; NONEON-NOSVE-NEXT: .LBB2_6: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_23 +; NONEON-NOSVE-NEXT: .LBB2_7: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_24 +; NONEON-NOSVE-NEXT: .LBB2_8: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_25 +; NONEON-NOSVE-NEXT: .LBB2_9: // %else23 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_26 +; NONEON-NOSVE-NEXT: .LBB2_10: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_27 +; NONEON-NOSVE-NEXT: .LBB2_11: // %else29 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_28 +; NONEON-NOSVE-NEXT: .LBB2_12: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB2_29 +; NONEON-NOSVE-NEXT: .LBB2_13: // %else35 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_30 +; NONEON-NOSVE-NEXT: .LBB2_14: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_31 +; NONEON-NOSVE-NEXT: .LBB2_15: // %else41 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_32 +; NONEON-NOSVE-NEXT: .LBB2_16: // %else44 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB2_17: // %cond.load +; NONEON-NOSVE-NEXT: ldr b0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_2 
+; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #1 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB2_3 +; NONEON-NOSVE-NEXT: .LBB2_19: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_4 +; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.load7 +; NONEON-NOSVE-NEXT: add x9, x0, #3 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_5 +; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.load10 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_6 +; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.load13 +; NONEON-NOSVE-NEXT: add x9, x0, #5 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_7 +; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.load16 +; NONEON-NOSVE-NEXT: add x9, x0, #6 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_8 +; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.load19 +; NONEON-NOSVE-NEXT: add x9, x0, #7 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[7], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_9 +; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.load22 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[8], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_10 +; NONEON-NOSVE-NEXT: .LBB2_26: // %cond.load25 +; NONEON-NOSVE-NEXT: add x9, x0, #9 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[9], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_11 +; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.load28 +; NONEON-NOSVE-NEXT: add x9, x0, #10 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[10], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_12 +; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.load31 +; NONEON-NOSVE-NEXT: add x9, x0, #11 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[11], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_13 +; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.load34 +; NONEON-NOSVE-NEXT: add x9, x0, #12 +; NONEON-NOSVE-NEXT: 
ld1 { v0.b }[12], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_14 +; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.load37 +; NONEON-NOSVE-NEXT: add x9, x0, #13 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[13], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_15 +; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.load40 +; NONEON-NOSVE-NEXT: add x9, x0, #14 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[14], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_16 +; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.load43 +; NONEON-NOSVE-NEXT: add x8, x0, #15 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[15], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer) ret <16 x i8> %load } @@ -130,6 +339,277 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) { ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] +; NONEON-NOSVE-NEXT: fmov s1, w1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #80] +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] +; NONEON-NOSVE-NEXT: mov v1.b[1], w2 +; NONEON-NOSVE-NEXT: mov v0.b[1], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: mov v1.b[2], w3 +; NONEON-NOSVE-NEXT: mov v0.b[2], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: mov v1.b[3], w4 +; NONEON-NOSVE-NEXT: mov v0.b[3], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] +; NONEON-NOSVE-NEXT: mov v1.b[4], w5 +; NONEON-NOSVE-NEXT: mov v0.b[4], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: mov v1.b[5], w6 +; NONEON-NOSVE-NEXT: mov v0.b[5], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] +; NONEON-NOSVE-NEXT: mov v1.b[6], w7 +; NONEON-NOSVE-NEXT: mov v0.b[6], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] +; NONEON-NOSVE-NEXT: mov v1.b[7], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: mov v0.b[7], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, 
#136] +; NONEON-NOSVE-NEXT: mov v1.b[8], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: mov v0.b[8], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] +; NONEON-NOSVE-NEXT: mov v1.b[9], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: mov v0.b[9], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] +; NONEON-NOSVE-NEXT: mov v1.b[10], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #32] +; NONEON-NOSVE-NEXT: mov v0.b[10], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] +; NONEON-NOSVE-NEXT: mov v1.b[11], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #40] +; NONEON-NOSVE-NEXT: mov v0.b[11], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] +; NONEON-NOSVE-NEXT: mov v1.b[12], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: mov v0.b[12], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] +; NONEON-NOSVE-NEXT: mov v1.b[13], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #56] +; NONEON-NOSVE-NEXT: mov v0.b[13], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] +; NONEON-NOSVE-NEXT: mov v1.b[14], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #64] +; NONEON-NOSVE-NEXT: mov v0.b[14], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] +; NONEON-NOSVE-NEXT: mov v1.b[15], w9 +; NONEON-NOSVE-NEXT: mov v0.b[15], w8 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0 +; NONEON-NOSVE-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: shl v1.16b, v1.16b, #7 +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: cmlt v1.16b, v1.16b, #0 +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: addv h1, v1.8h +; NONEON-NOSVE-NEXT: addv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: movi v0.2d, 
#0000000000000000 +; NONEON-NOSVE-NEXT: bfi w8, w9, #16, #16 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB3_33 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_34 +; NONEON-NOSVE-NEXT: .LBB3_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_35 +; NONEON-NOSVE-NEXT: .LBB3_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_36 +; NONEON-NOSVE-NEXT: .LBB3_4: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_37 +; NONEON-NOSVE-NEXT: .LBB3_5: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_38 +; NONEON-NOSVE-NEXT: .LBB3_6: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_39 +; NONEON-NOSVE-NEXT: .LBB3_7: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_40 +; NONEON-NOSVE-NEXT: .LBB3_8: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_41 +; NONEON-NOSVE-NEXT: .LBB3_9: // %else23 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_42 +; NONEON-NOSVE-NEXT: .LBB3_10: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_43 +; NONEON-NOSVE-NEXT: .LBB3_11: // %else29 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_44 +; NONEON-NOSVE-NEXT: .LBB3_12: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_45 +; NONEON-NOSVE-NEXT: .LBB3_13: // %else35 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_46 +; NONEON-NOSVE-NEXT: .LBB3_14: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_47 +; NONEON-NOSVE-NEXT: .LBB3_15: // %else41 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_48 +; NONEON-NOSVE-NEXT: .LBB3_16: // %else44 +; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_49 +; NONEON-NOSVE-NEXT: .LBB3_17: // %else47 +; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_50 +; NONEON-NOSVE-NEXT: .LBB3_18: // %else50 +; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_51 +; NONEON-NOSVE-NEXT: .LBB3_19: // %else53 +; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_52 +; NONEON-NOSVE-NEXT: .LBB3_20: // %else56 +; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_53 +; NONEON-NOSVE-NEXT: .LBB3_21: // %else59 +; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_54 +; NONEON-NOSVE-NEXT: .LBB3_22: // %else62 +; NONEON-NOSVE-NEXT: 
tbnz w8, #22, .LBB3_55 +; NONEON-NOSVE-NEXT: .LBB3_23: // %else65 +; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_56 +; NONEON-NOSVE-NEXT: .LBB3_24: // %else68 +; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_57 +; NONEON-NOSVE-NEXT: .LBB3_25: // %else71 +; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_58 +; NONEON-NOSVE-NEXT: .LBB3_26: // %else74 +; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_59 +; NONEON-NOSVE-NEXT: .LBB3_27: // %else77 +; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_60 +; NONEON-NOSVE-NEXT: .LBB3_28: // %else80 +; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_61 +; NONEON-NOSVE-NEXT: .LBB3_29: // %else83 +; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_62 +; NONEON-NOSVE-NEXT: .LBB3_30: // %else86 +; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_63 +; NONEON-NOSVE-NEXT: .LBB3_31: // %else89 +; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_64 +; NONEON-NOSVE-NEXT: .LBB3_32: // %else92 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB3_33: // %cond.load +; NONEON-NOSVE-NEXT: ldr b0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_2 +; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #1 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB3_3 +; NONEON-NOSVE-NEXT: .LBB3_35: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_4 +; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.load7 +; NONEON-NOSVE-NEXT: add x9, x0, #3 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_5 +; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.load10 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_6 +; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.load13 +; NONEON-NOSVE-NEXT: add x9, x0, #5 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_7 +; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.load16 +; NONEON-NOSVE-NEXT: add x9, x0, #6 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x9] +; 
NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_8 +; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.load19 +; NONEON-NOSVE-NEXT: add x9, x0, #7 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[7], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_9 +; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.load22 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[8], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_10 +; NONEON-NOSVE-NEXT: .LBB3_42: // %cond.load25 +; NONEON-NOSVE-NEXT: add x9, x0, #9 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[9], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_11 +; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.load28 +; NONEON-NOSVE-NEXT: add x9, x0, #10 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[10], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_12 +; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.load31 +; NONEON-NOSVE-NEXT: add x9, x0, #11 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[11], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_13 +; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.load34 +; NONEON-NOSVE-NEXT: add x9, x0, #12 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[12], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_14 +; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.load37 +; NONEON-NOSVE-NEXT: add x9, x0, #13 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[13], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_15 +; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.load40 +; NONEON-NOSVE-NEXT: add x9, x0, #14 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[14], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_16 +; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.load43 +; NONEON-NOSVE-NEXT: add x9, x0, #15 +; NONEON-NOSVE-NEXT: ld1 { v0.b }[15], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_17 +; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.load46 +; NONEON-NOSVE-NEXT: add x9, x0, #16 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[0], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_18 +; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.load49 +; NONEON-NOSVE-NEXT: add x9, x0, #17 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_19 +; NONEON-NOSVE-NEXT: .LBB3_51: // 
%cond.load52 +; NONEON-NOSVE-NEXT: add x9, x0, #18 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_20 +; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.load55 +; NONEON-NOSVE-NEXT: add x9, x0, #19 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #20, .LBB3_21 +; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.load58 +; NONEON-NOSVE-NEXT: add x9, x0, #20 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_22 +; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.load61 +; NONEON-NOSVE-NEXT: add x9, x0, #21 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_23 +; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.load64 +; NONEON-NOSVE-NEXT: add x9, x0, #22 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[6], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #23, .LBB3_24 +; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.load67 +; NONEON-NOSVE-NEXT: add x9, x0, #23 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[7], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_25 +; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.load70 +; NONEON-NOSVE-NEXT: add x9, x0, #24 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[8], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_26 +; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.load73 +; NONEON-NOSVE-NEXT: add x9, x0, #25 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[9], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_27 +; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.load76 +; NONEON-NOSVE-NEXT: add x9, x0, #26 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[10], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_28 +; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.load79 +; NONEON-NOSVE-NEXT: add x9, x0, #27 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[11], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_29 +; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.load82 +; NONEON-NOSVE-NEXT: add x9, x0, #28 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[12], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_30 +; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.load85 +; NONEON-NOSVE-NEXT: add x9, x0, #29 +; NONEON-NOSVE-NEXT: ld1 { 
v1.b }[13], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_31 +; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.load88 +; NONEON-NOSVE-NEXT: add x9, x0, #30 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[14], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_32 +; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.load91 +; NONEON-NOSVE-NEXT: add x8, x0, #31 +; NONEON-NOSVE-NEXT: ld1 { v1.b }[15], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %src, i32 8, <32 x i1> %mask, <32 x i8> zeroinitializer) ret <32 x i8> %load } @@ -155,6 +635,31 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] +; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addp v1.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: movi d0, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB4_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_4 +; NONEON-NOSVE-NEXT: .LBB4_2: // %else2 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.load +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB4_2 +; NONEON-NOSVE-NEXT: .LBB4_4: // %cond.load1 +; NONEON-NOSVE-NEXT: add x8, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = call <2 x half> @llvm.masked.load.v2f16(ptr %src, i32 8, <2 x i1> %mask, <2 x half> zeroinitializer) ret <2 x half> %load } @@ -170,6 +675,43 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: 
// kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h1, v0.4h +; NONEON-NOSVE-NEXT: movi d0, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB5_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_6 +; NONEON-NOSVE-NEXT: .LBB5_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7 +; NONEON-NOSVE-NEXT: .LBB5_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8 +; NONEON-NOSVE-NEXT: .LBB5_4: // %else8 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB5_5: // %cond.load +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_2 +; NONEON-NOSVE-NEXT: .LBB5_6: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB5_3 +; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_4 +; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.load7 +; NONEON-NOSVE-NEXT: add x8, x0, #6 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = call <4 x half> @llvm.masked.load.v4f16(ptr %src, i32 8, <4 x i1> %mask, <4 x half> zeroinitializer) ret <4 x half> %load } @@ -186,6 +728,65 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl 
v0.8b, v0.8b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] +; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv b1, v0.8b +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB6_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_10 +; NONEON-NOSVE-NEXT: .LBB6_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_11 +; NONEON-NOSVE-NEXT: .LBB6_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_12 +; NONEON-NOSVE-NEXT: .LBB6_4: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_13 +; NONEON-NOSVE-NEXT: .LBB6_5: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_14 +; NONEON-NOSVE-NEXT: .LBB6_6: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_15 +; NONEON-NOSVE-NEXT: .LBB6_7: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_16 +; NONEON-NOSVE-NEXT: .LBB6_8: // %else20 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB6_9: // %cond.load +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_2 +; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB6_3 +; NONEON-NOSVE-NEXT: .LBB6_11: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_4 +; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.load7 +; NONEON-NOSVE-NEXT: add x9, x0, #6 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_5 +; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.load10 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_6 +; NONEON-NOSVE-NEXT: .LBB6_14: // %cond.load13 +; NONEON-NOSVE-NEXT: add x9, x0, #10 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #6, 
.LBB6_7 +; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.load16 +; NONEON-NOSVE-NEXT: add x9, x0, #12 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[6], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_8 +; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.load19 +; NONEON-NOSVE-NEXT: add x8, x0, #14 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[7], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer) ret <8 x half> %load } @@ -210,6 +811,116 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) { ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0 +; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: addv h2, v0.8h +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s2 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB7_17 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_18 +; NONEON-NOSVE-NEXT: .LBB7_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_19 +; NONEON-NOSVE-NEXT: .LBB7_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_20 +; NONEON-NOSVE-NEXT: .LBB7_4: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_21 +; NONEON-NOSVE-NEXT: .LBB7_5: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_22 +; NONEON-NOSVE-NEXT: .LBB7_6: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_23 +; NONEON-NOSVE-NEXT: .LBB7_7: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_24 +; NONEON-NOSVE-NEXT: .LBB7_8: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_25 +; 
NONEON-NOSVE-NEXT: .LBB7_9: // %else23 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_26 +; NONEON-NOSVE-NEXT: .LBB7_10: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_27 +; NONEON-NOSVE-NEXT: .LBB7_11: // %else29 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_28 +; NONEON-NOSVE-NEXT: .LBB7_12: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_29 +; NONEON-NOSVE-NEXT: .LBB7_13: // %else35 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_30 +; NONEON-NOSVE-NEXT: .LBB7_14: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_31 +; NONEON-NOSVE-NEXT: .LBB7_15: // %else41 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_32 +; NONEON-NOSVE-NEXT: .LBB7_16: // %else44 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB7_17: // %cond.load +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_2 +; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB7_3 +; NONEON-NOSVE-NEXT: .LBB7_19: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_4 +; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.load7 +; NONEON-NOSVE-NEXT: add x9, x0, #6 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_5 +; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.load10 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_6 +; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.load13 +; NONEON-NOSVE-NEXT: add x9, x0, #10 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_7 +; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.load16 +; NONEON-NOSVE-NEXT: add x9, x0, #12 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[6], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_8 +; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.load19 +; NONEON-NOSVE-NEXT: add x9, x0, #14 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[7], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_9 
+; NONEON-NOSVE-NEXT: .LBB7_25: // %cond.load22 +; NONEON-NOSVE-NEXT: add x9, x0, #16 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[0], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_10 +; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.load25 +; NONEON-NOSVE-NEXT: add x9, x0, #18 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_11 +; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.load28 +; NONEON-NOSVE-NEXT: add x9, x0, #20 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_12 +; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.load31 +; NONEON-NOSVE-NEXT: add x9, x0, #22 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_13 +; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.load34 +; NONEON-NOSVE-NEXT: add x9, x0, #24 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[4], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_14 +; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.load37 +; NONEON-NOSVE-NEXT: add x9, x0, #26 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[5], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_15 +; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.load40 +; NONEON-NOSVE-NEXT: add x9, x0, #28 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[6], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_16 +; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.load43 +; NONEON-NOSVE-NEXT: add x8, x0, #30 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[7], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <16 x half> @llvm.masked.load.v16f16(ptr %src, i32 8, <16 x i1> %mask, <16 x half> zeroinitializer) ret <16 x half> %load } @@ -225,6 +936,31 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) { ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, 
v1.8b +; NONEON-NOSVE-NEXT: addp v1.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: movi d0, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB8_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_4 +; NONEON-NOSVE-NEXT: .LBB8_2: // %else2 +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB8_3: // %cond.load +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_2 +; NONEON-NOSVE-NEXT: .LBB8_4: // %cond.load1 +; NONEON-NOSVE-NEXT: add x8, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[1], [x8] +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NONEON-NOSVE-NEXT: ret %load = call <2 x float> @llvm.masked.load.v2f32(ptr %src, i32 8, <2 x i1> %mask, <2 x float> zeroinitializer) ret <2 x float> %load } @@ -241,6 +977,41 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI9_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h1, v0.4h +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB9_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_6 +; NONEON-NOSVE-NEXT: .LBB9_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_7 +; NONEON-NOSVE-NEXT: .LBB9_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_8 +; NONEON-NOSVE-NEXT: .LBB9_4: // %else8 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB9_5: // %cond.load +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_2 +; NONEON-NOSVE-NEXT: .LBB9_6: // 
%cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB9_3 +; NONEON-NOSVE-NEXT: .LBB9_7: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_4 +; NONEON-NOSVE-NEXT: .LBB9_8: // %cond.load7 +; NONEON-NOSVE-NEXT: add x8, x0, #12 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[3], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer) ret <4 x float> %load } @@ -290,6 +1061,66 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) { ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI10_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI10_0] +; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: addv b2, v0.8b +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s2 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB10_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_10 +; NONEON-NOSVE-NEXT: .LBB10_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB10_11 +; NONEON-NOSVE-NEXT: .LBB10_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB10_12 +; NONEON-NOSVE-NEXT: .LBB10_4: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB10_13 +; NONEON-NOSVE-NEXT: .LBB10_5: // %else11 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB10_14 +; NONEON-NOSVE-NEXT: .LBB10_6: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB10_15 +; NONEON-NOSVE-NEXT: .LBB10_7: // %else17 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB10_16 +; NONEON-NOSVE-NEXT: .LBB10_8: // %else20 +; NONEON-NOSVE-NEXT: ret +; 
NONEON-NOSVE-NEXT: .LBB10_9: // %cond.load +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_2 +; NONEON-NOSVE-NEXT: .LBB10_10: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB10_3 +; NONEON-NOSVE-NEXT: .LBB10_11: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB10_4 +; NONEON-NOSVE-NEXT: .LBB10_12: // %cond.load7 +; NONEON-NOSVE-NEXT: add x9, x0, #12 +; NONEON-NOSVE-NEXT: ld1 { v0.s }[3], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB10_5 +; NONEON-NOSVE-NEXT: .LBB10_13: // %cond.load10 +; NONEON-NOSVE-NEXT: add x9, x0, #16 +; NONEON-NOSVE-NEXT: ld1 { v1.s }[0], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB10_6 +; NONEON-NOSVE-NEXT: .LBB10_14: // %cond.load13 +; NONEON-NOSVE-NEXT: add x9, x0, #20 +; NONEON-NOSVE-NEXT: ld1 { v1.s }[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB10_7 +; NONEON-NOSVE-NEXT: .LBB10_15: // %cond.load16 +; NONEON-NOSVE-NEXT: add x9, x0, #24 +; NONEON-NOSVE-NEXT: ld1 { v1.s }[2], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB10_8 +; NONEON-NOSVE-NEXT: .LBB10_16: // %cond.load19 +; NONEON-NOSVE-NEXT: add x8, x0, #28 +; NONEON-NOSVE-NEXT: ld1 { v1.s }[3], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <8 x float> @llvm.masked.load.v8f32(ptr %src, i32 8, <8 x i1> %mask, <8 x float> zeroinitializer) ret <8 x float> %load } @@ -306,6 +1137,29 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) { ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI11_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI11_0] +; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addp v1.2s, v0.2s, v0.2s 
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB11_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_4 +; NONEON-NOSVE-NEXT: .LBB11_2: // %else2 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB11_3: // %cond.load +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_2 +; NONEON-NOSVE-NEXT: .LBB11_4: // %cond.load1 +; NONEON-NOSVE-NEXT: add x8, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.d }[1], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer) ret <2 x double> %load } @@ -331,6 +1185,42 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) { ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI12_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: addv h2, v0.4h +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: fmov w8, s2 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB12_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB12_6 +; NONEON-NOSVE-NEXT: .LBB12_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB12_7 +; NONEON-NOSVE-NEXT: .LBB12_3: // %else5 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB12_8 +; NONEON-NOSVE-NEXT: .LBB12_4: // %else8 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB12_5: // %cond.load +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB12_2 +; NONEON-NOSVE-NEXT: .LBB12_6: // %cond.load1 +; NONEON-NOSVE-NEXT: add x9, x0, #8 +; NONEON-NOSVE-NEXT: ld1 { v0.d 
}[1], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB12_3 +; NONEON-NOSVE-NEXT: .LBB12_7: // %cond.load4 +; NONEON-NOSVE-NEXT: add x9, x0, #16 +; NONEON-NOSVE-NEXT: ld1 { v1.d }[0], [x9] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB12_4 +; NONEON-NOSVE-NEXT: .LBB12_8: // %cond.load7 +; NONEON-NOSVE-NEXT: add x8, x0, #24 +; NONEON-NOSVE-NEXT: ld1 { v1.d }[1], [x8] +; NONEON-NOSVE-NEXT: ret %load = call <4 x double> @llvm.masked.load.v4f64(ptr %src, i32 8, <4 x i1> %mask, <4 x double> zeroinitializer) ret <4 x double> %load } @@ -356,6 +1246,38 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_zext_v3i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: and w8, w1, #0x1 +; NONEON-NOSVE-NEXT: bfi w8, w2, #1, #1 +; NONEON-NOSVE-NEXT: bfi w8, w3, #2, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB13_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB13_3 +; NONEON-NOSVE-NEXT: b .LBB13_4 +; NONEON-NOSVE-NEXT: .LBB13_2: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB13_4 +; NONEON-NOSVE-NEXT: .LBB13_3: // %cond.load1 +; NONEON-NOSVE-NEXT: mov v1.16b, v0.16b +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[1], [x9] +; NONEON-NOSVE-NEXT: mov v1.h[2], v0.h[2] +; NONEON-NOSVE-NEXT: fmov d0, d1 +; NONEON-NOSVE-NEXT: .LBB13_4: // %else2 +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB13_6 +; NONEON-NOSVE-NEXT: // %bb.5: // %cond.load4 +; NONEON-NOSVE-NEXT: mov v0.h[1], v0.h[1] +; NONEON-NOSVE-NEXT: add x8, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8] +; NONEON-NOSVE-NEXT: .LBB13_6: // %else5 +; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %load_value = tail call 
<3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer) %extend = zext <3 x i16> %load_value to <3 x i32> ret <3 x i32> %extend; @@ -382,6 +1304,38 @@ define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_load_sext_v3i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: sub sp, sp, #16 +; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 +; NONEON-NOSVE-NEXT: and w8, w1, #0x1 +; NONEON-NOSVE-NEXT: bfi w8, w2, #1, #1 +; NONEON-NOSVE-NEXT: bfi w8, w3, #2, #1 +; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB14_2 +; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load +; NONEON-NOSVE-NEXT: ldr h0, [x0] +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB14_3 +; NONEON-NOSVE-NEXT: b .LBB14_4 +; NONEON-NOSVE-NEXT: .LBB14_2: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB14_4 +; NONEON-NOSVE-NEXT: .LBB14_3: // %cond.load1 +; NONEON-NOSVE-NEXT: mov v1.16b, v0.16b +; NONEON-NOSVE-NEXT: add x9, x0, #2 +; NONEON-NOSVE-NEXT: ld1 { v1.h }[1], [x9] +; NONEON-NOSVE-NEXT: mov v1.h[2], v0.h[2] +; NONEON-NOSVE-NEXT: fmov d0, d1 +; NONEON-NOSVE-NEXT: .LBB14_4: // %else2 +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB14_6 +; NONEON-NOSVE-NEXT: // %bb.5: // %cond.load4 +; NONEON-NOSVE-NEXT: mov v0.h[1], v0.h[1] +; NONEON-NOSVE-NEXT: add x8, x0, #4 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8] +; NONEON-NOSVE-NEXT: .LBB14_6: // %else5 +; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 +; NONEON-NOSVE-NEXT: add sp, sp, #16 +; NONEON-NOSVE-NEXT: ret %load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer) %extend = sext <3 x i16> %load_value to <3 x i32> ret <3 x i32> %extend; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll index 
bd6b96889b4cc5..0904399558aee1 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,37 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI0_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB0_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_6 +; NONEON-NOSVE-NEXT: .LBB0_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7 +; NONEON-NOSVE-NEXT: .LBB0_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8 +; NONEON-NOSVE-NEXT: .LBB0_4: // %else6 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB0_5: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_2 +; NONEON-NOSVE-NEXT: .LBB0_6: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB0_3 +; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_4 +; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } @@ 
-34,6 +66,57 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI1_0] +; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB1_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_10 +; NONEON-NOSVE-NEXT: .LBB1_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_11 +; NONEON-NOSVE-NEXT: .LBB1_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_12 +; NONEON-NOSVE-NEXT: .LBB1_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_13 +; NONEON-NOSVE-NEXT: .LBB1_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_14 +; NONEON-NOSVE-NEXT: .LBB1_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_15 +; NONEON-NOSVE-NEXT: .LBB1_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_16 +; NONEON-NOSVE-NEXT: .LBB1_8: // %else14 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB1_9: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2 +; NONEON-NOSVE-NEXT: .LBB1_10: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB1_3 +; NONEON-NOSVE-NEXT: .LBB1_11: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_4 +; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_5 +; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.store7 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_6 +; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.store9 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] +; 
NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_7 +; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.store11 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_8 +; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.store13 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) ret void } @@ -49,6 +132,99 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0 +; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: addv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB2_17 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_18 +; NONEON-NOSVE-NEXT: .LBB2_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_19 +; NONEON-NOSVE-NEXT: .LBB2_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_20 +; NONEON-NOSVE-NEXT: .LBB2_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_21 +; NONEON-NOSVE-NEXT: .LBB2_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_22 +; NONEON-NOSVE-NEXT: .LBB2_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_23 +; NONEON-NOSVE-NEXT: .LBB2_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_24 +; NONEON-NOSVE-NEXT: .LBB2_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_25 +; NONEON-NOSVE-NEXT: .LBB2_9: // %else16 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_26 +; NONEON-NOSVE-NEXT: .LBB2_10: // %else18 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_27 +; NONEON-NOSVE-NEXT: .LBB2_11: // %else20 +; 
NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_28 +; NONEON-NOSVE-NEXT: .LBB2_12: // %else22 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB2_29 +; NONEON-NOSVE-NEXT: .LBB2_13: // %else24 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_30 +; NONEON-NOSVE-NEXT: .LBB2_14: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_31 +; NONEON-NOSVE-NEXT: .LBB2_15: // %else28 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_32 +; NONEON-NOSVE-NEXT: .LBB2_16: // %else30 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB2_17: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_2 +; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB2_3 +; NONEON-NOSVE-NEXT: .LBB2_19: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_4 +; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_5 +; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.store7 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_6 +; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.store9 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_7 +; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.store11 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_8 +; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.store13 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_9 +; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.store15 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_10 +; NONEON-NOSVE-NEXT: .LBB2_26: // %cond.store17 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #9] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_11 +; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.store19 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_12 +; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.store21 +; NONEON-NOSVE-NEXT: strb wzr, [x0, 
#11] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_13 +; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.store23 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_14 +; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.store25 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #13] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_15 +; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.store27 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #14] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_16 +; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.store29 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #15] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask) ret void } @@ -129,6 +305,244 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) { ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr w8, [sp, #72] +; NONEON-NOSVE-NEXT: fmov s1, w1 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #80] +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #88] +; NONEON-NOSVE-NEXT: mov v1.b[1], w2 +; NONEON-NOSVE-NEXT: mov v0.b[1], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp] +; NONEON-NOSVE-NEXT: mov v1.b[2], w3 +; NONEON-NOSVE-NEXT: mov v0.b[2], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #96] +; NONEON-NOSVE-NEXT: mov v1.b[3], w4 +; NONEON-NOSVE-NEXT: mov v0.b[3], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #104] +; NONEON-NOSVE-NEXT: mov v1.b[4], w5 +; NONEON-NOSVE-NEXT: mov v0.b[4], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #112] +; NONEON-NOSVE-NEXT: mov v1.b[5], w6 +; NONEON-NOSVE-NEXT: mov v0.b[5], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #120] +; NONEON-NOSVE-NEXT: mov v1.b[6], w7 +; NONEON-NOSVE-NEXT: mov v0.b[6], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #128] +; NONEON-NOSVE-NEXT: mov v1.b[7], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] +; NONEON-NOSVE-NEXT: mov v0.b[7], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #136] +; NONEON-NOSVE-NEXT: mov v1.b[8], w9 +; 
NONEON-NOSVE-NEXT: ldr w9, [sp, #16] +; NONEON-NOSVE-NEXT: mov v0.b[8], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #144] +; NONEON-NOSVE-NEXT: mov v1.b[9], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #24] +; NONEON-NOSVE-NEXT: mov v0.b[9], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #152] +; NONEON-NOSVE-NEXT: mov v1.b[10], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #32] +; NONEON-NOSVE-NEXT: mov v0.b[10], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #160] +; NONEON-NOSVE-NEXT: mov v1.b[11], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #40] +; NONEON-NOSVE-NEXT: mov v0.b[11], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #168] +; NONEON-NOSVE-NEXT: mov v1.b[12], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #48] +; NONEON-NOSVE-NEXT: mov v0.b[12], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #176] +; NONEON-NOSVE-NEXT: mov v1.b[13], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #56] +; NONEON-NOSVE-NEXT: mov v0.b[13], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #184] +; NONEON-NOSVE-NEXT: mov v1.b[14], w9 +; NONEON-NOSVE-NEXT: ldr w9, [sp, #64] +; NONEON-NOSVE-NEXT: mov v0.b[14], w8 +; NONEON-NOSVE-NEXT: ldr w8, [sp, #192] +; NONEON-NOSVE-NEXT: mov v1.b[15], w9 +; NONEON-NOSVE-NEXT: mov v0.b[15], w8 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0 +; NONEON-NOSVE-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; NONEON-NOSVE-NEXT: shl v1.16b, v1.16b, #7 +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: cmlt v1.16b, v1.16b, #0 +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v1.16b, v1.16b, v3.16b +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: addv h1, v1.8h +; NONEON-NOSVE-NEXT: addv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w8, s1 +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: bfi w8, w9, #16, #16 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB3_33 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; 
NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_34 +; NONEON-NOSVE-NEXT: .LBB3_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_35 +; NONEON-NOSVE-NEXT: .LBB3_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_36 +; NONEON-NOSVE-NEXT: .LBB3_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_37 +; NONEON-NOSVE-NEXT: .LBB3_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_38 +; NONEON-NOSVE-NEXT: .LBB3_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_39 +; NONEON-NOSVE-NEXT: .LBB3_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_40 +; NONEON-NOSVE-NEXT: .LBB3_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_41 +; NONEON-NOSVE-NEXT: .LBB3_9: // %else16 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_42 +; NONEON-NOSVE-NEXT: .LBB3_10: // %else18 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_43 +; NONEON-NOSVE-NEXT: .LBB3_11: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_44 +; NONEON-NOSVE-NEXT: .LBB3_12: // %else22 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_45 +; NONEON-NOSVE-NEXT: .LBB3_13: // %else24 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_46 +; NONEON-NOSVE-NEXT: .LBB3_14: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_47 +; NONEON-NOSVE-NEXT: .LBB3_15: // %else28 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_48 +; NONEON-NOSVE-NEXT: .LBB3_16: // %else30 +; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_49 +; NONEON-NOSVE-NEXT: .LBB3_17: // %else32 +; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_50 +; NONEON-NOSVE-NEXT: .LBB3_18: // %else34 +; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_51 +; NONEON-NOSVE-NEXT: .LBB3_19: // %else36 +; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_52 +; NONEON-NOSVE-NEXT: .LBB3_20: // %else38 +; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_53 +; NONEON-NOSVE-NEXT: .LBB3_21: // %else40 +; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_54 +; NONEON-NOSVE-NEXT: .LBB3_22: // %else42 +; NONEON-NOSVE-NEXT: tbnz w8, #22, .LBB3_55 +; NONEON-NOSVE-NEXT: .LBB3_23: // %else44 +; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_56 +; NONEON-NOSVE-NEXT: .LBB3_24: // %else46 
+; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_57 +; NONEON-NOSVE-NEXT: .LBB3_25: // %else48 +; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_58 +; NONEON-NOSVE-NEXT: .LBB3_26: // %else50 +; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_59 +; NONEON-NOSVE-NEXT: .LBB3_27: // %else52 +; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_60 +; NONEON-NOSVE-NEXT: .LBB3_28: // %else54 +; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_61 +; NONEON-NOSVE-NEXT: .LBB3_29: // %else56 +; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_62 +; NONEON-NOSVE-NEXT: .LBB3_30: // %else58 +; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_63 +; NONEON-NOSVE-NEXT: .LBB3_31: // %else60 +; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_64 +; NONEON-NOSVE-NEXT: .LBB3_32: // %else62 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB3_33: // %cond.store +; NONEON-NOSVE-NEXT: strb wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_2 +; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.store1 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #1] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB3_3 +; NONEON-NOSVE-NEXT: .LBB3_35: // %cond.store3 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_4 +; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.store5 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #3] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_5 +; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.store7 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_6 +; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.store9 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #5] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_7 +; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.store11 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_8 +; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.store13 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #7] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_9 +; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.store15 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_10 +; NONEON-NOSVE-NEXT: .LBB3_42: // %cond.store17 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #9] 
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_11 +; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.store19 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_12 +; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.store21 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #11] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_13 +; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.store23 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_14 +; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.store25 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #13] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_15 +; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.store27 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #14] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_16 +; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.store29 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #15] +; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_17 +; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.store31 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_18 +; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.store33 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #17] +; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_19 +; NONEON-NOSVE-NEXT: .LBB3_51: // %cond.store35 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #18] +; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_20 +; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.store37 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #19] +; NONEON-NOSVE-NEXT: tbz w8, #20, .LBB3_21 +; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.store39 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #20] +; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_22 +; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.store41 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #21] +; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_23 +; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.store43 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #22] +; NONEON-NOSVE-NEXT: tbz w8, #23, .LBB3_24 +; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.store45 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #23] +; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_25 +; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.store47 +; 
NONEON-NOSVE-NEXT: strb wzr, [x0, #24] +; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_26 +; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.store49 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #25] +; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_27 +; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.store51 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #26] +; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_28 +; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.store53 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #27] +; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_29 +; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.store55 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #28] +; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_30 +; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.store57 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #29] +; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_31 +; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.store59 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #30] +; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_32 +; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.store61 +; NONEON-NOSVE-NEXT: strb wzr, [x0, #31] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask) ret void } @@ -154,6 +568,29 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) { ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] +; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB4_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_4 +; NONEON-NOSVE-NEXT: .LBB4_2: // %else2 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz 
w8, #1, .LBB4_2 +; NONEON-NOSVE-NEXT: .LBB4_4: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask) ret void } @@ -169,6 +606,41 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB5_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_6 +; NONEON-NOSVE-NEXT: .LBB5_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7 +; NONEON-NOSVE-NEXT: .LBB5_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8 +; NONEON-NOSVE-NEXT: .LBB5_4: // %else6 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB5_5: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_2 +; NONEON-NOSVE-NEXT: .LBB5_6: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB5_3 +; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.store3 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_4 +; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.store5 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #6] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } @@ -185,6 +657,65 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) { ; 
CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] +; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB6_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_10 +; NONEON-NOSVE-NEXT: .LBB6_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_11 +; NONEON-NOSVE-NEXT: .LBB6_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_12 +; NONEON-NOSVE-NEXT: .LBB6_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_13 +; NONEON-NOSVE-NEXT: .LBB6_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_14 +; NONEON-NOSVE-NEXT: .LBB6_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_15 +; NONEON-NOSVE-NEXT: .LBB6_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_16 +; NONEON-NOSVE-NEXT: .LBB6_8: // %else14 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB6_9: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_2 +; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB6_3 +; NONEON-NOSVE-NEXT: .LBB6_11: // %cond.store3 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_4 +; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.store5 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_5 +; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.store7 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_6 +; 
NONEON-NOSVE-NEXT: .LBB6_14: // %cond.store9 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB6_7 +; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.store11 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_8 +; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.store13 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #14] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) ret void } @@ -209,6 +740,115 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) { ; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1] ; CHECK-NEXT: st1h { z1.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0 +; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] +; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: addv h0, v0.8h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB7_17 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_18 +; NONEON-NOSVE-NEXT: .LBB7_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_19 +; NONEON-NOSVE-NEXT: .LBB7_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_20 +; NONEON-NOSVE-NEXT: .LBB7_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_21 +; NONEON-NOSVE-NEXT: .LBB7_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_22 +; NONEON-NOSVE-NEXT: .LBB7_6: // %else10 +; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_23 +; NONEON-NOSVE-NEXT: .LBB7_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_24 +; NONEON-NOSVE-NEXT: .LBB7_8: // %else14 +; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_25 +; 
NONEON-NOSVE-NEXT: .LBB7_9: // %else16 +; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_26 +; NONEON-NOSVE-NEXT: .LBB7_10: // %else18 +; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_27 +; NONEON-NOSVE-NEXT: .LBB7_11: // %else20 +; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_28 +; NONEON-NOSVE-NEXT: .LBB7_12: // %else22 +; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_29 +; NONEON-NOSVE-NEXT: .LBB7_13: // %else24 +; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_30 +; NONEON-NOSVE-NEXT: .LBB7_14: // %else26 +; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_31 +; NONEON-NOSVE-NEXT: .LBB7_15: // %else28 +; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_32 +; NONEON-NOSVE-NEXT: .LBB7_16: // %else30 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB7_17: // %cond.store +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_2 +; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.store1 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #2] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB7_3 +; NONEON-NOSVE-NEXT: .LBB7_19: // %cond.store3 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_4 +; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.store5 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #6] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_5 +; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.store7 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_6 +; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.store9 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #10] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_7 +; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.store11 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_8 +; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.store13 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #14] +; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_9 +; 
NONEON-NOSVE-NEXT: .LBB7_25: // %cond.store15 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_10 +; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.store17 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #18] +; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_11 +; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.store19 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #20] +; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_12 +; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.store21 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #22] +; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_13 +; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.store23 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #24] +; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_14 +; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.store25 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #26] +; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_15 +; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.store27 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #28] +; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_16 +; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.store29 +; NONEON-NOSVE-NEXT: fmov s0, wzr +; NONEON-NOSVE-NEXT: str h0, [x0, #30] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask) ret void } @@ -225,6 +865,37 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz 
w8, #0, .LBB8_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_6 +; NONEON-NOSVE-NEXT: .LBB8_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB8_7 +; NONEON-NOSVE-NEXT: .LBB8_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB8_8 +; NONEON-NOSVE-NEXT: .LBB8_4: // %else6 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB8_5: // %cond.store +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_2 +; NONEON-NOSVE-NEXT: .LBB8_6: // %cond.store1 +; NONEON-NOSVE-NEXT: str wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB8_3 +; NONEON-NOSVE-NEXT: .LBB8_7: // %cond.store3 +; NONEON-NOSVE-NEXT: str wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB8_4 +; NONEON-NOSVE-NEXT: .LBB8_8: // %cond.store5 +; NONEON-NOSVE-NEXT: str wzr, [x0, #12] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } @@ -275,6 +946,57 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) { ; CHECK-NEXT: st1w { z1.s }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI9_0] +; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv b0, v0.8b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB9_9 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_10 +; NONEON-NOSVE-NEXT: .LBB9_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_11 +; NONEON-NOSVE-NEXT: .LBB9_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_12 +; NONEON-NOSVE-NEXT: .LBB9_4: // %else6 +; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB9_13 +; NONEON-NOSVE-NEXT: .LBB9_5: // %else8 +; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB9_14 +; NONEON-NOSVE-NEXT: .LBB9_6: // %else10 +; 
NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB9_15 +; NONEON-NOSVE-NEXT: .LBB9_7: // %else12 +; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB9_16 +; NONEON-NOSVE-NEXT: .LBB9_8: // %else14 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB9_9: // %cond.store +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_2 +; NONEON-NOSVE-NEXT: .LBB9_10: // %cond.store1 +; NONEON-NOSVE-NEXT: str wzr, [x0, #4] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB9_3 +; NONEON-NOSVE-NEXT: .LBB9_11: // %cond.store3 +; NONEON-NOSVE-NEXT: str wzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_4 +; NONEON-NOSVE-NEXT: .LBB9_12: // %cond.store5 +; NONEON-NOSVE-NEXT: str wzr, [x0, #12] +; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB9_5 +; NONEON-NOSVE-NEXT: .LBB9_13: // %cond.store7 +; NONEON-NOSVE-NEXT: str wzr, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB9_6 +; NONEON-NOSVE-NEXT: .LBB9_14: // %cond.store9 +; NONEON-NOSVE-NEXT: str wzr, [x0, #20] +; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB9_7 +; NONEON-NOSVE-NEXT: .LBB9_15: // %cond.store11 +; NONEON-NOSVE-NEXT: str wzr, [x0, #24] +; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB9_8 +; NONEON-NOSVE-NEXT: .LBB9_16: // %cond.store13 +; NONEON-NOSVE-NEXT: str wzr, [x0, #28] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask) ret void } @@ -291,6 +1013,27 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI10_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI10_0] +; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addp v0.2s, v0.2s, v0.2s +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB10_3 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, 
#1, .LBB10_4 +; NONEON-NOSVE-NEXT: .LBB10_2: // %else2 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.store +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_2 +; NONEON-NOSVE-NEXT: .LBB10_4: // %cond.store1 +; NONEON-NOSVE-NEXT: str xzr, [x0, #8] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask) ret void } @@ -315,6 +1058,37 @@ define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) { ; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: masked_store_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15 +; NONEON-NOSVE-NEXT: adrp x8, .LCPI11_0 +; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI11_0] +; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: addv h0, v0.4h +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB11_5 +; NONEON-NOSVE-NEXT: // %bb.1: // %else +; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_6 +; NONEON-NOSVE-NEXT: .LBB11_2: // %else2 +; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB11_7 +; NONEON-NOSVE-NEXT: .LBB11_3: // %else4 +; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB11_8 +; NONEON-NOSVE-NEXT: .LBB11_4: // %else6 +; NONEON-NOSVE-NEXT: ret +; NONEON-NOSVE-NEXT: .LBB11_5: // %cond.store +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_2 +; NONEON-NOSVE-NEXT: .LBB11_6: // %cond.store1 +; NONEON-NOSVE-NEXT: str xzr, [x0, #8] +; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB11_3 +; NONEON-NOSVE-NEXT: .LBB11_7: // %cond.store3 +; NONEON-NOSVE-NEXT: str xzr, [x0, #16] +; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB11_4 +; NONEON-NOSVE-NEXT: .LBB11_8: // %cond.store5 +; NONEON-NOSVE-NEXT: str xzr, [x0, #24] +; NONEON-NOSVE-NEXT: ret call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask) ret void } diff --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll index aef446a90df656..6a6b47e815ac16 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,15 @@ define void @add_v4i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ldr s1, [x1] +; NONEON-NOSVE-NEXT: uaddl v0.8h, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; NONEON-NOSVE-NEXT: str s0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i8>, ptr %a %op2 = load <4 x i8>, ptr %b %res = add <4 x i8> %op1, %op2 @@ -29,6 +39,14 @@ define void @add_v8i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ldr d1, [x1] +; NONEON-NOSVE-NEXT: add v0.8b, v0.8b, v1.8b +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i8>, ptr %a %op2 = load <8 x i8>, ptr %b %res = add <8 x i8> %op1, %op2 @@ -44,6 +62,14 @@ define void @add_v16i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.b, z0.b, z1.b ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, 
[x1] +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i8>, ptr %a %op2 = load <16 x i8>, ptr %b %res = add <16 x i8> %op1, %op2 @@ -60,6 +86,15 @@ define void @add_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.b, z2.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: add v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %res = add <32 x i8> %op1, %op2 @@ -76,6 +111,23 @@ define void @add_v2i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldrh w8, [x0] +; NONEON-NOSVE-NEXT: ldrh w9, [x1] +; NONEON-NOSVE-NEXT: fmov s0, w8 +; NONEON-NOSVE-NEXT: fmov s1, w9 +; NONEON-NOSVE-NEXT: add x8, x0, #2 +; NONEON-NOSVE-NEXT: add x9, x1, #2 +; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8] +; NONEON-NOSVE-NEXT: ld1 { v1.h }[2], [x9] +; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: mov w8, v0.s[1] +; NONEON-NOSVE-NEXT: fmov w9, s0 +; NONEON-NOSVE-NEXT: strh w9, [x0] +; NONEON-NOSVE-NEXT: strh w8, [x0, #2] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i16>, ptr %a %op2 = load <2 x i16>, ptr %b %res = add <2 x i16> %op1, %op2 @@ -91,6 +143,14 @@ define void @add_v4i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ldr d1, [x1] +; NONEON-NOSVE-NEXT: add v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i16>, 
ptr %a %op2 = load <4 x i16>, ptr %b %res = add <4 x i16> %op1, %op2 @@ -106,6 +166,14 @@ define void @add_v8i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z0.h, z0.h, z1.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i16>, ptr %a %op2 = load <8 x i16>, ptr %b %res = add <8 x i16> %op1, %op2 @@ -122,6 +190,15 @@ define void @add_v16i16(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: add z1.h, z2.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: add_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %res = add <16 x i16> %op1, %op2 @@ -137,6 +214,13 @@ define void @abs_v2i32(ptr %a) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: abs v0.2s, v0.2s +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i32>, ptr %a %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false) store <2 x i32> %res, ptr %a @@ -151,6 +235,13 @@ define void @abs_v4i32(ptr %a) { ; CHECK-NEXT: abs z0.s, p0/m, z0.s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i32>, ptr %a %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false) store <4 x 
i32> %res, ptr %a @@ -166,6 +257,14 @@ define void @abs_v8i32(ptr %a) { ; CHECK-NEXT: abs z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s +; NONEON-NOSVE-NEXT: abs v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false) store <8 x i32> %res, ptr %a @@ -180,6 +279,13 @@ define void @abs_v2i64(ptr %a) { ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x i64>, ptr %a %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false) store <2 x i64> %res, ptr %a @@ -195,6 +301,14 @@ define void @abs_v4i64(ptr %a) { ; CHECK-NEXT: abs z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: abs_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d +; NONEON-NOSVE-NEXT: abs v1.2d, v1.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false) store <4 x i64> %res, ptr %a @@ -211,6 +325,17 @@ define void @fadd_v2f16(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr s0, [x0] +; NONEON-NOSVE-NEXT: ldr s1, [x1] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: str s0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load 
<2 x half>, ptr %a %op2 = load <2 x half>, ptr %b %res = fadd <2 x half> %op1, %op2 @@ -227,6 +352,17 @@ define void @fadd_v4f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ldr d1, [x1] +; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x half>, ptr %a %op2 = load <4 x half>, ptr %b %res = fadd <4 x half> %op1, %op2 @@ -243,6 +379,21 @@ define void @fadd_v8f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fadd v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v2.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: str q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x half>, ptr %a %op2 = load <8 x half>, ptr %b %res = fadd <8 x half> %op1, %op2 @@ -261,6 +412,29 @@ define void @fadd_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h +; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h +; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h +; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h +; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h +; 
NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h +; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h +; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h +; NONEON-NOSVE-NEXT: fadd v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: fadd v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fadd v2.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s +; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s +; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s +; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s +; NONEON-NOSVE-NEXT: stp q1, q3, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %res = fadd <16 x half> %op1, %op2 @@ -277,6 +451,14 @@ define void @fadd_v2f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ldr d1, [x1] +; NONEON-NOSVE-NEXT: fadd v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x float>, ptr %a %op2 = load <2 x float>, ptr %b %res = fadd <2 x float> %op1, %op2 @@ -293,6 +475,14 @@ define void @fadd_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x float>, ptr %a %op2 = load <4 x float>, ptr %b %res = fadd <4 x float> %op1, %op2 @@ -311,6 +501,15 @@ define void @fadd_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: fadd 
v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %res = fadd <8 x float> %op1, %op2 @@ -327,6 +526,14 @@ define void @fadd_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: fadd v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <2 x double>, ptr %a %op2 = load <2 x double>, ptr %b %res = fadd <2 x double> %op1, %op2 @@ -345,6 +552,15 @@ define void @fadd_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fadd_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q3, [x1] +; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] +; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: fadd v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %res = fadd <4 x double> %op1, %op2 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll index 6d91253caae58f..03bb899c517b4e 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -15,6 +16,14 @@ define void 
@test_revbv16i16(ptr %a) { ; CHECK-NEXT: revb z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revbv16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev16 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> store <32 x i8> %tmp2, ptr %a @@ -31,6 +40,14 @@ define void @test_revbv8i32(ptr %a) { ; CHECK-NEXT: revb z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revbv8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev32 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> store <32 x i8> %tmp2, ptr %a @@ -47,6 +64,14 @@ define void @test_revbv4i64(ptr %a) { ; CHECK-NEXT: revb z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revbv4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev64 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> store <32 x i8> %tmp2, ptr %a @@ -63,6 +88,14 @@ define void @test_revhv8i32(ptr %a) { ; CHECK-NEXT: revh z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev32 v0.8h, v0.8h +; NONEON-NOSVE-NEXT: rev32 v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = 
shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> store <16 x i16> %tmp2, ptr %a @@ -79,6 +112,14 @@ define void @test_revhv8f32(ptr %a) { ; CHECK-NEXT: revh z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev32 v0.8h, v0.8h +; NONEON-NOSVE-NEXT: rev32 v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x half>, ptr %a %tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> store <16 x half> %tmp2, ptr %a @@ -95,6 +136,14 @@ define void @test_revhv4i64(ptr %a) { ; CHECK-NEXT: revh z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.8h, v0.8h +; NONEON-NOSVE-NEXT: rev64 v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> store <16 x i16> %tmp2, ptr %a @@ -111,6 +160,14 @@ define void @test_revwv4i64(ptr %a) { ; CHECK-NEXT: revw z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revwv4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s +; NONEON-NOSVE-NEXT: rev64 v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store <8 x i32> %tmp2, ptr %a @@ -127,6 +184,14 @@ define void @test_revwv4f64(ptr %a) { ; CHECK-NEXT: revw z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revwv4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s +; NONEON-NOSVE-NEXT: 
rev64 v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x float>, ptr %a %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> store <8 x float> %tmp2, ptr %a @@ -141,6 +206,12 @@ define <16 x i8> @test_revv16i8(ptr %a) { ; CHECK-NEXT: revb z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revv16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i8>, ptr %a %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -156,6 +227,14 @@ define void @test_revwv8i32v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: revw z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revwv8i32v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] +; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s +; NONEON-NOSVE-NEXT: rev64 v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = load <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -176,6 +255,18 @@ define void @test_revhv32i16(ptr %a) { ; CHECK-NEXT: stp q0, q1, [x0, #32] ; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revhv32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.8h, v0.8h +; NONEON-NOSVE-NEXT: rev64 v1.8h, v1.8h +; NONEON-NOSVE-NEXT: rev64 v2.8h, v2.8h +; NONEON-NOSVE-NEXT: rev64 v3.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: stp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i16>, ptr %a %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> store <32 x i16> %tmp2, ptr %a @@ -191,6 +282,14 @@ define void @test_rev_elts_fail(ptr %a) { ; 
CHECK-NEXT: tbl z0.d, { z2.d }, z0.d ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_rev_elts_fail: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i64>, ptr %a %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> store <4 x i64> %tmp2, ptr %a @@ -208,6 +307,15 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 { ; CHECK-NEXT: revd z1.q, p0/m, z1.q ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revdv4i64_sve2p1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ptrue p0.d, vl2 +; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q +; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i64>, ptr %a %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> store <4 x i64> %tmp2, ptr %a @@ -223,6 +331,15 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 { ; CHECK-NEXT: revd z1.q, p0/m, z1.q ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revdv4f64_sve2p1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ptrue p0.d +; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q +; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> store <4 x double> %tmp2, ptr %a @@ -238,6 +355,16 @@ define void @test_revv8i32(ptr %a) { ; CHECK-NEXT: tbl z0.s, { z2.s }, z0.s ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_revv8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s +; NONEON-NOSVE-NEXT: rev64 
v1.4s, v1.4s +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store <8 x i32> %tmp2, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll index 8808ad9a23d7c5..f254a1f9098f2d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -68,6 +69,18 @@ define void @zip1_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: zip2 v2.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: str q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <32 x i8>, ptr %a %tmp2 = load volatile <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -196,6 +209,28 @@ define void @zip_v32i16(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_v32i16: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: ldp q4, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q5, q1, [x0] +; NONEON-NOSVE-NEXT: ldp q6, q2, [x1, #32] +; NONEON-NOSVE-NEXT: ldp q7, q3, [x1] +; NONEON-NOSVE-NEXT: zip1 v17.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: zip2 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: zip1 v16.8h, v1.8h, v3.8h +; NONEON-NOSVE-NEXT: zip2 v1.8h, v1.8h, v3.8h +; NONEON-NOSVE-NEXT: zip1 v2.8h, v5.8h, v7.8h +; NONEON-NOSVE-NEXT: zip1 v3.8h, v4.8h, v6.8h +; NONEON-NOSVE-NEXT: zip2 v5.8h, v5.8h, v7.8h +; NONEON-NOSVE-NEXT: zip2 v4.8h, v4.8h, v6.8h +; NONEON-NOSVE-NEXT: add v6.8h, v16.8h, v17.8h +; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: add v2.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: stp q6, q0, [x0, #32] +; NONEON-NOSVE-NEXT: stp q1, q2, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i16>, ptr %a %tmp2 = load <32 x i16>, ptr %b %tmp3 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> @@ -244,6 +279,18 @@ define void @zip1_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: zip2 v2.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: zip1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <16 x i16>, ptr %a %tmp2 = load volatile <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -276,6 +323,18 @@ define void @zip1_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] 
+; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: zip2 v2.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: str q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = load volatile <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -298,6 +357,19 @@ define void @zip_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: zip1 v4.2d, v1.2d, v3.2d +; NONEON-NOSVE-NEXT: zip1 v5.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: zip2 v1.2d, v1.2d, v3.2d +; NONEON-NOSVE-NEXT: zip2 v0.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: fadd v2.2d, v4.2d, v5.2d +; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: stp q2, q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = load <4 x double>, ptr %b %tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> @@ -330,6 +402,16 @@ define void @zip_v4i32(ptr %a, ptr %b) { ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: zip1 v2.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: zip2 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i32>, ptr %a %tmp2 = load <4 x i32>, ptr %b %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> @@ -351,6 +433,16 @@ define void @zip1_v8i32_undef(ptr %a) { ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip1_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, 
[x0, #16] +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: zip2 v1.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: str q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store volatile <8 x i32> %tmp2, ptr %a @@ -370,6 +462,19 @@ define void @trn_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.b, z1.b, z2.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: trn1 v4.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: trn2 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: trn1 v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: trn2 v2.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: add v0.16b, v4.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = load <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -392,6 +497,19 @@ define void @trn_v8i16(ptr %a, ptr %b) { ; CHECK-NEXT: add z0.h, z1.h, z0.h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0 +; NONEON-NOSVE-NEXT: adrp x9, .LCPI8_1 +; NONEON-NOSVE-NEXT: ldr q1, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; NONEON-NOSVE-NEXT: ldr q2, [x9, :lo12:.LCPI8_1] +; NONEON-NOSVE-NEXT: tbl v0.16b, { v1.16b }, v0.16b +; NONEON-NOSVE-NEXT: tbl v1.16b, { v1.16b }, v2.16b +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i16>, ptr %a %tmp2 = load <8 x i16>, ptr %b %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> @@ -414,6 +532,19 @@ define void @trn_v16i16(ptr %a, ptr %b) { ; 
CHECK-NEXT: add z1.h, z1.h, z2.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: trn1 v4.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: trn2 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: trn1 v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: trn2 v2.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: add v0.8h, v4.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v2.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = load <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -436,6 +567,19 @@ define void @trn_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: add z1.s, z1.s, z2.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: trn1 v4.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: trn2 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: trn1 v1.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: trn2 v2.4s, v2.4s, v3.4s +; NONEON-NOSVE-NEXT: add v0.4s, v4.4s, v0.4s +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v2.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp2 = load <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -459,6 +603,19 @@ define void @trn_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q1, q3, [x1] +; NONEON-NOSVE-NEXT: zip1 v4.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: zip2 v0.2d, v0.2d, v1.2d +; NONEON-NOSVE-NEXT: zip1 v1.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: zip2 v2.2d, v2.2d, v3.2d +; NONEON-NOSVE-NEXT: fadd v0.2d, v4.2d, v0.2d +;
NONEON-NOSVE-NEXT: fadd v1.2d, v1.2d, v2.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = load <4 x double>, ptr %b %tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> @@ -479,6 +636,16 @@ define void @trn_v4f32(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: trn1 v2.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: trn2 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: fadd v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x float>, ptr %a %tmp2 = load <4 x float>, ptr %b %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> @@ -500,6 +667,18 @@ define void @trn_v8i32_undef(ptr %a) { ; CHECK-NEXT: add z1.s, z3.s, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trn_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: trn1 v2.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: trn2 v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: trn1 v3.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: trn2 v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: add v1.4s, v3.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> %tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> @@ -571,6 +750,18 @@ define void @zip2_v32i8(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; 
NONEON-NOSVE-NEXT: zip2 v2.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: str q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <32 x i8>, ptr %a %tmp2 = load volatile <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -617,6 +808,18 @@ define void @zip2_v16i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: zip2 v2.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: zip1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: str q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <16 x i16>, ptr %a %tmp2 = load volatile <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -649,6 +852,18 @@ define void @zip2_v8i32(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: zip2 v2.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v1.4s +; NONEON-NOSVE-NEXT: str q2, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = load volatile <8 x i32>, ptr %b %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> @@ -668,6 +883,16 @@ define void @zip2_v8i32_undef(ptr %a) #0{ ; CHECK-NEXT: str q1, [x0, #16] ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip2_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] 
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: zip2 v1.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: str q1, [x0, #16] +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load volatile <8 x i32>, ptr %a %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> store volatile <8 x i32> %tmp2, ptr %a @@ -869,6 +1094,19 @@ define void @uzp_v32i8(ptr %a, ptr %b) #0{ ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: uzp1 v4.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp2 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: uzp2 v2.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: add v0.16b, v4.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v2.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <32 x i8>, ptr %a %tmp2 = load <32 x i8>, ptr %b %tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> @@ -891,6 +1129,17 @@ define void @uzp_v4i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: add z0.h, z1.h, z0.h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: ext v1.8b, v0.8b, v0.8b, #6 +; NONEON-NOSVE-NEXT: ext v2.8b, v0.8b, v0.8b, #2 +; NONEON-NOSVE-NEXT: trn1 v1.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: zip1 v0.4h, v2.4h, v0.4h +; NONEON-NOSVE-NEXT: add v0.4h, v1.4h, v0.4h +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i16>, ptr %a %tmp2 = load <4 x i16>, ptr %b %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> @@ -1008,6 +1257,19 @@ define void @uzp_v16i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; 
NONEON-NOSVE-LABEL: uzp_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: uzp1 v4.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp2 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp2 v2.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: add v0.8h, v4.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v2.8h +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <16 x i16>, ptr %a %tmp2 = load <16 x i16>, ptr %b %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> @@ -1047,6 +1309,19 @@ define void @uzp_v8f32(ptr %a, ptr %b) #0{ ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: uzp1 v4.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp2 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp2 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: fadd v0.4s, v4.4s, v0.4s +; NONEON-NOSVE-NEXT: fadd v1.4s, v1.4s, v2.4s +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x float>, ptr %a %tmp2 = load <8 x float>, ptr %b %tmp3 = shufflevector <8 x float> %tmp1, <8 x float> %tmp2, <8 x i32> @@ -1069,6 +1344,19 @@ define void @uzp_v4i64(ptr %a, ptr %b) #0{ ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: zip1 v4.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: zip2 v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: zip1 v1.2d, v3.2d, v2.2d +; NONEON-NOSVE-NEXT: zip2 v2.2d, v3.2d, v2.2d +; NONEON-NOSVE-NEXT: add v0.2d, v4.2d, v0.2d +; NONEON-NOSVE-NEXT: add v1.2d, v1.2d, v2.2d +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +;
NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x i64>, ptr %a %tmp2 = load <4 x i64>, ptr %b %tmp3 = shufflevector <4 x i64> %tmp1, <4 x i64> %tmp2, <4 x i32> @@ -1136,6 +1424,16 @@ define void @uzp_v8i16(ptr %a, ptr %b) #0{ ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: add v0.8h, v2.8h, v0.8h +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i16>, ptr %a %tmp2 = load <8 x i16>, ptr %b %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> @@ -1174,6 +1472,15 @@ define void @uzp_v8i32_undef(ptr %a) #0{ ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: uzp_v8i32_undef: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp2 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <8 x i32>, ptr %a %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> %tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> @@ -1197,6 +1504,19 @@ define void @zip_vscale2_4(ptr %a, ptr %b) { ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: zip_vscale2_4: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] +; NONEON-NOSVE-NEXT: zip1 v4.2d, v1.2d, v3.2d +; NONEON-NOSVE-NEXT: zip1 v5.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: zip2 v1.2d, v1.2d, v3.2d +; NONEON-NOSVE-NEXT: zip2 v0.2d, v0.2d, v2.2d +; NONEON-NOSVE-NEXT: fadd v2.2d, v4.2d, v5.2d +; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d +; NONEON-NOSVE-NEXT: stp q2, q0, 
[x0] +; NONEON-NOSVE-NEXT: ret %tmp1 = load <4 x double>, ptr %a %tmp2 = load <4 x double>, ptr %b %tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll index 8039bd096bcb89..41d2cb8a2c7564 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -35,6 +36,23 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ptest_v16i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v1.4s, v1.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v3.4s, v3.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v2.4s, v2.4s, #0.0 +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b +; NONEON-NOSVE-NEXT: umaxv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w0, w8, #0x1 +; NONEON-NOSVE-NEXT: ret %v0 = bitcast ptr %a to ptr %v1 = load <16 x float>, ptr %v0, align 4 %v2 = fcmp une <16 x float> %v1, zeroinitializer @@ -92,6 +110,33 @@ define i1 @ptest_or_v16i1(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ptest_or_v16i1: +; NONEON-NOSVE: // %bb.0: 
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x1, #32] +; NONEON-NOSVE-NEXT: fcmeq v1.4s, v1.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v3.4s, v3.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v2.4s, v2.4s, #0.0 +; NONEON-NOSVE-NEXT: ldp q6, q7, [x1] +; NONEON-NOSVE-NEXT: fcmeq v4.4s, v4.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v5.4s, v5.4s, #0.0 +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: fcmeq v7.4s, v7.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v6.4s, v6.4s, #0.0 +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v6.8h, v7.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b +; NONEON-NOSVE-NEXT: orn v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: umaxv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w0, w8, #0x1 +; NONEON-NOSVE-NEXT: ret %v0 = bitcast ptr %a to ptr %v1 = load <16 x float>, ptr %v0, align 4 %v2 = fcmp une <16 x float> %v1, zeroinitializer @@ -159,6 +204,33 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: ptest_and_v16i1: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q2, q3, [x0] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x1, #32] +; NONEON-NOSVE-NEXT: fcmeq v1.4s, v1.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v3.4s, v3.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v2.4s, v2.4s, #0.0 +; NONEON-NOSVE-NEXT: ldp q6, q7, [x1] +; NONEON-NOSVE-NEXT: fcmeq v4.4s, v4.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v5.4s, v5.4s, #0.0 +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; NONEON-NOSVE-NEXT: fcmeq v7.4s, v7.4s, #0.0 +; NONEON-NOSVE-NEXT: fcmeq v6.4s, v6.4s, #0.0 +; NONEON-NOSVE-NEXT: 
uzp1 v1.8h, v2.8h, v3.8h +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v6.8h, v7.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b +; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: uminv b0, v0.16b +; NONEON-NOSVE-NEXT: fmov w8, s0 +; NONEON-NOSVE-NEXT: and w0, w8, #0x1 +; NONEON-NOSVE-NEXT: ret %v0 = bitcast ptr %a to ptr %v1 = load <16 x float>, ptr %v0, align 4 %v2 = fcmp une <16 x float> %v1, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll index 726fd28c90ae22..5626f77c684f22 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -18,6 +19,13 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) { ; CHECK-NEXT: lsr z0.h, z0.h, #8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev16 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ushr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: ret %res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op) ret <4 x i8> %res } @@ -30,6 +38,11 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) { ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v8i8: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op) ret <8 x i8> %res } @@ -42,6 +55,11 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) { ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op) ret <16 x i8> %res } @@ -55,6 +73,14 @@ define void @bitreverse_v32i8(ptr %a) { ; CHECK-NEXT: rbit z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <32 x i8>, ptr %a %res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op) store <32 x i8> %res, ptr %a @@ -70,6 +96,13 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) { ; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -82,6 +115,12 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) { ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev16 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -94,6 +133,12 @@ 
define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) { ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -107,6 +152,16 @@ define void @bitreverse_v16i16(ptr %a) { ; CHECK-NEXT: rbit z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev16 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -121,6 +176,12 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) { ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -133,6 +194,12 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) { ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -146,6 +213,16 @@ define void @bitreverse_v8i32(ptr %a) { ; CHECK-NEXT: rbit 
z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev32 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -160,6 +237,12 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) { ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev64 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -172,6 +255,12 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) { ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -185,6 +274,16 @@ define void @bitreverse_v4i64(ptr %a) { ; CHECK-NEXT: rbit z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bitreverse_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev64 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op) store <4 
x i64> %res, ptr %a @@ -204,6 +303,12 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) { ; CHECK-NEXT: lsr z0.s, z0.s, #16 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: ret %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op) ret <2 x i16> %res } @@ -216,6 +321,11 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) { ; CHECK-NEXT: revb z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev16 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op) ret <4 x i16> %res } @@ -228,6 +338,11 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) { ; CHECK-NEXT: revb z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op) ret <8 x i16> %res } @@ -241,6 +356,14 @@ define void @bswap_v16i16(ptr %a) { ; CHECK-NEXT: revb z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev16 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <16 x i16>, ptr %a %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op) store <16 x i16> %res, ptr %a @@ -255,6 +378,11 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) { ; CHECK-NEXT: revb z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: 
rev32 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op) ret <2 x i32> %res } @@ -267,6 +395,11 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) { ; CHECK-NEXT: revb z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op) ret <4 x i32> %res } @@ -280,6 +413,14 @@ define void @bswap_v8i32(ptr %a) { ; CHECK-NEXT: revb z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev32 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <8 x i32>, ptr %a %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op) store <8 x i32> %res, ptr %a @@ -294,6 +435,11 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) { ; CHECK-NEXT: revb z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev64 v0.8b, v0.8b +; NONEON-NOSVE-NEXT: ret %res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op) ret <1 x i64> %res } @@ -306,6 +452,11 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) { ; CHECK-NEXT: revb z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op) ret <2 x i64> %res } @@ -319,6 +470,14 @@ define void @bswap_v4i64(ptr %a) { ; CHECK-NEXT: revb z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: bswap_v4i64: +; NONEON-NOSVE: // %bb.0: +; 
NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b +; NONEON-NOSVE-NEXT: rev64 v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op = load <4 x i64>, ptr %a %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll index c022bf85e67e93..55f4f5bae641e5 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,19 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) { ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v1.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff +; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8 +; NONEON-NOSVE-NEXT: ushr v1.4h, v1.4h, #7 +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: usra v0.4h, v1.4h, #3 +; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i8> %op1, shufflevector (<4 x i8> insertelement (<4 x i8> poison, i8 32, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer) ret <4 x i8> %res } @@ -26,6 +40,13 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) { ; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 
killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.8b, v0.8b, #0 +; NONEON-NOSVE-NEXT: usra v0.8b, v1.8b, #3 +; NONEON-NOSVE-NEXT: sshr v0.8b, v0.8b, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer) ret <8 x i8> %res } @@ -38,6 +59,13 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) { ; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: usra v0.16b, v1.16b, #3 +; NONEON-NOSVE-NEXT: sshr v0.16b, v0.16b, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer) ret <16 x i8> %res } @@ -51,6 +79,18 @@ define void @sdiv_v32i8(ptr %a) { ; CHECK-NEXT: asrd z1.b, p0/m, z1.b, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmlt v2.16b, v0.16b, #0 +; NONEON-NOSVE-NEXT: cmlt v3.16b, v1.16b, #0 +; NONEON-NOSVE-NEXT: usra v0.16b, v2.16b, #3 +; NONEON-NOSVE-NEXT: usra v1.16b, v3.16b, #3 +; NONEON-NOSVE-NEXT: sshr v0.16b, v0.16b, #5 +; NONEON-NOSVE-NEXT: sshr v1.16b, v1.16b, #5 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer) store <32 x i8> %res, ptr %a @@ -66,6 +106,20 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) { ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i16: +; 
NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: shl v1.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f +; NONEON-NOSVE-NEXT: dup v2.2s, w8 +; NONEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16 +; NONEON-NOSVE-NEXT: ushr v1.2s, v1.2s, #26 +; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b +; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s +; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i16> %op1, shufflevector (<2 x i16> insertelement (<2 x i16> poison, i16 32, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer) ret <2 x i16> %res } @@ -78,6 +132,13 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) { ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.4h, v0.4h, #0 +; NONEON-NOSVE-NEXT: usra v0.4h, v1.4h, #11 +; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer) ret <4 x i16> %res } @@ -90,6 +151,13 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) { ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.8h, v0.8h, #0 +; NONEON-NOSVE-NEXT: usra v0.8h, v1.8h, #11 +; NONEON-NOSVE-NEXT: sshr v0.8h, v0.8h, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer) ret <8 x i16> %res } @@ -103,6 +171,18 @@ define void @sdiv_v16i16(ptr %a) { ; CHECK-NEXT: asrd z1.h, p0/m, z1.h, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v16i16: +; NONEON-NOSVE: // 
%bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmlt v2.8h, v0.8h, #0 +; NONEON-NOSVE-NEXT: cmlt v3.8h, v1.8h, #0 +; NONEON-NOSVE-NEXT: usra v0.8h, v2.8h, #11 +; NONEON-NOSVE-NEXT: usra v1.8h, v3.8h, #11 +; NONEON-NOSVE-NEXT: sshr v0.8h, v0.8h, #5 +; NONEON-NOSVE-NEXT: sshr v1.8h, v1.8h, #5 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer) store <16 x i16> %res, ptr %a @@ -117,6 +197,13 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) { ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.2s, v0.2s, #0 +; NONEON-NOSVE-NEXT: usra v0.2s, v1.2s, #27 +; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer) ret <2 x i32> %res } @@ -129,6 +216,13 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) { ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.4s, v0.4s, #0 +; NONEON-NOSVE-NEXT: usra v0.4s, v1.4s, #27 +; NONEON-NOSVE-NEXT: sshr v0.4s, v0.4s, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer) ret <4 x i32> %res } @@ -142,6 +236,18 @@ define void @sdiv_v8i32(ptr %a) { ; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; 
NONEON-NOSVE-NEXT: cmlt v2.4s, v0.4s, #0 +; NONEON-NOSVE-NEXT: cmlt v3.4s, v1.4s, #0 +; NONEON-NOSVE-NEXT: usra v0.4s, v2.4s, #27 +; NONEON-NOSVE-NEXT: usra v1.4s, v3.4s, #27 +; NONEON-NOSVE-NEXT: sshr v0.4s, v0.4s, #5 +; NONEON-NOSVE-NEXT: sshr v1.4s, v1.4s, #5 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer) store <8 x i32> %res, ptr %a @@ -156,6 +262,13 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) { ; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt d1, d0, #0 +; NONEON-NOSVE-NEXT: usra d0, d1, #59 +; NONEON-NOSVE-NEXT: sshr d0, d0, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer) ret <1 x i64> %res } @@ -169,6 +282,13 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) { ; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: cmlt v1.2d, v0.2d, #0 +; NONEON-NOSVE-NEXT: usra v0.2d, v1.2d, #59 +; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #5 +; NONEON-NOSVE-NEXT: ret %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer) ret <2 x i64> %res } @@ -182,6 +302,18 @@ define void @sdiv_v4i64(ptr %a) { ; CHECK-NEXT: asrd z1.d, p0/m, z1.d, #5 ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: sdiv_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: cmlt v2.2d, v0.2d, #0 +; NONEON-NOSVE-NEXT: cmlt v3.2d, v1.2d, #0 +; 
NONEON-NOSVE-NEXT: usra v0.2d, v2.2d, #59 +; NONEON-NOSVE-NEXT: usra v1.2d, v3.2d, #59 +; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #5 +; NONEON-NOSVE-NEXT: sshr v1.2d, v1.2d, #5 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer) store <4 x i64> %res, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll index 649b13fa8a1e35..e15529e1926ac7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE @@ -15,6 +16,11 @@ define <4 x i8> @splat_v4i8(i8 %a) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.4h, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i8> undef, i8 %a, i64 0 %splat = shufflevector <4 x i8> %insert, <4 x i8> undef, <4 x i32> zeroinitializer ret <4 x i8> %splat @@ -26,6 +32,11 @@ define <8 x i8> @splat_v8i8(i8 %a) { ; CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.8b, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i8> undef, i8 %a, i64 0 %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer ret <8 x i8> %splat @@ -37,6 +48,11 @@ define <16 x i8> @splat_v16i8(i8 %a) { ; 
CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.16b, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x i8> undef, i8 %a, i64 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %splat @@ -48,6 +64,12 @@ define void @splat_v32i8(i8 %a, ptr %b) { ; CHECK-NEXT: mov z0.b, w0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.16b, w0 +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <32 x i8> undef, i8 %a, i64 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer store <32 x i8> %splat, ptr %b @@ -60,6 +82,11 @@ define <2 x i16> @splat_v2i16(i16 %a) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.2s, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x i16> undef, i16 %a, i64 0 %splat = shufflevector <2 x i16> %insert, <2 x i16> undef, <2 x i32> zeroinitializer ret <2 x i16> %splat @@ -71,6 +98,11 @@ define <4 x i16> @splat_v4i16(i16 %a) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.4h, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i16> undef, i16 %a, i64 0 %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer ret <4 x i16> %splat @@ -82,6 +114,11 @@ define <8 x i16> @splat_v8i16(i16 %a) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: 
dup v0.8h, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i16> undef, i16 %a, i64 0 %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %splat @@ -93,6 +130,12 @@ define void @splat_v16i16(i16 %a, ptr %b) { ; CHECK-NEXT: mov z0.h, w0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.8h, w0 +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x i16> undef, i16 %a, i64 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer store <16 x i16> %splat, ptr %b @@ -105,6 +148,11 @@ define <2 x i32> @splat_v2i32(i32 %a) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.2s, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x i32> undef, i32 %a, i64 0 %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer ret <2 x i32> %splat @@ -116,6 +164,11 @@ define <4 x i32> @splat_v4i32(i32 %a) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.4s, w0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i32> undef, i32 %a, i64 0 %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat @@ -127,6 +180,12 @@ define void @splat_v8i32(i32 %a, ptr %b) { ; CHECK-NEXT: mov z0.s, w0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.4s, w0 +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i32> undef, i32 %a, i64 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> 
undef, <8 x i32> zeroinitializer store <8 x i32> %splat, ptr %b @@ -139,6 +198,11 @@ define <1 x i64> @splat_v1i64(i64 %a) { ; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov d0, x0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <1 x i64> undef, i64 %a, i64 0 %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer ret <1 x i64> %splat @@ -150,6 +214,11 @@ define <2 x i64> @splat_v2i64(i64 %a) { ; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.2d, x0 +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x i64> undef, i64 %a, i64 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat @@ -161,6 +230,12 @@ define void @splat_v4i64(i64 %a, ptr %b) { ; CHECK-NEXT: mov z0.d, x0 ; CHECK-NEXT: stp q0, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: dup v0.2d, x0 +; NONEON-NOSVE-NEXT: stp q0, q0, [x1] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i64> undef, i64 %a, i64 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer store <4 x i64> %splat, ptr %b @@ -178,6 +253,12 @@ define <2 x half> @splat_v2f16(half %a) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.4h, v0.h[0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x half> undef, half %a, i64 0 %splat = shufflevector <2 x half> %insert, <2 x half> undef, <2 x i32> zeroinitializer ret <2 x half> %splat @@ -190,6 +271,12 @@ define <4 x half> 
@splat_v4f16(half %a) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.4h, v0.h[0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x half> undef, half %a, i64 0 %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer ret <4 x half> %splat @@ -202,6 +289,12 @@ define <8 x half> @splat_v8f16(half %a) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.8h, v0.h[0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x half> undef, half %a, i64 0 %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer ret <8 x half> %splat @@ -214,6 +307,13 @@ define void @splat_v16f16(half %a, ptr %b) { ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.8h, v0.h[0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x half> undef, half %a, i64 0 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer store <16 x half> %splat, ptr %b @@ -227,6 +327,12 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) { ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $s0 killed $s0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <2 x float> undef, float %a, i64 0 %splat = 
shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer ret <2 x float> %splat @@ -239,6 +345,12 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) { ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $s0 killed $s0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.4s, v0.s[0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x float> undef, float %a, i64 0 %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %splat @@ -251,6 +363,13 @@ define void @splat_v8f32(float %a, ptr %b) { ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $s0 killed $s0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.4s, v0.s[0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x float> undef, float %a, i64 0 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer store <8 x float> %splat, ptr %b @@ -261,6 +380,10 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) { ; CHECK-LABEL: splat_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ret %insert = insertelement <1 x double> undef, double %a, i64 0 %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer ret <1 x double> %splat @@ -273,6 +396,12 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) { ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.2d, v0.d[0] +; NONEON-NOSVE-NEXT: ret 
%insert = insertelement <2 x double> undef, double %a, i64 0 %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer ret <2 x double> %splat @@ -285,6 +414,13 @@ define void @splat_v4f64(double %a, ptr %b) { ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0 +; NONEON-NOSVE-NEXT: dup v0.2d, v0.d[0] +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x double> undef, double %a, i64 0 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer store <4 x double> %splat, ptr %b @@ -301,6 +437,12 @@ define void @splat_imm_v32i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #1 // =0x1 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.16b, #1 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <32 x i8> undef, i8 1, i64 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer store <32 x i8> %splat, ptr %a @@ -313,6 +455,13 @@ define void @splat_imm_v16i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #2 // =0x2 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #2 // =0x2 +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x i16> undef, i16 2, i64 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer store <16 x i16> %splat, ptr %a @@ -325,6 +474,13 @@ define void @splat_imm_v8i32(ptr %a) { ; CHECK-NEXT: mov z0.s, #3 // =0x3 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, 
#3 // =0x3 +; NONEON-NOSVE-NEXT: dup v0.4s, w8 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x i32> undef, i32 3, i64 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer store <8 x i32> %splat, ptr %a @@ -337,6 +493,13 @@ define void @splat_imm_v4i64(ptr %a) { ; CHECK-NEXT: mov z0.d, #4 // =0x4 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #4 // =0x4 +; NONEON-NOSVE-NEXT: dup v0.2d, x8 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x i64> undef, i64 4, i64 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer store <4 x i64> %splat, ptr %a @@ -353,6 +516,13 @@ define void @splat_imm_v16f16(ptr %a) { ; CHECK-NEXT: fmov z0.h, #5.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov w8, #17664 // =0x4500 +; NONEON-NOSVE-NEXT: dup v0.8h, w8 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <16 x half> undef, half 5.0, i64 0 %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer store <16 x half> %splat, ptr %a @@ -365,6 +535,12 @@ define void @splat_imm_v8f32(ptr %a) { ; CHECK-NEXT: fmov z0.s, #6.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov v0.4s, #6.00000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <8 x float> undef, float 6.0, i64 0 %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer store <8 x float> %splat, ptr %a @@ -377,6 +553,12 @@ define void @splat_imm_v4f64(ptr %a) { ; CHECK-NEXT: fmov z0.d, #7.00000000 ; CHECK-NEXT: stp q0, q0, [x0] ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: splat_imm_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov v0.2d, #7.00000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret %insert = insertelement <4 x double> undef, double 7.0, i64 0 %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer store <4 x double> %splat, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll index c7435bdbec9497..f055061b13bed6 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -12,6 +13,11 @@ define void @store_v4i8(ptr %a) { ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: st1b { z0.h }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i8> zeroinitializer, ptr %a ret void } @@ -22,6 +28,12 @@ define void @store_v8i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i8> zeroinitializer, ptr %a ret void } @@ -32,6 +44,12 @@ define void @store_v16i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi 
v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x i8> zeroinitializer, ptr %a ret void } @@ -42,6 +60,12 @@ define void @store_v32i8(ptr %a) { ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <32 x i8> zeroinitializer, ptr %a ret void } @@ -53,6 +77,11 @@ define void @store_v2i16(ptr %a) { ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x i16> zeroinitializer, ptr %a ret void } @@ -64,6 +93,11 @@ define void @store_v2f16(ptr %a) { ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str wzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x half> zeroinitializer, ptr %a ret void } @@ -74,6 +108,12 @@ define void @store_v4i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i16> zeroinitializer, ptr %a ret void } @@ -84,6 +124,12 @@ define void @store_v4f16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d0, #0000000000000000 +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x half> zeroinitializer, ptr %a ret void } @@ -94,6 +140,12 @@ define void @store_v8i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; 
+; NONEON-NOSVE-LABEL: store_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i16> zeroinitializer, ptr %a ret void } @@ -104,6 +156,12 @@ define void @store_v8f16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: str q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x half> zeroinitializer, ptr %a ret void } @@ -114,6 +172,12 @@ define void @store_v16i16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x i16> zeroinitializer, ptr %a ret void } @@ -124,6 +188,12 @@ define void @store_v16f16(ptr %a) { ; CHECK-NEXT: mov z0.h, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <16 x half> zeroinitializer, ptr %a ret void } @@ -133,6 +203,11 @@ define void @store_v2i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x i32> zeroinitializer, ptr %a ret void } @@ -142,6 +217,11 @@ define void @store_v2f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: str xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: str xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x float> zeroinitializer, ptr %a ret void } @@ -151,6 +231,11 @@ define void 
@store_v4i32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i32> zeroinitializer, ptr %a ret void } @@ -160,6 +245,11 @@ define void @store_v4f32(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x float> zeroinitializer, ptr %a ret void } @@ -170,6 +260,12 @@ define void @store_v8i32(ptr %a) { ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x i32> zeroinitializer, ptr %a ret void } @@ -180,6 +276,12 @@ define void @store_v8f32(ptr %a) { ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <8 x float> zeroinitializer, ptr %a ret void } @@ -190,6 +292,12 @@ define void @store_v1i64(ptr %a) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v1i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: str d0, [x0] +; NONEON-NOSVE-NEXT: ret store <1 x i64> zeroinitializer, ptr %a ret void } @@ -200,6 +308,12 @@ define void @store_v1f64(ptr %a) { ; CHECK-NEXT: fmov d0, xzr ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v1f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi d0, #0000000000000000 +; NONEON-NOSVE-NEXT: str d0, [x0] +; 
NONEON-NOSVE-NEXT: ret store <1 x double> zeroinitializer, ptr %a ret void } @@ -209,6 +323,11 @@ define void @store_v2i64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x i64> zeroinitializer, ptr %a ret void } @@ -218,6 +337,11 @@ define void @store_v2f64(ptr %a) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp xzr, xzr, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0] +; NONEON-NOSVE-NEXT: ret store <2 x double> zeroinitializer, ptr %a ret void } @@ -228,6 +352,12 @@ define void @store_v4i64(ptr %a) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x i64> zeroinitializer, ptr %a ret void } @@ -238,6 +368,12 @@ define void @store_v4f64(ptr %a) { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: stp q0, q0, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: stp q0, q0, [x0] +; NONEON-NOSVE-NEXT: ret store <4 x double> zeroinitializer, ptr %a ret void } diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll index 9e04fc236836cc..80c9ef87e9b915 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme 
-force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE ; Test we can code generater patterns of the form: @@ -23,6 +24,12 @@ define void @subvector_v4i8(ptr %in, ptr %out) { ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i8>, ptr %in br label %bb1 @@ -37,6 +44,12 @@ define void @subvector_v8i8(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i8>, ptr %in br label %bb1 @@ -51,6 +64,12 @@ define void @subvector_v16i8(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v16i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i8>, ptr %in br label %bb1 @@ -65,6 +84,12 @@ define void @subvector_v32i8(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v32i8: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i8>, ptr %in br label %bb1 @@ -81,6 +106,12 @@ define void @subvector_v2i16(ptr %in, ptr %out) { ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: ret %a = 
load <2 x i16>, ptr %in br label %bb1 @@ -95,6 +126,12 @@ define void @subvector_v4i16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i16>, ptr %in br label %bb1 @@ -109,6 +146,12 @@ define void @subvector_v8i16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i16>, ptr %in br label %bb1 @@ -123,6 +166,12 @@ define void @subvector_v16i16(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v16i16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in br label %bb1 @@ -138,6 +187,12 @@ define void @subvector_v2i32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2i32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i32>, ptr %in br label %bb1 @@ -152,6 +207,12 @@ define void @subvector_v4i32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %in br label %bb1 @@ -166,6 +227,12 @@ define void @subvector_v8i32(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; 
CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8i32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in br label %bb1 @@ -181,6 +248,12 @@ define void @subvector_v2i64(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2i64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %in br label %bb1 @@ -195,6 +268,12 @@ define void @subvector_v4i64(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4i64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in br label %bb1 @@ -210,6 +289,12 @@ define void @subvector_v2f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr w8, [x0] +; NONEON-NOSVE-NEXT: str w8, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x half>, ptr %in br label %bb1 @@ -224,6 +309,12 @@ define void @subvector_v4f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x half>, ptr %in br label %bb1 @@ -238,6 +329,12 @@ define void @subvector_v8f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; 
NONEON-NOSVE-NEXT: ret %a = load <8 x half>, ptr %in br label %bb1 @@ -252,6 +349,12 @@ define void @subvector_v16f16(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v16f16: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x half>, ptr %in br label %bb1 @@ -267,6 +370,12 @@ define void @subvector_v2f32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2f32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr d0, [x0] +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x float>, ptr %in br label %bb1 @@ -281,6 +390,12 @@ define void @subvector_v4f32(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4f32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x float>, ptr %in br label %bb1 @@ -295,6 +410,12 @@ define void @subvector_v8f32(ptr %in, ptr %out) { ; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v8f32: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x float>,ptr %in br label %bb1 @@ -310,6 +431,12 @@ define void @subvector_v2f64(ptr %in, ptr %out) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v2f64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: str q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x double>, ptr %in br label %bb1 @@ -324,6 +451,12 @@ define void @subvector_v4f64(ptr %in, ptr %out) { ; CHECK-NEXT: ldp 
q0, q1, [x0] ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: subvector_v4f64: +; NONEON-NOSVE: // %bb.0: // %bb1 +; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] +; NONEON-NOSVE-NEXT: stp q0, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x double>, ptr %in br label %bb1 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll index b34fe438a063a9..41b68e10e75ded 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -12,6 +13,13 @@ define void @store_trunc_v8i16i8(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v8i16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i16>, ptr %ap %val = trunc <8 x i16> %a to <8 x i8> store <8 x i8> %val, ptr %dest @@ -25,6 +33,14 @@ define void @store_trunc_v4i32i8(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1b { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v4i32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; NONEON-NOSVE-NEXT: str s0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = trunc <4 x i32> %a to <4 x i8> store <4 x i8> %val, ptr %dest 
@@ -38,6 +54,13 @@ define void @store_trunc_v4i32i16(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v4i32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <4 x i32>, ptr %ap %val = trunc <4 x i32> %a to <4 x i16> store <4 x i16> %val, ptr %dest @@ -51,6 +74,13 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) { ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1w { z0.d }, p0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v2i64i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0] +; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d +; NONEON-NOSVE-NEXT: str d0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i64>, ptr %ap %val = trunc <2 x i64> %a to <2 x i32> store <2 x i32> %val, ptr %dest @@ -66,6 +96,14 @@ define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) { ; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d ; CHECK-NEXT: str q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: store_trunc_v2i256i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr d0, [x0, #32] +; NONEON-NOSVE-NEXT: ldr d1, [x0] +; NONEON-NOSVE-NEXT: mov v1.d[1], v0.d[0] +; NONEON-NOSVE-NEXT: str q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <2 x i256>, ptr %ap %val = trunc <2 x i256> %a to <2 x i64> store <2 x i64> %val, ptr %dest diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll index 9e56462df38890..8242b4e26d5057 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme 
-force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -19,6 +20,12 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: ret %a = load <16 x i16>, ptr %in %b = trunc <16 x i16> %a to <16 x i8> ret <16 x i8> %b @@ -41,6 +48,17 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.b, z2.b, z2.b ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i16>, ptr %in %b = trunc <32 x i16> %a to <32 x i8> %c = add <32 x i8> %b, %b @@ -76,6 +94,24 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v64i16_v64i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: ldp q6, q1, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v2.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: uzp1 v3.16b, v5.16b, v4.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v6.16b, v1.16b +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; 
NONEON-NOSVE-NEXT: add v2.16b, v2.16b, v2.16b +; NONEON-NOSVE-NEXT: add v3.16b, v3.16b, v3.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q3, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <64 x i16>, ptr %in %b = trunc <64 x i16> %a to <64 x i8> %c = add <64 x i8> %b, %b @@ -133,6 +169,38 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q4, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #128] +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: ldp q16, q1, [x0, #160] +; NONEON-NOSVE-NEXT: uzp1 v4.16b, v5.16b, v4.16b +; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v6.16b, v7.16b, v6.16b +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q18, q7, [x0, #96] +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v16.16b, v1.16b +; NONEON-NOSVE-NEXT: uzp1 v5.16b, v17.16b, v5.16b +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v2.16b, v3.16b, v2.16b +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v4.16b, v4.16b, v4.16b +; NONEON-NOSVE-NEXT: uzp1 v7.16b, v18.16b, v7.16b +; NONEON-NOSVE-NEXT: add v3.16b, v6.16b, v6.16b +; NONEON-NOSVE-NEXT: uzp1 v6.16b, v17.16b, v16.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #96] +; NONEON-NOSVE-NEXT: add v0.16b, v5.16b, v5.16b +; NONEON-NOSVE-NEXT: add v2.16b, v2.16b, v2.16b +; NONEON-NOSVE-NEXT: add v4.16b, v7.16b, v7.16b +; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #64] +; NONEON-NOSVE-NEXT: add v1.16b, v6.16b, v6.16b +; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q2, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <128 x i16>, ptr %in %b = trunc <128 x i16> %a 
to <128 x i8> %c = add <128 x i8> %b, %b @@ -155,6 +223,13 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = trunc <8 x i32> %a to <8 x i8> ret <8 x i8> %b @@ -178,6 +253,15 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: ret %a = load <16 x i32>, ptr %in %b = trunc <16 x i32> %a to <16 x i8> ret <16 x i8> %b @@ -215,6 +299,23 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.b, z3.b, z3.b ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i32_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v7.8h, v6.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v1.16b +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load 
<32 x i32>, ptr %in %b = trunc <32 x i32> %a to <32 x i8> %c = add <32 x i8> %b, %b @@ -279,6 +380,36 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #128] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #160] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #224] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: ldp q3, q1, [x0] +; NONEON-NOSVE-NEXT: uzp1 v4.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v6.8h, v7.8h, v6.8h +; NONEON-NOSVE-NEXT: ldp q16, q7, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #96] +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; NONEON-NOSVE-NEXT: uzp1 v5.8h, v17.8h, v5.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; NONEON-NOSVE-NEXT: uzp1 v7.8h, v16.8h, v7.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v19.8h, v18.8h +; NONEON-NOSVE-NEXT: uzp1 v2.16b, v4.16b, v6.16b +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v1.16b, v7.16b +; NONEON-NOSVE-NEXT: uzp1 v3.16b, v5.16b, v3.16b +; NONEON-NOSVE-NEXT: add v2.16b, v2.16b, v2.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add v3.16b, v3.16b, v3.16b +; NONEON-NOSVE-NEXT: stp q1, q3, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <64 x i32>, ptr %in %b = trunc <64 x i32> %a to <64 x i8> %c = add <64 x i8> %b, %b @@ -300,6 +431,12 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, 
v1.8h, v0.8h +; NONEON-NOSVE-NEXT: ret %a = load <8 x i32>, ptr %in %b = trunc <8 x i32> %a to <8 x i16> ret <8 x i16> %b @@ -322,6 +459,17 @@ define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.h, z2.h, z2.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i32>, ptr %in %b = trunc <16 x i32> %a to <16 x i16> %c = add <16 x i16> %b, %b @@ -357,6 +505,24 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i32_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: ldp q6, q1, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v6.8h, v1.8h +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v2.8h, v2.8h, v2.8h +; NONEON-NOSVE-NEXT: add v3.8h, v3.8h, v3.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q3, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i32>, ptr %in %b = trunc <32 x i32> %a to <32 x i16> %c = add <32 x i16> %b, %b @@ -414,6 +580,38 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q4, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
trunc_v64i32_v64i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #128] +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: ldp q16, q1, [x0, #160] +; NONEON-NOSVE-NEXT: uzp1 v4.8h, v5.8h, v4.8h +; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v6.8h, v7.8h, v6.8h +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q18, q7, [x0, #96] +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v16.8h, v1.8h +; NONEON-NOSVE-NEXT: uzp1 v5.8h, v17.8h, v5.8h +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v4.8h, v4.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp1 v7.8h, v18.8h, v7.8h +; NONEON-NOSVE-NEXT: add v3.8h, v6.8h, v6.8h +; NONEON-NOSVE-NEXT: uzp1 v6.8h, v17.8h, v16.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #96] +; NONEON-NOSVE-NEXT: add v0.8h, v5.8h, v5.8h +; NONEON-NOSVE-NEXT: add v2.8h, v2.8h, v2.8h +; NONEON-NOSVE-NEXT: add v4.8h, v7.8h, v7.8h +; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #64] +; NONEON-NOSVE-NEXT: add v1.8h, v6.8h, v6.8h +; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q2, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <64 x i32>, ptr %in %b = trunc <64 x i32> %a to <64 x i16> %c = add <64 x i16> %b, %b @@ -437,6 +635,13 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i8> ret <4 x i8> %b @@ -461,6 +666,16 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) 
nounwind { ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h +; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr %in %b = trunc <8 x i64> %a to <8 x i8> ret <8 x i8> %b @@ -499,6 +714,21 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind { ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp1 v3.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i8> ret <16 x i8> %b @@ -565,6 +795,35 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z0.b, z0.b, z0.b ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #128] +; 
NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #160] +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp1 v16.4s, v17.4s, v16.4s +; NONEON-NOSVE-NEXT: uzp1 v5.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v7.4s, v19.4s, v18.4s +; NONEON-NOSVE-NEXT: uzp1 v6.4s, v21.4s, v20.4s +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v4.8h, v16.8h +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v2.8h, v7.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v6.8h, v5.8h +; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; NONEON-NOSVE-NEXT: uzp1 v1.16b, v2.16b, v3.16b +; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b +; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i8> %c = add <32 x i8> %b, %b @@ -587,6 +846,13 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind { ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i16> ret <4 x i16> %b @@ -610,6 +876,15 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind { ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr 
%in %b = trunc <8 x i64> %a to <8 x i16> ret <8 x i16> %b @@ -647,6 +922,23 @@ define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.h, z3.h, z3.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i64_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v3.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i16> %c = add <16 x i16> %b, %b @@ -711,6 +1003,36 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q1, q2, [x1, #32] ; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #128] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #160] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #224] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: ldp q3, q1, [x0] +; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v6.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: ldp q16, q7, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #96] +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v3.4s, v1.4s +; NONEON-NOSVE-NEXT: uzp1 v5.4s, v17.4s, v5.4s +; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; NONEON-NOSVE-NEXT: uzp1 v7.4s, v16.4s, v7.4s +; 
NONEON-NOSVE-NEXT: uzp1 v3.4s, v19.4s, v18.4s +; NONEON-NOSVE-NEXT: uzp1 v2.8h, v4.8h, v6.8h +; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h +; NONEON-NOSVE-NEXT: uzp1 v1.8h, v1.8h, v7.8h +; NONEON-NOSVE-NEXT: uzp1 v3.8h, v5.8h, v3.8h +; NONEON-NOSVE-NEXT: add v2.8h, v2.8h, v2.8h +; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: add v3.8h, v3.8h, v3.8h +; NONEON-NOSVE-NEXT: stp q1, q3, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i16> %c = add <32 x i16> %b, %b @@ -732,6 +1054,12 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind { ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: ret %a = load <4 x i64>, ptr %in %b = trunc <4 x i64> %a to <4 x i32> ret <4 x i32> %b @@ -754,6 +1082,17 @@ define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: add z1.s, z2.s, z2.s ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32] +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q1, q0, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <8 x i64>, ptr %in %b = trunc <8 x i64> %a to <8 x i32> %c = add <8 x i32> %b, %b @@ -789,6 +1128,24 @@ define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v16i64_v16i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64] +; 
NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: ldp q6, q1, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: uzp1 v3.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v6.4s, v1.4s +; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: add v2.4s, v2.4s, v2.4s +; NONEON-NOSVE-NEXT: add v3.4s, v3.4s, v3.4s +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32] +; NONEON-NOSVE-NEXT: stp q3, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <16 x i64>, ptr %in %b = trunc <16 x i64> %a to <16 x i32> %c = add <16 x i32> %b, %b @@ -846,6 +1203,38 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind { ; CHECK-NEXT: stp q2, q3, [x1, #32] ; CHECK-NEXT: stp q4, q0, [x1] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #192] +; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #224] +; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #128] +; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; NONEON-NOSVE-NEXT: ldp q16, q1, [x0, #160] +; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s +; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64] +; NONEON-NOSVE-NEXT: uzp1 v6.4s, v7.4s, v6.4s +; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] +; NONEON-NOSVE-NEXT: ldp q18, q7, [x0, #96] +; NONEON-NOSVE-NEXT: uzp1 v1.4s, v16.4s, v1.4s +; NONEON-NOSVE-NEXT: uzp1 v5.4s, v17.4s, v5.4s +; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #32] +; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s +; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s +; NONEON-NOSVE-NEXT: add v4.4s, v4.4s, v4.4s +; NONEON-NOSVE-NEXT: uzp1 v7.4s, v18.4s, v7.4s +; NONEON-NOSVE-NEXT: add v3.4s, v6.4s, v6.4s +; NONEON-NOSVE-NEXT: uzp1 v6.4s, v17.4s, v16.4s +; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s +; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #96] +; NONEON-NOSVE-NEXT: add v0.4s, v5.4s, v5.4s +; NONEON-NOSVE-NEXT: add v2.4s, v2.4s, 
v2.4s +; NONEON-NOSVE-NEXT: add v4.4s, v7.4s, v7.4s +; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #64] +; NONEON-NOSVE-NEXT: add v1.4s, v6.4s, v6.4s +; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #32] +; NONEON-NOSVE-NEXT: stp q2, q1, [x1] +; NONEON-NOSVE-NEXT: ret %a = load <32 x i64>, ptr %in %b = trunc <32 x i64> %a to <32 x i32> %c = add <32 x i32> %b, %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll index 304823c9e64145..874af15e211177 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -14,6 +15,12 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v1.8b, v0.8b, v0.8b, #6 +; NONEON-NOSVE-NEXT: trn1 v0.4h, v0.4h, v1.4h +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x i8> %op1, <4 x i8> %op2, <4 x i32> ret <4 x i8> %ret } @@ -28,6 +35,11 @@ define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: insr z1.b, w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #7 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> ret <8 x i8> %ret } @@ -42,6 +54,11 @@ define <16 
x i8> @shuffle_ext_byone_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: insr z1.b, w8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #15 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <16 x i8> %op1, <16 x i8> %op2, <16 x i32> ret <16 x i8> %ret @@ -60,6 +77,15 @@ define void @shuffle_ext_byone_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.b, w8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v32i8: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #15 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #15 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b %ret = shufflevector <32 x i8> %op1, <32 x i8> %op2, <32 x i32> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: revw z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: rev64 v0.2s, v0.2s +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x i16> %op1, <2 x i16> %op2, <2 x i32> ret <2 x i16> %ret } @@ -92,6 +123,11 @@ define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: insr z1.h, w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #6 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x i16> %op1, <4 x i16> %op2, <4 x i32> ret <4 x i16> %ret } @@ -106,6 +142,11 @@ define <8 x i16> @shuffle_ext_byone_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: insr z1.h, w8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: 
shuffle_ext_byone_v8i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <8 x i16> %op1, <8 x i16> %op2, <8 x i32> ret <8 x i16> %ret } @@ -123,6 +164,15 @@ define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.h, w8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16i16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #14 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b %ret = shufflevector <16 x i16> %op1, <16 x i16> %op2, <16 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: insr z1.s, w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #4 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x i32> %op1, <2 x i32> %op2, <2 x i32> ret <2 x i32> %ret } @@ -155,6 +210,11 @@ define <4 x i32> @shuffle_ext_byone_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: insr z1.s, w8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x i32> %op1, <4 x i32> %op2, <4 x i32> ret <4 x i32> %ret } @@ -172,6 +232,15 @@ define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.s, w8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; NONEON-NOSVE-NEXT: 
ext v1.16b, v1.16b, v2.16b, #12 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b %ret = shufflevector <8 x i32> %op1, <8 x i32> %op2, <8 x i32> @@ -189,6 +258,11 @@ define <2 x i64> @shuffle_ext_byone_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: insr z1.d, x8 ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x i64> %op1, <2 x i64> %op2, <2 x i32> ret <2 x i64> %ret } @@ -206,6 +280,15 @@ define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.d, x8 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b %ret = shufflevector <4 x i64> %op1, <4 x i64> %op2, <4 x i32> @@ -223,6 +306,11 @@ define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-NEXT: insr z0.h, h2 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #6 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x half> %op1, <4 x half> %op2, <4 x i32> ret <4 x half> %ret } @@ -236,6 +324,11 @@ define <8 x half> @shuffle_ext_byone_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-NEXT: insr z0.h, h2 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, 
v1.16b, #14 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <8 x half> %op1, <8 x half> %op2, <8 x i32> ret <8 x half> %ret } @@ -251,6 +344,15 @@ define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.h, h2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16f16: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #14 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <16 x half>, ptr %a %op2 = load <16 x half>, ptr %b %ret = shufflevector <16 x half> %op1, <16 x half> %op2, <16 x i32> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2) ; CHECK-NEXT: insr z0.s, s2 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #4 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x float> %op1, <2 x float> %op2, <2 x i32> ret <2 x float> %ret } @@ -281,6 +388,11 @@ define <4 x float> @shuffle_ext_byone_v4f32(<4 x float> %op1, <4 x float> %op2) ; CHECK-NEXT: insr z0.s, s2 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <4 x float> %op1, <4 x float> %op2, <4 x i32> ret <4 x float> %ret } @@ -296,6 +408,15 @@ define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.s, s2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f32: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, 
v2.16b, #12 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <8 x float>, ptr %a %op2 = load <8 x float>, ptr %b %ret = shufflevector <8 x float> %op1, <8 x float> %op2, <8 x i32> @@ -312,6 +433,11 @@ define <2 x double> @shuffle_ext_byone_v2f64(<2 x double> %op1, <2 x double> %op ; CHECK-NEXT: insr z0.d, d2 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: ret %ret = shufflevector <2 x double> %op1, <2 x double> %op2, <2 x i32> ret <2 x double> %ret } @@ -327,6 +453,15 @@ define void @shuffle_ext_byone_v4f64(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.d, d2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f64: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q1, q2, [x1] +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8 +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #8 +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> @@ -345,6 +480,15 @@ define void @shuffle_ext_byone_reverse(ptr %a, ptr %b) { ; CHECK-NEXT: insr z3.d, d2 ; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_byone_reverse: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldp q0, q2, [x0] +; NONEON-NOSVE-NEXT: ldr q1, [x1, #16] +; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v0.16b, #8 +; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v2.16b, #8 +; NONEON-NOSVE-NEXT: stp q1, q0, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> @@ -359,6 +503,13 @@ define void @shuffle_ext_invalid(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q1, [x1] ; 
CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: shuffle_ext_invalid: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: ldr q0, [x0, #16] +; NONEON-NOSVE-NEXT: ldr q1, [x1] +; NONEON-NOSVE-NEXT: stp q0, q1, [x0] +; NONEON-NOSVE-NEXT: ret %op1 = load <4 x double>, ptr %a %op2 = load <4 x double>, ptr %b %ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll index 6c9c0556056684..e69f59aedc026f 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -11,6 +12,11 @@ define fp128 @test_streaming_compatible_register_mov(fp128 %q0, fp128 %q1) { ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: test_streaming_compatible_register_mov: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b +; NONEON-NOSVE-NEXT: ret ret fp128 %q1 } @@ -20,6 +26,11 @@ define double @fp_zero_constant() { ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, xzr ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fp_zero_constant: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: fmov d0, xzr +; NONEON-NOSVE-NEXT: ret ret double 0.0 } @@ -29,6 +40,11 @@ define <2 x i64> @fixed_vec_zero_constant() { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fixed_vec_zero_constant: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, 
#0000000000000000 +; NONEON-NOSVE-NEXT: ret ret <2 x i64> zeroinitializer } @@ -38,5 +54,10 @@ define <2 x double> @fixed_vec_fp_zero_constant() { ; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret +; +; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant: +; NONEON-NOSVE: // %bb.0: +; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000 +; NONEON-NOSVE-NEXT: ret ret <2 x double> } diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 9690e126dfcfcb..3ec36f03a48aa4 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -3249,3 +3249,209 @@ define double @v_fmaximum3_f64_const1_const2(double %a) { %max1 = call double @llvm.maximum.f64(double %max0, double 16.0) ret double %max1 } + +define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) { +; GFX12-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_maximum_f32 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_maximum_f32 v1, v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call float @llvm.maximum.f32(float %a, float %b) + %max1 = call float @llvm.maximum.f32(float %max0, float %c) + %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 + %insert.1 = insertelement <2 x float> 
%insert.0, float %max1, i32 1 + ret <2 x float> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) { +; GFX12-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_maximum_f32 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: s_maximum_f32 s1, s0, s2 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to shader part epilog + %max0 = call float @llvm.maximum.f32(float %a, float %b) + %max1 = call float @llvm.maximum.f32(float %max0, float %c) + %cast0 = bitcast float %max0 to i32 + %cast1 = bitcast float %max1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <2 x half> @v_no_fmaximum3_f16__multi_use(half %a, half %b, half %c) { +; GFX12-LABEL: v_no_fmaximum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_maximum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_maximum_f16 v1, v0, v2 +; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX12-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f16_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_max_f16_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call half @llvm.maximum.f16(half %a, half %b) + %max1 = call half @llvm.maximum.f16(half %max0, half %c) + %insert.0 = insertelement <2 x half> poison, half %max0, i32 0 + %insert.1 = insertelement <2 x half> %insert.0, half %max1, i32 1 + ret <2 x half> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fmaximum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) { +; GFX12-LABEL: s_no_fmaximum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_maximum_f16 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: s_maximum_f16 s1, s0, s2 +; GFX12-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fmaximum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_max_f16_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_max_f16_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to shader part epilog + %max0 = call half @llvm.maximum.f16(half %a, half %b) + %max1 = call half @llvm.maximum.f16(half %max0, half %c) + %cast0 
= bitcast half %max0 to i16 + %cast1 = bitcast half %max1 to i16 + %ext0 = zext i16 %cast0 to i32 + %ext1 = zext i16 %cast1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <4 x half> @v_no_fmaximum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) { +; GFX12-LABEL: v_no_fmaximum3_v2f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_pk_maximum_f16 v1, v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_v2f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4 +; GFX9-NEXT: v_pk_max_f16 v3, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b) + %max1 = call <2 x half> @llvm.maximum.f16(<2 x half> %max0, <2 x 
half> %c) + %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> + ret <4 x half> %concat +} + +define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double %c) { +; GFX12-LABEL: v_no_fmaximum3_f64__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_maximum_f64 v[0:1], v[0:1], v[2:3] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_maximum_f64 v[2:3], v[0:1], v[4:5] +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fmaximum3_f64__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f64 v[6:7], v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call double @llvm.maximum.f64(double %a, double %b) + %max1 = call double @llvm.maximum.f64(double %max0, double %c) + %insert.0 = insertelement <2 x double> poison, double %max0, i32 0 + %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 + ret <2 x double> %insert.1 +} diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index 7481fff251d895..0e0b73b88d2dca 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -3249,3 +3249,209 @@ define double @v_fminimum3_f64_const1_const2(double %a) { %max1 = call double @llvm.minimum.f64(double %max0, double 16.0) ret double %max1 } + +define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) { +; GFX12-LABEL: 
v_no_fminimum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_minimum_f32 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_minimum_f32 v1, v0, v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call float @llvm.minimum.f32(float %a, float %b) + %max1 = call float @llvm.minimum.f32(float %max0, float %c) + %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 + %insert.1 = insertelement <2 x float> %insert.0, float %max1, i32 1 + ret <2 x float> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float inreg %b, float inreg %c) { +; GFX12-LABEL: s_no_fminimum3_f32__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_minimum_f32 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) +; GFX12-NEXT: s_minimum_f32 s1, s0, s2 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fminimum3_f32__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to 
shader part epilog + %max0 = call float @llvm.minimum.f32(float %a, float %b) + %max1 = call float @llvm.minimum.f32(float %max0, float %c) + %cast0 = bitcast float %max0 to i32 + %cast1 = bitcast float %max1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %cast1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <2 x half> @v_no_fminimum3_f16__multi_use(half %a, half %b, half %c) { +; GFX12-LABEL: v_no_fminimum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_minimum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_minimum_f16 v1, v0, v2 +; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f16_e32 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-NEXT: v_min_f16_e32 v1, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call half @llvm.minimum.f16(half %a, half %b) + %max1 = call half @llvm.minimum.f16(half %max0, half %c) + %insert.0 = insertelement <2 x half> poison, half %max0, i32 0 + %insert.1 = insertelement <2 x half> %insert.0, half %max1, i32 1 + ret <2 x half> %insert.1 +} + +define amdgpu_ps <2 x i32> @s_no_fminimum3_f16__multi_use(half inreg %a, half inreg %b, half inreg %c) { +; GFX12-LABEL: 
s_no_fminimum3_f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_minimum_f16 s0, s0, s1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: s_minimum_f16 s1, s0, s2 +; GFX12-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX12-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX12-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_no_fminimum3_f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_min_f16_e32 v1, s0, v0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX9-NEXT: v_min_f16_e32 v1, s2, v0 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: ; return to shader part epilog + %max0 = call half @llvm.minimum.f16(half %a, half %b) + %max1 = call half @llvm.minimum.f16(half %max0, half %c) + %cast0 = bitcast half %max0 to i16 + %cast1 = bitcast half %max1 to i16 + %ext0 = zext i16 %cast0 to i32 + %ext1 = zext i16 %cast1 to i32 + %readfirstlane0 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext0) + %readfirstlane1 = call i32 @llvm.amdgcn.readfirstlane(i32 %ext1) + %insert.0 = insertelement <2 x i32> poison, i32 %readfirstlane0, i32 0 + %insert.1 = insertelement <2 x i32> %insert.0, i32 %readfirstlane1, i32 1 + ret <2 x i32> %insert.1 +} + +define <4 x half> @v_no_fminimum3_v2f16__multi_use(<2 x half> %a, <2 x half> %b, <2 x half> %c) { +; GFX12-LABEL: v_no_fminimum3_v2f16__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_pk_minimum_f16 v1, v0, 
v2 +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_v2f16__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v3, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0x7e00 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v5, s4 +; GFX9-NEXT: v_pk_min_f16 v3, v0, v2 +; GFX9-NEXT: v_cmp_o_f16_e32 vcc, v5, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v5, v4, v3, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 +; GFX9-NEXT: v_cmp_o_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-NEXT: v_perm_b32 v1, v1, v5, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call <2 x half> @llvm.minimum.f16(<2 x half> %a, <2 x half> %b) + %max1 = call <2 x half> @llvm.minimum.f16(<2 x half> %max0, <2 x half> %c) + %concat = shufflevector <2 x half> %max0, <2 x half> %max1, <4 x i32> + ret <4 x half> %concat +} + +define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double %c) { +; GFX12-LABEL: v_no_fminimum3_f64__multi_use: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_minimum_f64 v[0:1], v[0:1], v[2:3] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_minimum_f64 v[2:3], v[0:1], v[4:5] +; GFX12-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_no_fminimum3_f64__multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f64 v[6:7], v[0:1], v[2:3] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_mov_b32_e32 v8, 0x7ff80000 +; GFX9-NEXT: 
v_cndmask_b32_e32 v1, v7, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc +; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] +; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] + %max0 = call double @llvm.minimum.f64(double %a, double %b) + %max1 = call double @llvm.minimum.f64(double %max0, double %c) + %insert.0 = insertelement <2 x double> poison, double %max0, i32 0 + %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 + ret <2 x double> %insert.1 +} diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll new file mode 100644 index 00000000000000..50a3336a7483c7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll @@ -0,0 +1,1757 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s + +; Test if fcmp+select patterns form min/max instructions when allowed +; by flags. 
+ +; TODO: Merge with fmin_legacy.ll/fmax_legacy.ll + +define float @v_test_fmin_legacy_ule_f32_safe(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nnan_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float 
%a, %b + %val = select nnan i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule float %a, %b + %val = select nnan nsz i1 %cmp, float %a, float 
%b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_safe(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nnan_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nnan i1 
%cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge float %a, %b + %val = select nnan nsz i1 %cmp, float %a, float %b + ret float %val +} + +define 
<2 x float> @v_test_fmin_legacy_ule_v2f32_safe(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: 
v_test_fmin_legacy_ule_v2f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x 
float> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x float> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_safe(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_safe: +; GFX12: ; 
%bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: 
v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <2 x float> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag: +; GFX12: ; %bb.0: +; 
GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x float> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + ret <2 x float> %val +} + +define half @v_test_fmin_legacy_ule_f16_safe(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nnan_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, 
v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nnan i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nsz i1 %cmp, half %a, 
half %b + ret half %val +} + +define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule half %a, %b + %val = select nnan nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_safe(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; 
GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nnan_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nnan i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nsz i1 %cmp, half %a, half %b + ret half %val +} + +define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge half %a, %b + %val = select nnan nsz i1 %cmp, half %a, half %b + ret half %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_safe(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 
v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, 
v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: 
v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, 
v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <2 x half> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_safe(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; 
GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; 
GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nnan <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v3, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, 
vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX12-NEXT: v_perm_b32 v0, v2, v0, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag: +; GFX12: ; 
%bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <2 x half> %a, %b + %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b + ret <2 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_safe(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 
v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 
v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 
v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: 
v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_ngt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 +; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp ule <4 x half> %a, %b + %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_safe(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, 
v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_safe: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | 
instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; 
GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nnan <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nsz_flag(<4 x half> %a, <4 x half> %b) { 
+; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v7, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v5, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v4, v0, s4 +; GFX9-NEXT: v_perm_b32 v1, v6, v1, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: 
s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v3 +; GFX12-NEXT: v_lshrrev_b32_e32 v5, 16, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v7, 16, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v5, v4 +; GFX12-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v7, v6 +; GFX12-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v2 +; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo +; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v1, v3 +; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_perm_b32 v0, v5, v0, 0x5040100 +; GFX12-NEXT: v_perm_b32 v1, v4, v1, 0x5040100 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 x half> %b) { +; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 +; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 +; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 +; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0 +; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, 
v1 +; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2 +; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 +; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %cmp = fcmp uge <4 x half> %a, %b + %val = select nnan nsz <4 x i1> %cmp, <4 x half> %a, <4 x half> %b + ret <4 x half> %val +} + +define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) { +; GFX7-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX7-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 
s[30:31] + %a = fadd nnan float %arg0, %arg0 + %b = fadd nnan float %arg1, %arg1 + %cmp = fcmp ule float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} + +define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float %arg1) { +; GFX7-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX7-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v0, v0 +; GFX9-NEXT: v_add_f32_e32 v1, v1, v1 +; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %a = fadd nnan float %arg0, %arg0 + %b = fadd nnan float %arg1, %arg1 + %cmp = fcmp uge float %a, %b + %val = select nsz i1 %cmp, float %a, float %b + ret float %val +} diff --git a/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir b/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir index d8f2b08adaf2fb..dc20a1577aa5bc 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-counter-XToI.mir @@ -3,16 +3,16 @@ # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ # RUN: -run-pass ppc-mi-peepholes %s -o - | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ 
-# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=0,ppc-xtoi-peephole-count=8 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=0-7 \ # RUN: | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=3,ppc-xtoi-peephole-count=2 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=3-4 \ # RUN: | FileCheck %s --check-prefix=ONE-FIRSTSTORE # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=5,ppc-xtoi-peephole-count=2 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=5-6 \ # RUN: | FileCheck %s --check-prefix=ONE-SECONDSTORE # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole-skip=3,ppc-xtoi-peephole-count=4 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-xtoi-peephole=3-6 \ # RUN: | FileCheck %s --check-prefix=TWO --- diff --git a/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir b/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir index cf3ff291e26c6a..09f7ededa20c64 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-counter-perOp.mir @@ -3,16 +3,19 @@ # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ # RUN: -run-pass ppc-mi-peepholes %s -o - | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=0,ppc-per-op-peephole-count=6 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=0-5 \ # RUN: | FileCheck %s --check-prefix=ALL # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu 
-verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=3,ppc-per-op-peephole-count=1 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=0-5 \ +# RUN: | FileCheck %s --check-prefix=ALL +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=3 \ # RUN: | FileCheck %s --check-prefix=ONE-FIRST-RLWINM # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=4,ppc-per-op-peephole-count=1 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=4 \ # RUN: | FileCheck %s --check-prefix=ONE-SECOND-RLWINM # RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ -# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole-skip=3,ppc-per-op-peephole-count=2 \ +# RUN: -run-pass ppc-mi-peepholes %s -o - -debug-counter=ppc-per-op-peephole=3-4 \ # RUN: | FileCheck %s --check-prefix=TWO --- diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index 549d531e829ea5..a90c244437a033 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -383,8 +383,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0) +; RV32I-NEXT: lui s4, %hi(.LCPI3_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 @@ -442,9 +442,9 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32M-LABEL: test_cttz_i64: ; RV32M: # %bb.0: ; RV32M-NEXT: lui a2, 30667 -; RV32M-NEXT: addi a2, a2, 1329 -; RV32M-NEXT: lui a3, %hi(.LCPI3_0) -; RV32M-NEXT: addi a3, a3, %lo(.LCPI3_0) +; 
RV32M-NEXT: addi a3, a2, 1329 +; RV32M-NEXT: lui a2, %hi(.LCPI3_0) +; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0) ; RV32M-NEXT: bnez a1, .LBB3_3 ; RV32M-NEXT: # %bb.1: ; RV32M-NEXT: li a1, 32 @@ -452,18 +452,18 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32M-NEXT: .LBB3_2: ; RV32M-NEXT: neg a1, a0 ; RV32M-NEXT: and a0, a0, a1 -; RV32M-NEXT: mul a0, a0, a2 +; RV32M-NEXT: mul a0, a0, a3 ; RV32M-NEXT: srli a0, a0, 27 -; RV32M-NEXT: add a0, a3, a0 +; RV32M-NEXT: add a0, a2, a0 ; RV32M-NEXT: lbu a0, 0(a0) ; RV32M-NEXT: li a1, 0 ; RV32M-NEXT: ret ; RV32M-NEXT: .LBB3_3: ; RV32M-NEXT: neg a4, a1 ; RV32M-NEXT: and a1, a1, a4 -; RV32M-NEXT: mul a1, a1, a2 +; RV32M-NEXT: mul a1, a1, a3 ; RV32M-NEXT: srli a1, a1, 27 -; RV32M-NEXT: add a1, a3, a1 +; RV32M-NEXT: add a1, a2, a1 ; RV32M-NEXT: lbu a1, 0(a1) ; RV32M-NEXT: bnez a0, .LBB3_2 ; RV32M-NEXT: .LBB3_4: @@ -814,8 +814,8 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI7_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI7_0) +; RV32I-NEXT: lui s4, %hi(.LCPI7_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI7_0) ; RV32I-NEXT: neg a0, s1 ; RV32I-NEXT: and a0, s1, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll index 9ae30e646fdbf7..fe6e20d852d590 100644 --- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll +++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll @@ -48,8 +48,8 @@ define signext i32 @ctz_dereferencing_pointer(ptr %b) nounwind { ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI0_0) -; RV32I-NEXT: addi s3, a0, %lo(.LCPI0_0) +; RV32I-NEXT: lui s3, %hi(.LCPI0_0) +; RV32I-NEXT: addi s3, s3, %lo(.LCPI0_0) ; RV32I-NEXT: neg a0, s4 ; RV32I-NEXT: and a0, s4, a0 ; RV32I-NEXT: mv a1, s1 @@ -511,8 +511,8 @@ define signext i32 @ctz4(i64 %b) nounwind { ; RV32I-NEXT: mv a1, s3 ; 
RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI6_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI6_0) +; RV32I-NEXT: lui s4, %hi(.LCPI6_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI6_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index eb6ac985287a10..478d2eae9dca2c 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -24,31 +24,31 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46) -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) -; CHECK-NEXT: vle16.v v14, (a0) +; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40) diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index 3c2e84689c979c..62b1549a5d58ad 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -389,8 +389,8 @@ define dso_local i32 @load_ga() local_unnamed_addr #0 { define dso_local i64 @load_ga_8() nounwind { ; RV32I-LABEL: load_ga_8: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: lui a0, %hi(ga_8) -; RV32I-NEXT: addi a1, a0, %lo(ga_8) +; RV32I-NEXT: lui a1, %hi(ga_8) +; RV32I-NEXT: addi a1, a1, %lo(ga_8) ; RV32I-NEXT: lw a0, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index b45ab135fa1c7c..197366e7e05fe8 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -209,8 +209,8 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0) +; RV32I-NEXT: lui s4, %hi(.LCPI3_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) ; RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 7e6c3f9c87d277..f25aa0de89da88 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -199,8 +199,8 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lui a0, %hi(.LCPI3_0) -; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0) +; RV32I-NEXT: lui s4, %hi(.LCPI3_0) +; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0) ; 
RV32I-NEXT: neg a0, s2 ; RV32I-NEXT: and a0, s2, a0 ; RV32I-NEXT: mv a1, s3 diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 9cb3991f31f94d..08b310213d16e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -126,28 +126,28 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 -; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vsext.vf8 v24, v16 +; CHECK-NEXT: vsaddu.vx v16, v24, a1 +; CHECK-NEXT: vmsltu.vx v9, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v8, v16, a2 -; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmsltu.vx v10, v16, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v8, 2 +; CHECK-NEXT: vslideup.vi v0, v9, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v10, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma @@ -169,13 +169,13 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; 
CHECK-NEXT: vmsltu.vx v8, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v8, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) ; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vmsltu.vx v10, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) ; CHECK-NEXT: vle8.v v11, (a0) @@ -187,10 +187,10 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v11, v16, a2 ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_4) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) ; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) ; CHECK-NEXT: vle8.v v13, (a0) @@ -201,27 +201,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsaddu.vx v16, v16, a1 ; CHECK-NEXT: vmsltu.vx v13, v16, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vslideup.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vslideup.vi v10, v9, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v11, 6 +; CHECK-NEXT: vslideup.vi v10, v11, 6 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v12, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v13, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v16, v9 +; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v8, v16, a2 ; CHECK-NEXT: vsetivli 
zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v0, v9, 6 +; CHECK-NEXT: vslideup.vi v0, v8, 6 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v0, v8, 8 +; CHECK-NEXT: vslideup.vi v0, v10, 8 ; CHECK-NEXT: ret %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc) ret <128 x i1> %mask diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index fff280c005b542..df413b878172bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2574,9 +2574,8 @@ define @vp_ctlz_nxv1i9( %va, @vp_ctlz_nxv1i9( %va, @vp_ctlz_zero_undef_nxv1i9( %va, @vp_ctlz_zero_undef_nxv1i9( %va, @vp_ctpop_nxv1i9( %va, @vp_ctpop_nxv1i9( %va, @llvm.vp.ctpop.nxv1i9( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll index 75747a6674b7b4..d8781495abd75c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -7,7 +7,7 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare @llvm.experimental.constrained.ceil.nxv1f16( @ceil_nxv2f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: 
lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define @ceil_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare @llvm.experimental.constrained.ceil.nxv2f16( @ceil_nxv4f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define @ceil_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare @llvm.experimental.constrained.ceil.nxv4f16( @ceil_nxv8f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define @ceil_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -95,7 +99,7 @@ declare @llvm.experimental.constrained.ceil.nxv8f16( @ceil_nxv16f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; 
CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define @ceil_nxv16f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -117,7 +122,7 @@ declare @llvm.experimental.constrained.ceil.nxv16f16( @ceil_nxv32f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -125,6 +130,7 @@ define @ceil_nxv32f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -139,7 +145,7 @@ declare @llvm.experimental.constrained.ceil.nxv32f16( @ceil_nxv1f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -147,6 +153,7 @@ define @ceil_nxv1f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -161,7 +168,7 @@ declare @llvm.experimental.constrained.ceil.nxv1f32( @ceil_nxv2f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: 
vfabs.v v9, v8 @@ -169,6 +176,7 @@ define @ceil_nxv2f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -183,7 +191,7 @@ declare @llvm.experimental.constrained.ceil.nxv2f32( @ceil_nxv4f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -191,6 +199,7 @@ define @ceil_nxv4f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -205,7 +214,7 @@ declare @llvm.experimental.constrained.ceil.nxv4f32( @ceil_nxv8f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -213,6 +222,7 @@ define @ceil_nxv8f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -227,7 +237,7 @@ declare @llvm.experimental.constrained.ceil.nxv8f32( @ceil_nxv16f32( %x) strictfp { ; CHECK-LABEL: ceil_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -235,6 +245,7 @@ 
define @ceil_nxv16f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -249,7 +260,7 @@ declare @llvm.experimental.constrained.ceil.nxv16f32( @ceil_nxv1f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -257,6 +268,7 @@ define @ceil_nxv1f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -271,7 +283,7 @@ declare @llvm.experimental.constrained.ceil.nxv1f64( @ceil_nxv2f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -279,6 +291,7 @@ define @ceil_nxv2f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -293,7 +306,7 @@ declare @llvm.experimental.constrained.ceil.nxv2f64( @ceil_nxv4f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -301,6 +314,7 @@ define 
@ceil_nxv4f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -315,7 +329,7 @@ declare @llvm.experimental.constrained.ceil.nxv4f64( @ceil_nxv8f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -323,6 +337,7 @@ define @ceil_nxv8f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll index 31a94532044574..1df452d8641c58 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -7,7 +7,7 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-LABEL: floor_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare @llvm.experimental.constrained.floor.nxv1f16( @floor_nxv2f16( %x) strictfp { ; CHECK-LABEL: floor_nxv2f16: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define @floor_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare @llvm.experimental.constrained.floor.nxv2f16( @floor_nxv4f16( %x) strictfp { ; CHECK-LABEL: floor_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define @floor_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare @llvm.experimental.constrained.floor.nxv4f16( @floor_nxv8f16( %x) strictfp { ; CHECK-LABEL: floor_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define @floor_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -95,7 +99,7 @@ declare @llvm.experimental.constrained.floor.nxv8f16( @floor_nxv16f16( %x) strictfp { ; CHECK-LABEL: floor_nxv16f16: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define @floor_nxv16f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -117,7 +122,7 @@ declare @llvm.experimental.constrained.floor.nxv16f16( @floor_nxv32f16( %x) strictfp { ; CHECK-LABEL: floor_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -125,6 +130,7 @@ define @floor_nxv32f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -139,7 +145,7 @@ declare @llvm.experimental.constrained.floor.nxv32f16( @floor_nxv1f32( %x) strictfp { ; CHECK-LABEL: floor_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -147,6 +153,7 @@ define @floor_nxv1f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -161,7 +168,7 @@ declare @llvm.experimental.constrained.floor.nxv1f32( @floor_nxv2f32( %x) strictfp { ; CHECK-LABEL: floor_nxv2f32: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -169,6 +176,7 @@ define @floor_nxv2f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -183,7 +191,7 @@ declare @llvm.experimental.constrained.floor.nxv2f32( @floor_nxv4f32( %x) strictfp { ; CHECK-LABEL: floor_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -191,6 +199,7 @@ define @floor_nxv4f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -205,7 +214,7 @@ declare @llvm.experimental.constrained.floor.nxv4f32( @floor_nxv8f32( %x) strictfp { ; CHECK-LABEL: floor_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -213,6 +222,7 @@ define @floor_nxv8f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -227,7 +237,7 @@ declare @llvm.experimental.constrained.floor.nxv8f32( @floor_nxv16f32( %x) strictfp { ; CHECK-LABEL: floor_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, 
zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -235,6 +245,7 @@ define @floor_nxv16f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -249,7 +260,7 @@ declare @llvm.experimental.constrained.floor.nxv16f32( @floor_nxv1f64( %x) strictfp { ; CHECK-LABEL: floor_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -257,6 +268,7 @@ define @floor_nxv1f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -271,7 +283,7 @@ declare @llvm.experimental.constrained.floor.nxv1f64( @floor_nxv2f64( %x) strictfp { ; CHECK-LABEL: floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -279,6 +291,7 @@ define @floor_nxv2f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -293,7 +306,7 @@ declare @llvm.experimental.constrained.floor.nxv2f64( @floor_nxv4f64( %x) strictfp { ; CHECK-LABEL: floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, 
e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -301,6 +314,7 @@ define @floor_nxv4f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -315,7 +329,7 @@ declare @llvm.experimental.constrained.floor.nxv4f64( @floor_nxv8f64( %x) strictfp { ; CHECK-LABEL: floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -323,6 +337,7 @@ define @floor_nxv8f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll index 1e93a73ede5d65..404fb72b8abe91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -7,7 +7,7 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: ceil_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; 
CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare <1 x half> @llvm.experimental.constrained.ceil.v1f16(<1 x half>, metadat define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: ceil_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare <2 x half> @llvm.experimental.constrained.ceil.v2f16(<2 x half>, metadat define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: ceil_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare <4 x half> @llvm.experimental.constrained.ceil.v4f16(<4 x half>, metadat define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, 
%lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -95,7 +99,7 @@ declare <8 x half> @llvm.experimental.constrained.ceil.v8f16(<8 x half>, metadat define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: ceil_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -118,7 +123,7 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: ceil_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -126,6 +131,7 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -140,7 +146,7 @@ declare <32 x half> @llvm.experimental.constrained.ceil.v32f16(<32 x half>, meta define <1 x float> @ceil_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: ceil_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, 
mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -148,6 +154,7 @@ define <1 x float> @ceil_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -162,7 +169,7 @@ declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metad define <2 x float> @ceil_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: ceil_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -170,6 +177,7 @@ define <2 x float> @ceil_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -184,7 +192,7 @@ declare <2 x float> @llvm.experimental.constrained.ceil.v2f32(<2 x float>, metad define <4 x float> @ceil_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -192,6 +200,7 @@ define <4 x float> @ceil_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -206,7 +215,7 @@ declare <4 x float> 
@llvm.experimental.constrained.ceil.v4f32(<4 x float>, metad define <8 x float> @ceil_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: ceil_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -214,6 +223,7 @@ define <8 x float> @ceil_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -228,7 +238,7 @@ declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metad define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: ceil_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -236,6 +246,7 @@ define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -250,7 +261,7 @@ declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, me define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: ceil_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -258,6 +269,7 @@ define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; 
CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -272,7 +284,7 @@ declare <1 x double> @llvm.experimental.constrained.ceil.v1f64(<1 x double>, met define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -280,6 +292,7 @@ define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -294,7 +307,7 @@ declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, met define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -302,6 +315,7 @@ define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -316,7 +330,7 @@ declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, met define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: ceil_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; 
CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -324,6 +338,7 @@ define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll index 53018939fc6eb4..2319aab370d2de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -7,7 +7,7 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: floor_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -15,6 +15,7 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -29,7 +30,7 @@ declare <1 x half> @llvm.experimental.constrained.floor.v1f16(<1 x half>, metada define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: floor_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -37,6 +38,7 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: 
vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -51,7 +53,7 @@ declare <2 x half> @llvm.experimental.constrained.floor.v2f16(<2 x half>, metada define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: floor_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -59,6 +61,7 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -73,7 +76,7 @@ declare <4 x half> @llvm.experimental.constrained.floor.v4f16(<4 x half>, metada define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -81,6 +84,7 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -95,7 +99,7 @@ declare <8 x half> @llvm.experimental.constrained.floor.v8f16(<8 x half>, metada define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: floor_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, 
%lo(.LCPI4_0)(a0) @@ -103,6 +107,7 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -118,7 +123,7 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: floor_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -126,6 +131,7 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -140,7 +146,7 @@ declare <32 x half> @llvm.experimental.constrained.floor.v32f16(<32 x half>, met define <1 x float> @floor_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: floor_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -148,6 +154,7 @@ define <1 x float> @floor_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -162,7 +169,7 @@ declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, meta define <2 x float> @floor_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: floor_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, 
e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -170,6 +177,7 @@ define <2 x float> @floor_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -184,7 +192,7 @@ declare <2 x float> @llvm.experimental.constrained.floor.v2f32(<2 x float>, meta define <4 x float> @floor_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -192,6 +200,7 @@ define <4 x float> @floor_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -206,7 +215,7 @@ declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, meta define <8 x float> @floor_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: floor_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -214,6 +223,7 @@ define <8 x float> @floor_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -228,7 +238,7 @@ declare <8 x float> 
@llvm.experimental.constrained.floor.v8f32(<8 x float>, meta define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: floor_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -236,6 +246,7 @@ define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -250,7 +261,7 @@ declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, m define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: floor_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -258,6 +269,7 @@ define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -272,7 +284,7 @@ declare <1 x double> @llvm.experimental.constrained.floor.v1f64(<1 x double>, me define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -280,6 +292,7 @@ define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, 
v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -294,7 +307,7 @@ declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, me define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -302,6 +315,7 @@ define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -316,7 +330,7 @@ declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, me define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: floor_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -324,6 +338,7 @@ define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 9e9a8b8a4b644e..719dd524942846 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -9,7 +9,7 @@ declare <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half>, me define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -32,7 +33,7 @@ declare <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half>, me define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -40,6 +41,7 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -55,7 +57,7 @@ declare <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half>, me define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; 
CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -63,6 +65,7 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -78,7 +81,7 @@ declare <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half>, define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -86,6 +89,7 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -102,7 +106,7 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -110,6 +114,7 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -125,7 +130,7 @@ declare <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float>, define 
<2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -133,6 +138,7 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -148,7 +154,7 @@ declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -156,6 +162,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -171,7 +178,7 @@ declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -179,6 +186,7 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; 
CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -194,7 +202,7 @@ declare <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-LABEL: nearbyint_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -202,6 +210,7 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -217,7 +226,7 @@ declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) @@ -225,6 +234,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -240,7 +250,7 @@ declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: 
vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) @@ -248,6 +258,7 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -263,7 +274,7 @@ declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -271,6 +282,7 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll index f189354237ee3a..e855d9504ff404 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll @@ -9,7 +9,7 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: round_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: 
flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare <1 x half> @llvm.experimental.constrained.round.v1f16(<1 x half>, metada define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: round_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare <2 x half> @llvm.experimental.constrained.round.v2f16(<2 x half>, metada define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: round_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare <4 x half> @llvm.experimental.constrained.round.v4f16(<4 x half>, metada define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: round_v8f16: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -97,7 +101,7 @@ declare <8 x half> @llvm.experimental.constrained.round.v8f16(<8 x half>, metada define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: round_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -120,7 +125,7 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: round_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -128,6 +133,7 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -142,7 +148,7 @@ declare <32 x 
half> @llvm.experimental.constrained.round.v32f16(<32 x half>, met define <1 x float> @round_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: round_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -150,6 +156,7 @@ define <1 x float> @round_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,7 +171,7 @@ declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, meta define <2 x float> @round_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: round_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -172,6 +179,7 @@ define <2 x float> @round_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -186,7 +194,7 @@ declare <2 x float> @llvm.experimental.constrained.round.v2f32(<2 x float>, meta define <4 x float> @round_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: round_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -194,6 +202,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi 
a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -208,7 +217,7 @@ declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, meta define <8 x float> @round_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: round_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -216,6 +225,7 @@ define <8 x float> @round_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -230,7 +240,7 @@ declare <8 x float> @llvm.experimental.constrained.round.v8f32(<8 x float>, meta define <16 x float> @round_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: round_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -238,6 +248,7 @@ define <16 x float> @round_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -252,7 +263,7 @@ declare <16 x float> @llvm.experimental.constrained.round.v16f32(<16 x float>, m define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: round_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 
%hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -260,6 +271,7 @@ define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -274,7 +286,7 @@ declare <1 x double> @llvm.experimental.constrained.round.v1f64(<1 x double>, me define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -282,6 +294,7 @@ define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -296,7 +309,7 @@ declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, me define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: round_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -304,6 +317,7 @@ define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -318,7 +332,7 @@ declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, me define <8 x double> 
@round_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: round_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -326,6 +340,7 @@ define <8 x double> @round_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll index 11920c7c31c981..9976cd2a8ab29a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -9,7 +9,7 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare <1 x half> @llvm.experimental.constrained.roundeven.v1f16(<1 x half>, me define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 
; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare <2 x half> @llvm.experimental.constrained.roundeven.v2f16(<2 x half>, me define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare <4 x half> @llvm.experimental.constrained.roundeven.v4f16(<4 x half>, me define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -97,7 +101,7 @@ declare <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 x half>, me define <16 x half> 
@roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -120,7 +125,7 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -128,6 +133,7 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -142,7 +148,7 @@ declare <32 x half> @llvm.experimental.constrained.roundeven.v32f16(<32 x half>, define <1 x float> @roundeven_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -150,6 +156,7 @@ define <1 x float> @roundeven_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: 
vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -164,7 +171,7 @@ declare <1 x float> @llvm.experimental.constrained.roundeven.v1f32(<1 x float>, define <2 x float> @roundeven_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -172,6 +179,7 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -186,7 +194,7 @@ declare <2 x float> @llvm.experimental.constrained.roundeven.v2f32(<2 x float>, define <4 x float> @roundeven_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -194,6 +202,7 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -208,7 +217,7 @@ declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, define <8 x float> @roundeven_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -216,6 +225,7 
@@ define <8 x float> @roundeven_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -230,7 +240,7 @@ declare <8 x float> @llvm.experimental.constrained.roundeven.v8f32(<8 x float>, define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: roundeven_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -238,6 +248,7 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -252,7 +263,7 @@ declare <16 x float> @llvm.experimental.constrained.roundeven.v16f32(<16 x float define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -260,6 +271,7 @@ define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -274,7 +286,7 @@ declare <1 x double> @llvm.experimental.constrained.roundeven.v1f64(<1 x double> define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: 
roundeven_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -282,6 +294,7 @@ define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -296,7 +309,7 @@ declare <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -304,6 +317,7 @@ define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -318,7 +332,7 @@ declare <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -326,6 +340,7 @@ define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: 
vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll index f16581444afca5..eac26451d5a8cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -7,13 +7,14 @@ define <1 x half> @trunc_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: trunc_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -27,13 +28,14 @@ declare <1 x half> @llvm.experimental.constrained.trunc.v1f16(<1 x half>, metada define <2 x half> @trunc_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: trunc_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -47,13 +49,14 @@ declare <2 x half> @llvm.experimental.constrained.trunc.v2f16(<2 x half>, metada define <4 x half> @trunc_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: trunc_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli 
zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -67,13 +70,14 @@ declare <4 x half> @llvm.experimental.constrained.trunc.v4f16(<4 x half>, metada define <8 x half> @trunc_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -87,13 +91,14 @@ declare <8 x half> @llvm.experimental.constrained.trunc.v8f16(<8 x half>, metada define <16 x half> @trunc_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: trunc_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -108,13 +113,14 @@ define <32 x half> @trunc_v32f16(<32 x half> %x) strictfp { ; CHECK-LABEL: trunc_v32f16: ; 
CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -128,13 +134,14 @@ declare <32 x half> @llvm.experimental.constrained.trunc.v32f16(<32 x half>, met define <1 x float> @trunc_v1f32(<1 x float> %x) strictfp { ; CHECK-LABEL: trunc_v1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -148,13 +155,14 @@ declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, meta define <2 x float> @trunc_v2f32(<2 x float> %x) strictfp { ; CHECK-LABEL: trunc_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -168,13 +176,14 @@ declare <2 x float> 
@llvm.experimental.constrained.trunc.v2f32(<2 x float>, meta define <4 x float> @trunc_v4f32(<4 x float> %x) strictfp { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -188,13 +197,14 @@ declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, meta define <8 x float> @trunc_v8f32(<8 x float> %x) strictfp { ; CHECK-LABEL: trunc_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -208,13 +218,14 @@ declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, meta define <16 x float> @trunc_v16f32(<16 x float> %x) strictfp { ; CHECK-LABEL: trunc_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: 
vsetvli zero, zero, e32, m4, ta, mu @@ -228,13 +239,14 @@ declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, m define <1 x double> @trunc_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: trunc_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -248,13 +260,14 @@ declare <1 x double> @llvm.experimental.constrained.trunc.v1f64(<1 x double>, me define <2 x double> @trunc_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -268,13 +281,14 @@ declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, me define <4 x double> @trunc_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: 
vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -288,13 +302,14 @@ declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, me define <8 x double> @trunc_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: trunc_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 79c36a629465d9..f4d7074c7f6b27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -3459,6 +3459,8 @@ define void @mulhu_v4i64(ptr %x) { ; RV64-NEXT: lui a1, %hi(.LCPI184_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) ; RV64-NEXT: vle64.v v10, (a1) +; RV64-NEXT: vmulhu.vv v10, v8, v10 +; RV64-NEXT: vsub.vv v8, v8, v10 ; RV64-NEXT: li a1, -1 ; RV64-NEXT: slli a1, a1, 63 ; RV64-NEXT: vmv.s.x v12, a1 @@ -3466,8 +3468,6 @@ define void @mulhu_v4i64(ptr %x) { ; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; RV64-NEXT: vslideup.vi v14, v12, 2 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vmulhu.vv v10, v8, v10 -; RV64-NEXT: vsub.vv v8, v8, v10 ; RV64-NEXT: vmulhu.vv v8, v8, v14 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: lui a1, 12320 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 
178a920169ad96..bc3e135a588a6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -159,17 +159,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 82 +; RV32-NEXT: li a3, 80 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 80 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 57 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 6 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -177,26 +176,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 41 +; RV32-NEXT: li a5, 40 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v1, a4 +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 6 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: li a5, 56 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: 
vmv1r.v v3, v0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 45 +; RV32-NEXT: li a5, 44 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 @@ -206,8 +205,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a5, a4, 5 -; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: slli a4, a4, 5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill @@ -216,21 +214,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a5, 1 ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a6, 25 +; RV32-NEXT: li a6, 24 ; RV32-NEXT: mul a4, a4, a6 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 73 +; RV32-NEXT: li a4, 72 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -238,27 +236,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a5, -64 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add 
a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v16, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -266,259 +263,257 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vslideup.vi v12, v8, 2 +; RV32-NEXT: vmv1r.v v8, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v12, v16, 8, v0.t -; RV32-NEXT: vmv.v.v v20, v12 ; RV32-NEXT: lui a1, 
%hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: lui a3, %hi(.LCPI6_3) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) -; RV32-NEXT: lui a4, %hi(.LCPI6_4) ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: vle16.v v16, (a3) -; RV32-NEXT: addi a1, a4, %lo(.LCPI6_4) +; RV32-NEXT: vle16.v v0, (a1) +; RV32-NEXT: vle16.v v4, (a3) +; RV32-NEXT: lui a1, %hi(.LCPI6_4) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v2, (a1) +; RV32-NEXT: vle16.v v10, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v24, v8, v4 +; RV32-NEXT: vrgatherei16.vv v24, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v4, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v24 +; RV32-NEXT: vmv.v.v v12, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; 
RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v24, v2 -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vrgatherei16.vv v12, v24, v10 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v16, v8, 6, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v24, 6, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) ; RV32-NEXT: lui a3, %hi(.LCPI6_6) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v4, (a3) -; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vle16.v v12, (a1) +; RV32-NEXT: vle16.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 12 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 960 +; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size 
Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v0, v12 +; RV32-NEXT: vmv1r.v v3, v8 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 12 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) ; RV32-NEXT: lui a3, %hi(.LCPI6_8) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8) -; RV32-NEXT: lui a4, %hi(.LCPI6_9) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: addi a1, a4, %lo(.LCPI6_9) +; RV32-NEXT: lui a1, %hi(.LCPI6_9) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9) ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; RV32-NEXT: vle16.v v24, (a3) -; RV32-NEXT: vle16.v v28, (a1) +; RV32-NEXT: vle16.v v4, (a3) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v4, v0, v8 +; RV32-NEXT: 
vrgatherei16.vv v12, v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv4r.v v24, v16 +; RV32-NEXT: vslideup.vi v12, v16, 4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 21 +; RV32-NEXT: li a3, 12 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vrgatherei16.vv v8, v0, v24 +; RV32-NEXT: vrgatherei16.vv v8, v16, v4 +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 13 -; RV32-NEXT: mul a1, a1, a3 +; 
RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: lui a1, 15 ; RV32-NEXT: vmv.s.x v3, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v16, 6 +; RV32-NEXT: vslideup.vi v8, v16, 6 ; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vrgatherei16.vv v8, v24, v12, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) ; RV32-NEXT: lui a3, %hi(.LCPI6_12) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vle16.v v12, (a3) +; RV32-NEXT: vle16.v v24, (a1) +; RV32-NEXT: vle16.v v4, (a3) ; RV32-NEXT: li a1, 1008 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add 
a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8 +; RV32-NEXT: vrgatherei16.vv v8, v16, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v12, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 2 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) ; RV32-NEXT: lui a3, %hi(.LCPI6_14) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14) -; RV32-NEXT: lui a4, %hi(.LCPI6_15) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v20, (a1) -; RV32-NEXT: addi a1, a4, %lo(.LCPI6_15) +; RV32-NEXT: lui a1, %hi(.LCPI6_15) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15) ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; RV32-NEXT: vle16.v v24, (a3) ; RV32-NEXT: vle16.v v8, (a1) @@ -526,27 +521,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, 
mu ; RV32-NEXT: vrgatherei16.vv v16, v8, v20, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -554,7 +548,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v20, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -562,12 +556,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vrgatherei16.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 49 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -576,31 +570,28 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 21 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 13 +; RV32-NEXT: li a2, 12 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; 
RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 57 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 2 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v28, v0 ; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: addi a1, a0, 320 @@ -614,21 +605,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 82 +; RV32-NEXT: li a1, 80 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index 17483151869365..7608349ef7aeff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -549,20 
+549,20 @@ define <128 x i1> @buildvec_mask_v128i1() { define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; CHECK-LABEL: buildvec_mask_optsize_v128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI21_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI21_0) -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: lui a1, %hi(.LCPI21_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI21_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_v128i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: lui a0, %hi(.LCPI21_0) -; ZVE32F-NEXT: addi a0, a0, %lo(.LCPI21_0) -; ZVE32F-NEXT: li a1, 128 -; ZVE32F-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; ZVE32F-NEXT: vlm.v v0, (a0) +; ZVE32F-NEXT: li a0, 128 +; ZVE32F-NEXT: lui a1, %hi(.LCPI21_0) +; ZVE32F-NEXT: addi a1, a1, %lo(.LCPI21_0) +; ZVE32F-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; ZVE32F-NEXT: vlm.v v0, (a1) ; ZVE32F-NEXT: ret ret <128 x i1> } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index db0969c85a8e24..69341981288b91 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -13327,22 +13327,22 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV32-LABEL: mgather_shuffle_vrgather: ; RV32: # %bb.0: +; RV32-NEXT: lui a1, %hi(.LCPI119_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI119_0) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v9, (a0) -; RV32-NEXT: lui a0, %hi(.LCPI119_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI119_0) +; RV32-NEXT: vle16.v v9, (a1) ; RV32-NEXT: vle16.v v10, (a0) -; RV32-NEXT: vrgather.vv v8, v9, v10 +; RV32-NEXT: vrgather.vv v8, v10, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_shuffle_vrgather: ; RV64V: # 
%bb.0: +; RV64V-NEXT: lui a1, %hi(.LCPI119_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI119_0) ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64V-NEXT: vle16.v v9, (a0) -; RV64V-NEXT: lui a0, %hi(.LCPI119_0) -; RV64V-NEXT: addi a0, a0, %lo(.LCPI119_0) +; RV64V-NEXT: vle16.v v9, (a1) ; RV64V-NEXT: vle16.v v10, (a0) -; RV64V-NEXT: vrgather.vv v8, v9, v10 +; RV64V-NEXT: vrgather.vv v8, v10, v9 ; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index d70ed2fb0e2665..4b1f0beb487008 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -228,11 +228,11 @@ define <16 x i8> @reverse_v16i8(<16 x i8> %a) { define <32 x i8> @reverse_v32i8(<32 x i8> %a) { ; CHECK-LABEL: reverse_v32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI12_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI12_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -243,11 +243,11 @@ define <32 x i8> @reverse_v32i8(<32 x i8> %a) { define <64 x i8> @reverse_v64i8(<64 x i8> %a) { ; CHECK-LABEL: reverse_v64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI13_0) -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: lui a1, %hi(.LCPI13_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI13_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vle8.v v16, (a1) ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v 
v8, v12 ; CHECK-NEXT: ret @@ -323,11 +323,11 @@ define <16 x i16> @reverse_v16i16(<16 x i16> %a) { define <32 x i16> @reverse_v32i16(<32 x i16> %a) { ; CHECK-LABEL: reverse_v32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI19_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI19_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vsext.vf2 v16, v12 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -520,11 +520,11 @@ define <16 x half> @reverse_v16f16(<16 x half> %a) { define <32 x half> @reverse_v32f16(<32 x half> %a) { ; CHECK-LABEL: reverse_v32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI34_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI34_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vsext.vf2 v16, v12 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 @@ -820,33 +820,33 @@ define <6 x i64> @reverse_v6i64(<6 x i64> %a) { define <12 x i64> @reverse_v12i64(<12 x i64> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v12i64: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-BITS-UNKNOWN-NEXT: li a1, 32 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vle16.v v24, (a0) +; RV32-BITS-UNKNOWN-NEXT: li a0, 32 +; RV32-BITS-UNKNOWN-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-BITS-UNKNOWN-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: 
vle16.v v24, (a1) ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v12i64: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-BITS-256-NEXT: li a1, 32 -; RV32-BITS-256-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-BITS-256-NEXT: vle16.v v24, (a0) +; RV32-BITS-256-NEXT: li a0, 32 +; RV32-BITS-256-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-BITS-256-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-BITS-256-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-BITS-256-NEXT: vle16.v v24, (a1) ; RV32-BITS-256-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-BITS-256-NEXT: vmv.v.v v8, v16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v12i64: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-BITS-512-NEXT: li a1, 32 -; RV32-BITS-512-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-BITS-512-NEXT: vle16.v v24, (a0) +; RV32-BITS-512-NEXT: li a0, 32 +; RV32-BITS-512-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-BITS-512-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-BITS-512-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-BITS-512-NEXT: vle16.v v24, (a1) ; RV32-BITS-512-NEXT: vrgatherei16.vv v16, v8, v24 ; RV32-BITS-512-NEXT: vmv.v.v v8, v16 ; RV32-BITS-512-NEXT: ret @@ -883,11 +883,11 @@ define <12 x i64> @reverse_v12i64(<12 x i64> %a) { ; ; RV32-ZVBB-LABEL: reverse_v12i64: ; RV32-ZVBB: # %bb.0: -; RV32-ZVBB-NEXT: lui a0, %hi(.LCPI46_0) -; RV32-ZVBB-NEXT: addi a0, a0, %lo(.LCPI46_0) -; RV32-ZVBB-NEXT: li a1, 32 -; RV32-ZVBB-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-ZVBB-NEXT: vle16.v v24, (a0) +; RV32-ZVBB-NEXT: li a0, 32 +; RV32-ZVBB-NEXT: lui a1, %hi(.LCPI46_0) +; RV32-ZVBB-NEXT: addi a1, a1, %lo(.LCPI46_0) +; RV32-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-ZVBB-NEXT: vle16.v v24, (a1) ; RV32-ZVBB-NEXT: 
vrgatherei16.vv v16, v8, v24 ; RV32-ZVBB-NEXT: vmv.v.v v8, v16 ; RV32-ZVBB-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll index 0161ac4bc338db..e2580c132f65e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll @@ -225,11 +225,11 @@ declare <16 x i64> @llvm.experimental.stepvector.v16i64() define <16 x i64> @stepvector_v16i64() { ; RV32-LABEL: stepvector_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI16_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI16_0) -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vle8.v v16, (a0) +; RV32-NEXT: li a0, 32 +; RV32-NEXT: lui a1, %hi(.LCPI16_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI16_0) +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vle8.v v16, (a1) ; RV32-NEXT: vsext.vf4 v8, v16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll index 29f8eaba900527..e3c7d02462cc7f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vdiv_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vdiv_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 
%evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll index 3f8eb0ff276b7f..03bd85bf5e69e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vdivu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vdivu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.udiv.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 9789afda9344ad..0b0d758ad8ded8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.smax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.smax.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 36b0a4642b6169..98e630a0e59e5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vmaxu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vmaxu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.umax.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index adb0a30f34d35a..a6e3764b37550d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.smin.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vmin_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.smin.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 671ce82d4ae795..c59b65edd1ec10 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vminu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vminu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.umin.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll index 4bbbad5ed0e0e8..ff8a63e371c8ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll @@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.srem.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) define <8 x i7> @vrem_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vrem_vv_v8i7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vadd.vv v9, v9, v9 -; CHECK-NEXT: vsra.vi v9, v9, 1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vrem.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.srem.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll index ee11307bddc88c..b5eec4142c7824 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vremu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: vremu_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vremu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.urem.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll index c4b7c1f2f19f0f..16a0fddfa98277 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll @@ -10,9 +10,8 @@ define <8 x i7> @vsll_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: vsll_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.shl.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll index 7ea5b1f0b505a3..180fafa9659b1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll @@ -10,11 +10,10 @@ define <8 x i7> @vsra_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: vsra_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; 
CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.ashr.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll index 9f9d4af0cc2f3f..22f04803eadd74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll @@ -10,10 +10,9 @@ define <8 x i7> @vsrl_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: vsrl_vv_v8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1, v0.t +; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %v = call <8 x i7> @llvm.vp.lshr.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index f88a9b3081a1a8..372937bb5ca5df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -9,7 +9,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f16( @nearbyint_nxv1f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define @nearbyint_nxv1f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; 
CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -32,7 +33,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f16( @nearbyint_nxv2f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -40,6 +41,7 @@ define @nearbyint_nxv2f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -55,7 +57,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f16( @nearbyint_nxv4f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -63,6 +65,7 @@ define @nearbyint_nxv4f16( %v) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -78,7 +81,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f16( @nearbyint_nxv8f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -86,6 +89,7 @@ define @nearbyint_nxv8f16( 
%v) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -101,7 +105,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv16f16(< define @nearbyint_nxv16f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -109,6 +113,7 @@ define @nearbyint_nxv16f16( %v) strictf ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -124,7 +129,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv32f16(< define @nearbyint_nxv32f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -132,6 +137,7 @@ define @nearbyint_nxv32f16( %v) strictf ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -147,7 +153,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f32( @nearbyint_nxv1f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; 
CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -155,6 +161,7 @@ define @nearbyint_nxv1f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -170,7 +177,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv2f32( @nearbyint_nxv2f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -178,6 +185,7 @@ define @nearbyint_nxv2f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -193,7 +201,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f32( @nearbyint_nxv4f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -201,6 +209,7 @@ define @nearbyint_nxv4f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -216,7 +225,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f32( @nearbyint_nxv8f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f32: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -224,6 +233,7 @@ define @nearbyint_nxv8f32( %v) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -239,7 +249,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv16f32( define @nearbyint_nxv16f32( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -247,6 +257,7 @@ define @nearbyint_nxv16f32( %v) stric ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -262,7 +273,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv1f64(< define @nearbyint_nxv1f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -270,6 +281,7 @@ define @nearbyint_nxv1f64( %v) strict ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -285,7 +297,7 @@ declare 
@llvm.experimental.constrained.nearbyint.nxv2f64(< define @nearbyint_nxv2f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -293,6 +305,7 @@ define @nearbyint_nxv2f64( %v) strict ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -308,7 +321,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv4f64(< define @nearbyint_nxv4f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -316,6 +329,7 @@ define @nearbyint_nxv4f64( %v) strict ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -331,7 +345,7 @@ declare @llvm.experimental.constrained.nearbyint.nxv8f64(< define @nearbyint_nxv8f64( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -339,6 +353,7 @@ define @nearbyint_nxv8f64( %v) strict ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: 
vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll index 3276f481f30ea5..aaa7a538e70fb7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll @@ -9,7 +9,7 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-LABEL: round_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare @llvm.experimental.constrained.round.nxv1f16( @round_nxv2f16( %x) strictfp { ; CHECK-LABEL: round_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define @round_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare @llvm.experimental.constrained.round.nxv2f16( @round_nxv4f16( %x) strictfp { ; CHECK-LABEL: round_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 
%hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define @round_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare @llvm.experimental.constrained.round.nxv4f16( @round_nxv8f16( %x) strictfp { ; CHECK-LABEL: round_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define @round_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -97,7 +101,7 @@ declare @llvm.experimental.constrained.round.nxv8f16( @round_nxv16f16( %x) strictfp { ; CHECK-LABEL: round_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define @round_nxv16f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -119,7 +124,7 @@ declare @llvm.experimental.constrained.round.nxv16f16( @round_nxv32f16( %x) strictfp { ; CHECK-LABEL: round_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, 
%hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -127,6 +132,7 @@ define @round_nxv32f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -141,7 +147,7 @@ declare @llvm.experimental.constrained.round.nxv32f16( @round_nxv1f32( %x) strictfp { ; CHECK-LABEL: round_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -149,6 +155,7 @@ define @round_nxv1f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,7 +170,7 @@ declare @llvm.experimental.constrained.round.nxv1f32( @round_nxv2f32( %x) strictfp { ; CHECK-LABEL: round_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -171,6 +178,7 @@ define @round_nxv2f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -185,7 +193,7 @@ declare @llvm.experimental.constrained.round.nxv2f32( @round_nxv4f32( %x) strictfp { ; CHECK-LABEL: round_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; 
CHECK-NEXT: vfabs.v v10, v8 @@ -193,6 +201,7 @@ define @round_nxv4f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -207,7 +216,7 @@ declare @llvm.experimental.constrained.round.nxv4f32( @round_nxv8f32( %x) strictfp { ; CHECK-LABEL: round_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -215,6 +224,7 @@ define @round_nxv8f32( %x) strictfp { ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -229,7 +239,7 @@ declare @llvm.experimental.constrained.round.nxv8f32( @round_nxv16f32( %x) strictfp { ; CHECK-LABEL: round_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -237,6 +247,7 @@ define @round_nxv16f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -251,7 +262,7 @@ declare @llvm.experimental.constrained.round.nxv16f32( @round_nxv1f64( %x) strictfp { ; CHECK-LABEL: round_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, 
%lo(.LCPI11_0)(a0) @@ -259,6 +270,7 @@ define @round_nxv1f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,7 +285,7 @@ declare @llvm.experimental.constrained.round.nxv1f64( @round_nxv2f64( %x) strictfp { ; CHECK-LABEL: round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -281,6 +293,7 @@ define @round_nxv2f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -295,7 +308,7 @@ declare @llvm.experimental.constrained.round.nxv2f64( @round_nxv4f64( %x) strictfp { ; CHECK-LABEL: round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -303,6 +316,7 @@ define @round_nxv4f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -317,7 +331,7 @@ declare @llvm.experimental.constrained.round.nxv4f64( @round_nxv8f64( %x) strictfp { ; CHECK-LABEL: round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, 
%lo(.LCPI14_0)(a0) @@ -325,6 +339,7 @@ define @round_nxv8f64( %x) strictfp { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll index 4ebfcccbaaa6e6..cdc01d658778bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -9,7 +9,7 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) @@ -17,6 +17,7 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,7 +32,7 @@ declare @llvm.experimental.constrained.roundeven.nxv1f16( @roundeven_nxv2f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) @@ -39,6 +40,7 @@ define @roundeven_nxv2f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -53,7 +55,7 @@ declare 
@llvm.experimental.constrained.roundeven.nxv2f16( @roundeven_nxv4f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) @@ -61,6 +63,7 @@ define @roundeven_nxv4f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -75,7 +78,7 @@ declare @llvm.experimental.constrained.roundeven.nxv4f16( @roundeven_nxv8f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) @@ -83,6 +86,7 @@ define @roundeven_nxv8f16( %x) strictfp { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -97,7 +101,7 @@ declare @llvm.experimental.constrained.roundeven.nxv8f16( @roundeven_nxv16f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) @@ -105,6 +109,7 @@ define @roundeven_nxv16f16( %x) strictf ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, 
v12, v0.t @@ -119,7 +124,7 @@ declare @llvm.experimental.constrained.roundeven.nxv16f16(< define @roundeven_nxv32f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) @@ -127,6 +132,7 @@ define @roundeven_nxv32f16( %x) strictf ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -141,7 +147,7 @@ declare @llvm.experimental.constrained.roundeven.nxv32f16(< define @roundeven_nxv1f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -149,6 +155,7 @@ define @roundeven_nxv1f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,7 +170,7 @@ declare @llvm.experimental.constrained.roundeven.nxv1f32( @roundeven_nxv2f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 @@ -171,6 +178,7 @@ define @roundeven_nxv2f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -185,7 +193,7 @@ declare @llvm.experimental.constrained.roundeven.nxv2f32( @roundeven_nxv4f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 @@ -193,6 +201,7 @@ define @roundeven_nxv4f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -207,7 +216,7 @@ declare @llvm.experimental.constrained.roundeven.nxv4f32( @roundeven_nxv8f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 @@ -215,6 +224,7 @@ define @roundeven_nxv8f32( %x) strictfp ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -229,7 +239,7 @@ declare @llvm.experimental.constrained.roundeven.nxv8f32( @roundeven_nxv16f32( %x) strictfp { ; CHECK-LABEL: roundeven_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 @@ -237,6 +247,7 @@ define @roundeven_nxv16f32( %x) stric ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: 
vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -251,7 +262,7 @@ declare @llvm.experimental.constrained.roundeven.nxv16f32( define @roundeven_nxv1f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) @@ -259,6 +270,7 @@ define @roundeven_nxv1f64( %x) strict ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,7 +285,7 @@ declare @llvm.experimental.constrained.roundeven.nxv1f64(< define @roundeven_nxv2f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) @@ -281,6 +293,7 @@ define @roundeven_nxv2f64( %x) strict ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -295,7 +308,7 @@ declare @llvm.experimental.constrained.roundeven.nxv2f64(< define @roundeven_nxv4f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) @@ -303,6 +316,7 @@ define @roundeven_nxv4f64( %x) strict ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; 
CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -317,7 +331,7 @@ declare @llvm.experimental.constrained.roundeven.nxv4f64(< define @roundeven_nxv8f64( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) @@ -325,6 +339,7 @@ define @roundeven_nxv8f64( %x) strict ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index bc5617957d7d08..2c5a3dfffc2cfc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -1282,18 +1282,17 @@ define @fshr_v1i9( %a, %b, ; CHECK-LABEL: fshr_v1i9: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 511 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t ; CHECK-NEXT: li a0, 9 ; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t ; CHECK-NEXT: vadd.vi v10, v10, 7, v0.t ; CHECK-NEXT: vand.vi v11, v10, 15, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t ; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vnot.v v10, v10, v0.t ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret @@ -1306,18 +1305,17 @@ define @fshl_v1i9( %a, %b, ; CHECK-LABEL: fshl_v1i9: ; CHECK: # %bb.0: ; 
CHECK-NEXT: li a1, 511 -; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma -; CHECK-NEXT: vand.vx v10, v10, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t -; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t ; CHECK-NEXT: li a0, 9 ; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t -; CHECK-NEXT: vnot.v v11, v10, v0.t -; CHECK-NEXT: vand.vi v11, v11, 15, v0.t -; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t +; CHECK-NEXT: vand.vi v11, v10, 15, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t +; CHECK-NEXT: vnot.v v10, v10, v0.t ; CHECK-NEXT: vand.vi v10, v10, 15, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t +; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv1i9( %a, %b, %c, %m, i32 %evl) @@ -1330,15 +1328,14 @@ declare @llvm.vp.fshr.nxv1i4(, @fshr_v1i4( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: fshr_v1i4: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v10, v10, 15 -; CHECK-NEXT: li a1, 4 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vremu.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsrl.vv v8, v8, v10, v0.t +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vremu.vx v9, v10, a0, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v9, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret %trunca = call @llvm.vp.trunc.nxv1i4.nxv1i8( %a, %m, i32 zeroext %evl) @@ -1353,15 +1350,14 @@ declare @llvm.vp.fshl.nxv1i4(, @fshl_v1i4( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: fshl_v1i4: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v10, v10, 15 -; CHECK-NEXT: li a1, 4 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: 
vremu.vx v10, v10, a1, v0.t +; CHECK-NEXT: vand.vi v10, v10, 15, v0.t ; CHECK-NEXT: vand.vi v9, v9, 15, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t ; CHECK-NEXT: vor.vv v8, v8, v9, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: li a0, 4 +; CHECK-NEXT: vremu.vx v9, v10, a0, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v9, v0.t ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 15, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll index 3665669d83a3d4..21615b516da898 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -7,13 +7,14 @@ define @trunc_nxv1f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu @@ -27,13 +28,14 @@ declare @llvm.experimental.constrained.trunc.nxv1f16( @trunc_nxv2f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu @@ -47,13 +49,14 
@@ declare @llvm.experimental.constrained.trunc.nxv2f16( @trunc_nxv4f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -67,13 +70,14 @@ declare @llvm.experimental.constrained.trunc.nxv4f16( @trunc_nxv8f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -87,13 +91,14 @@ declare @llvm.experimental.constrained.trunc.nxv8f16( @trunc_nxv16f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -107,13 +112,14 @@ declare 
@llvm.experimental.constrained.trunc.nxv16f16( @trunc_nxv32f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -127,13 +133,14 @@ declare @llvm.experimental.constrained.trunc.nxv32f16( @trunc_nxv1f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu @@ -147,13 +154,14 @@ declare @llvm.experimental.constrained.trunc.nxv1f32( @trunc_nxv2f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu @@ -167,13 +175,14 @@ declare @llvm.experimental.constrained.trunc.nxv2f32( @trunc_nxv4f32( %x) 
strictfp { ; CHECK-LABEL: trunc_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -187,13 +196,14 @@ declare @llvm.experimental.constrained.trunc.nxv4f32( @trunc_nxv8f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -207,13 +217,14 @@ declare @llvm.experimental.constrained.trunc.nxv8f32( @trunc_nxv16f32( %x) strictfp { ; CHECK-LABEL: trunc_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -227,13 +238,14 @@ declare @llvm.experimental.constrained.trunc.nxv16f32( @trunc_nxv1f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli 
a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu @@ -247,13 +259,14 @@ declare @llvm.experimental.constrained.trunc.nxv1f64( @trunc_nxv2f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -267,13 +280,14 @@ declare @llvm.experimental.constrained.trunc.nxv2f64( @trunc_nxv4f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -287,13 +301,14 @@ declare @llvm.experimental.constrained.trunc.nxv4f64( @trunc_nxv8f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; 
CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/mul-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mul-combine.ll new file mode 100644 index 00000000000000..6a7da925b4d43d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/mul-combine.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV64 + +define <2 x i16> @test_v2i16(<2 x i16> %x) { +; CHECK-RV32-LABEL: test_v2i16: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 7 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_v2i16: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 7 +; CHECK-RV64-NEXT: ret + %1 = lshr <2 x i16> %x, + %2 = and <2 x i16> %1, + %3 = mul <2 x i16> %2, + ret <2 x i16> %3 +} + +define @test_nxv2i16( %x) { +; CHECK-RV32-LABEL: test_nxv2i16: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-RV32-NEXT: vsrl.vi v8, v8, 7 +; CHECK-RV32-NEXT: li a0, 257 +; CHECK-RV32-NEXT: vand.vx v8, v8, a0 +; CHECK-RV32-NEXT: vsll.vi v8, v8, 8 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_nxv2i16: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-RV64-NEXT: vsrl.vi v8, v8, 7 +; CHECK-RV64-NEXT: li a0, 257 +; 
CHECK-RV64-NEXT: vand.vx v8, v8, a0 +; CHECK-RV64-NEXT: vsll.vi v8, v8, 8 +; CHECK-RV64-NEXT: ret + %1 = lshr %x, splat (i16 7) + %2 = and %1, splat (i16 257) + %3 = mul %2, splat (i16 256) + ret %3 +} + +define <2 x i32> @test_v2i32(<2 x i32> %x) { +; CHECK-RV32-LABEL: test_v2i32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_v2i32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV64-NEXT: ret + %1 = lshr <2 x i32> %x, + %2 = and <2 x i32> %1, + %3 = mul <2 x i32> %2, + ret <2 x i32> %3 +} + +define @test_nxv2i32( %x) { +; CHECK-RV32-LABEL: test_nxv2i32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_nxv2i32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 15 +; CHECK-RV64-NEXT: ret + %1 = lshr %x, splat (i32 15) + %2 = and %1, splat (i32 65537) + %3 = mul %2, splat (i32 65535) + ret %3 +} + +define <2 x i64> @test_v2i64(<2 x i64> %x) { +; CHECK-RV32-LABEL: test_v2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_v2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV64-NEXT: ret + %1 = lshr <2 x i64> %x, + %2 = and <2 x i64> %1, + %3 = mul <2 x i64> %2, + ret <2 x i64> %3 +} + +define @test_nxv2i64( %x) { +; CHECK-RV32-LABEL: test_nxv2i64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-RV32-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: test_nxv2i64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: vsetvli a0, zero, 
e32, m2, ta, ma +; CHECK-RV64-NEXT: vsra.vi v8, v8, 31 +; CHECK-RV64-NEXT: ret + %1 = lshr %x, splat (i64 31) + %2 = and %1, splat (i64 4294967297) + %3 = mul %2, splat (i64 4294967295) + ret %3 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 6e327457bebffc..368f454fa5fda1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -106,11 +106,11 @@ define <16 x i8> @v16i8(<16 x i8> %a) { define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: v16i8_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI7_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI7_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI7_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vmv1r.v v14, v9 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vid.v v8 @@ -230,11 +230,11 @@ define <16 x i16> @v16i16(<16 x i16> %a) { define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-LABEL: v16i16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI15_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI15_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI15_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v16, (a1) ; CHECK-NEXT: vmv2r.v v20, v10 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vrgather.vv v8, v12, v16 @@ -363,11 +363,11 @@ define <16 x i32> @v16i32(<16 x i32> %a) { define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: v16i32_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI23_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI23_0) -; CHECK-NEXT: li a1, 32 -; 
CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI23_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI23_0) +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vle16.v v20, (a1) ; CHECK-NEXT: vmv4r.v v24, v12 ; CHECK-NEXT: vmv4r.v v16, v8 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v20 @@ -548,11 +548,11 @@ define <16 x half> @v16f16(<16 x half> %a) { define <32 x half> @v16f16_2(<16 x half> %a) { ; CHECK-LABEL: v16f16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI35_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI35_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI35_0) +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vle16.v v16, (a1) ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -719,11 +719,11 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { define <32 x i8> @v32i8(<32 x i8> %a) { ; CHECK-LABEL: v32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: lui a1, %hi(.LCPI46_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI46_0) +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vle8.v v12, (a1) ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index 26089706cf99ef..a4b7ca7f39768f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.sdiv.nxv8i7(, @vdiv_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vdiv_vx_nxv8i7: ; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vdiv.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vdiv.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll index f41b885a66eaae..67c3f9dbf2869a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll @@ -10,11 +10,12 @@ define @vdivu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vdivu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index 8a76467986620c..c15caa31bb0986 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.smax.nxv8i7(, @vmax_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; 
CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 1c74887c1b20fb..df494f8af7387c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -10,11 +10,12 @@ define @vmaxu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vmaxu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 1c71242c3c7d79..794a21c7c6abac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.smin.nxv8i7(, @vmin_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, 
v0.t +; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 6d89a9777cf917..d54de281a7fd28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -10,11 +10,12 @@ define @vminu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vminu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index cf85fd827b51f1..2ef96f4b3896fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -9,11 +9,15 @@ declare @llvm.vp.srem.nxv8i7(, @vrem_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vrem_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vrem.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t +; CHECK-NEXT: vrem.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll index 61bdd5b8d3c8a7..1f1ed4a1269acb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll @@ -10,11 +10,12 @@ define @vremu_vx_nxv8i7( %a, i7 signext %b, < ; CHECK-LABEL: vremu_vx_nxv8i7: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vx v8, v8, a2 ; CHECK-NEXT: vmv.v.x v9, a0 -; CHECK-NEXT: vand.vx v9, v9, a2 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a2, v0.t ; CHECK-NEXT: vremu.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll index c04d5ea2da3c1b..380835494ed17d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll @@ -12,8 +12,8 @@ define @vsll_vx_nxv8i7( %a, i7 signext %b, poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll index 632c4db5c5bb57..cff8cc710d21f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -9,13 +9,14 @@ declare @llvm.vp.ashr.nxv8i7(, @vsra_vx_nxv8i7( %a, i7 signext %b, %mask, i32 zeroext %evl) { ; CHECK-LABEL: vsra_vx_nxv8i7: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v8 -; CHECK-NEXT: vsra.vi v8, v8, 1 ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i7 %b, i32 0 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll index ec5b7f3faf7ca8..ff6771b643031f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll @@ -10,11 +10,12 @@ define @vsrl_vx_nxv8i7( %a, i7 signext %b, poison, i7 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index 87d69bfad38c2b..d3e495bb723ad8 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -56,12 +56,12 @@ define void @caller_indirect_tail(i32 %a) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: beqz a0, .LBB3_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: lui a0, %hi(callee_indirect2) -; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2) +; CHECK-NEXT: lui t1, %hi(callee_indirect2) +; CHECK-NEXT: addi t1, t1, %lo(callee_indirect2) ; CHECK-NEXT: jr t1 ; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: lui a0, %hi(callee_indirect1) -; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1) +; CHECK-NEXT: lui t1, %hi(callee_indirect1) +; CHECK-NEXT: addi t1, t1, %lo(callee_indirect1) ; CHECK-NEXT: jr t1 diff --git a/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll b/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll index 2fd4572d234567..65307363048376 100644 --- a/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll +++ b/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll @@ -10,36 +10,30 @@ define signext i32 @unroll_loop_cse() { ; CHECK-LABEL: unroll_loop_cse: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(x) -; CHECK-NEXT: lw a3, %lo(x)(a1) -; CHECK-NEXT: lui a2, %hi(check) -; CHECK-NEXT: lw a4, %lo(check)(a2) +; CHECK-NEXT: lui a0, %hi(x) +; CHECK-NEXT: lw a1, %lo(x)(a0) +; CHECK-NEXT: lui a0, %hi(check) +; CHECK-NEXT: lw a2, %lo(check)(a0) ; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: bne a3, a4, .LBB0_6 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi a1, a1, %lo(x) -; CHECK-NEXT: lw a1, 4(a1) -; CHECK-NEXT: addi a2, a2, %lo(check) -; CHECK-NEXT: lw a2, 4(a2) ; CHECK-NEXT: bne a1, a2, .LBB0_6 -; 
CHECK-NEXT: # %bb.2: +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a1, %hi(x) ; CHECK-NEXT: addi a1, a1, %lo(x) -; CHECK-NEXT: lw a3, 8(a1) +; CHECK-NEXT: lw a3, 4(a1) ; CHECK-NEXT: lui a2, %hi(check) ; CHECK-NEXT: addi a2, a2, %lo(check) +; CHECK-NEXT: lw a4, 4(a2) +; CHECK-NEXT: bne a3, a4, .LBB0_6 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lw a3, 8(a1) ; CHECK-NEXT: lw a4, 8(a2) ; CHECK-NEXT: bne a3, a4, .LBB0_6 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: lw a1, 12(a1) -; CHECK-NEXT: lw a2, 12(a2) -; CHECK-NEXT: bne a1, a2, .LBB0_6 +; CHECK-NEXT: lw a3, 12(a1) +; CHECK-NEXT: lw a4, 12(a2) +; CHECK-NEXT: bne a3, a4, .LBB0_6 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: lui a1, %hi(x) -; CHECK-NEXT: addi a1, a1, %lo(x) ; CHECK-NEXT: lw a3, 16(a1) -; CHECK-NEXT: lui a2, %hi(check) -; CHECK-NEXT: addi a2, a2, %lo(check) ; CHECK-NEXT: lw a4, 16(a2) ; CHECK-NEXT: bne a3, a4, .LBB0_6 ; CHECK-NEXT: # %bb.5: diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll similarity index 100% rename from llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll rename to llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll diff --git a/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll b/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll new file mode 100644 index 00000000000000..52a6364e122589 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/eh-option-errors.ll @@ -0,0 +1,22 @@ +target triple = "wasm32-unknown-unknown" + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -wasm-enable-eh 2>&1 | FileCheck %s --check-prefix=EM_EH_W_WASM_EH +; EM_EH_W_WASM_EH: LLVM ERROR: -enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh + +; RUN: not --crash llc < %s -enable-emscripten-sjlj -wasm-enable-sjlj 2>&1 | FileCheck %s --check-prefix=EM_SJLJ_W_WASM_SJLJ +; EM_SJLJ_W_WASM_SJLJ: LLVM ERROR: -enable-emscripten-sjlj not allowed with -wasm-enable-sjlj + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions 
-wasm-enable-sjlj 2>&1 | FileCheck %s --check-prefix=EM_EH_W_WASM_SJLJ +; EM_EH_W_WASM_SJLJ: LLVM ERROR: -enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj + +; RUN: not --crash llc < %s -wasm-enable-exnref 2>&1 | FileCheck %s --check-prefix=WASM_EXNREF_ONLY +; WASM_EXNREF_ONLY: LLVM ERROR: -wasm-enable-exnref should be used with -wasm-enable-eh + +; RUN: not --crash llc < %s -wasm-enable-eh -exception-model=dwarf 2>&1 | FileCheck %s --check-prefix=EH_MODEL_DWARF +; EH_MODEL_DWARF: LLVM ERROR: -exception-model should be either 'none' or 'wasm' + +; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=EM_EH_W_MODEL_WASM +; EM_EH_W_MODEL_WASM: LLVM ERROR: -exception-model=wasm not allowed with -enable-emscripten-cxx-exceptions + +; RUN: not --crash llc < %s -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=MODEL_WASM_WO_WASM_EH_SJLJ +; MODEL_WASM_WO_WASM_EH_SJLJ: LLVM ERROR: -exception-model=wasm only allowed with at least one of -wasm-enable-eh or -wasm-enable-sjlj diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception-legacy.ll similarity index 100% rename from llvm/test/CodeGen/WebAssembly/exception.ll rename to llvm/test/CodeGen/WebAssembly/exception-legacy.ll diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index d9d3f6be800fdd..73ccea8d652db8 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -35,3 +35,71 @@ define float @extract_lane_v8f16(<8 x half> %v) { %r = call float @llvm.wasm.extract.lane.f16x8(<8 x half> %v, i32 1) ret float %r } + +; CHECK-LABEL: add_v8f16: +; CHECK: f16x8.add $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @add_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fadd <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: sub_v8f16: +; CHECK: f16x8.sub $push0=, $0, 
$1 +; CHECK-NEXT: return $pop0 +define <8 x half> @sub_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fsub <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: mul_v8f16: +; CHECK: f16x8.mul $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @mul_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fmul <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: div_v8f16: +; CHECK: f16x8.div $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @div_v8f16(<8 x half> %a, <8 x half> %b) { + %r = fdiv <8 x half> %a, %b + ret <8 x half> %r +} + +; CHECK-LABEL: min_intrinsic_v8f16: +; CHECK: f16x8.min $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>) +define <8 x half> @min_intrinsic_v8f16(<8 x half> %x, <8 x half> %y) { + %a = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y) + ret <8 x half> %a +} + +; CHECK-LABEL: max_intrinsic_v8f16: +; CHECK: f16x8.max $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>) +define <8 x half> @max_intrinsic_v8f16(<8 x half> %x, <8 x half> %y) { + %a = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y) + ret <8 x half> %a +} + +; CHECK-LABEL: pmin_intrinsic_v8f16: +; CHECK: f16x8.pmin $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.wasm.pmin.v8f16(<8 x half>, <8 x half>) +define <8 x half> @pmin_intrinsic_v8f16(<8 x half> %a, <8 x half> %b) { + %v = call <8 x half> @llvm.wasm.pmin.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %v +} + +; CHECK-LABEL: pmax_intrinsic_v8f16: +; CHECK: f16x8.pmax $push0=, $0, $1 +; CHECK-NEXT: return $pop0 +declare <8 x half> @llvm.wasm.pmax.v8f16(<8 x half>, <8 x half>) +define <8 x half> @pmax_intrinsic_v8f16(<8 x half> %a, <8 x half> %b) { + %v = call <8 x half> @llvm.wasm.pmax.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %v +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll 
b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll index 4a63c812d6ae9a..66872a54229862 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-ehsjlj-options.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -enable-emscripten-cxx-exceptions | FileCheck %s --check-prefix=EH ; RUN: llc < %s -enable-emscripten-sjlj | FileCheck %s --check-prefix=SJLJ ; RUN: llc < %s | FileCheck %s --check-prefix=NONE -; RUN: not --crash llc < %s -enable-emscripten-cxx-exceptions -exception-model=wasm 2>&1 | FileCheck %s --check-prefix=WASM-EH-EM-EH target triple = "wasm32-unknown-unknown" @@ -97,5 +96,3 @@ declare void @free(ptr) attributes #0 = { returns_twice } attributes #1 = { noreturn } attributes #2 = { nounwind } - -; WASM-EH-EM-EH: LLVM ERROR: -exception-model=wasm not allowed with -enable-emscripten-cxx-exceptions diff --git a/llvm/test/CodeGen/WebAssembly/reg-argument.mir b/llvm/test/CodeGen/WebAssembly/reg-argument.mir index 23e66dfc71fa1b..a549990bdb0a2b 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-argument.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-argument.mir @@ -68,3 +68,14 @@ body: | %1:externref = ARGUMENT_externref 0, implicit $arguments RETURN implicit-def $arguments ... +--- +name: argument_exnref +# CHECK-LABEL: argument_exnref +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:exnref = ARGUMENT_exnref 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:exnref = ARGUMENT_exnref 0, implicit $arguments + RETURN implicit-def $arguments +... diff --git a/llvm/test/CodeGen/WebAssembly/reg-copy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir index 31a5bfa63a4ea2..763fe42d07b61a 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-copy.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir @@ -77,3 +77,14 @@ body: | %0:externref = COPY %1:externref RETURN implicit-def $arguments ... 
+--- +name: copy_exnref +# CHECK-LABEL: copy_exnref +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %0:exnref = COPY_EXNREF %1:exnref + ; CHECK-NEXT: RETURN + bb.0: + %0:exnref = COPY %1:exnref + RETURN implicit-def $arguments +... diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s index 769cd7edfa8a3e..ac358c1b5c7a52 100644 --- a/llvm/test/MC/WebAssembly/basic-assembly.s +++ b/llvm/test/MC/WebAssembly/basic-assembly.s @@ -146,12 +146,14 @@ test0: .ident "clang version 9.0.0 (trunk 364502) (llvm/trunk 364571)" -.tabletype empty_eref_table, externref -empty_eref_table: +.tabletype empty_externref_table, externref +empty_externref_table: -.tabletype empty_fref_table, funcref -empty_fref_table: +.tabletype empty_funcref_table, funcref +empty_funcref_table: +.tabletype empty_exnref_table, exnref +empty_exnref_table: # CHECK: .text # CHECK: .globaltype __stack_pointer, i32 @@ -283,8 +285,11 @@ empty_fref_table: # CHECK-NEXT: .p2align 2 # CHECK-NEXT: .int32 test0 -# CHECK: .tabletype empty_eref_table, externref -# CHECK-NEXT: empty_eref_table: +# CHECK: .tabletype empty_externref_table, externref +# CHECK-NEXT: empty_externref_table: -# CHECK: .tabletype empty_fref_table, funcref -# CHECK-NEXT: empty_fref_table: +# CHECK: .tabletype empty_funcref_table, funcref +# CHECK-NEXT: empty_funcref_table: + +# CHECK: .tabletype empty_exnref_table, exnref +# CHECK-NEXT: empty_exnref_table: diff --git a/llvm/test/MC/WebAssembly/reference-types.s b/llvm/test/MC/WebAssembly/reference-types.s index ab3e3ee6b155b1..2f8bfba68dcea1 100644 --- a/llvm/test/MC/WebAssembly/reference-types.s +++ b/llvm/test/MC/WebAssembly/reference-types.s @@ -4,22 +4,27 @@ # CHECK-LABEL:ref_is_null: # CHECK: ref.is_null # encoding: [0xd1] ref_is_null: - .functype ref_is_null () -> (i32, i32) + .functype ref_is_null () -> (i32, i32, i32) ref.null_extern ref.is_null ref.null_func ref.is_null + ref.null_exn + ref.is_null end_function # CHECK-LABEL: ref_null_test: 
# CHECK: ref.null_func # encoding: [0xd0,0x70] # CHECK: ref.null_extern # encoding: [0xd0,0x6f] +# CHECK: ref.null_exn # encoding: [0xd0,0x69] ref_null_test: .functype ref_null_test () -> () ref.null_func drop ref.null_extern drop + ref.null_exn + drop end_function # CHECK-LABEL: ref_sig_test_funcref: @@ -36,9 +41,17 @@ ref_sig_test_externref: local.get 0 end_function +# CHECK-LABEL: ref_sig_test_exnref: +# CHECK-NEXT: .functype ref_sig_test_exnref (exnref) -> (exnref) +ref_sig_test_exnref: + .functype ref_sig_test_exnref (exnref) -> (exnref) + local.get 0 + end_function + # CHECK-LABEL: ref_select_test: # CHECK: funcref.select # encoding: [0x1b] # CHECK: externref.select # encoding: [0x1b] +# CHECK: exnref.select # encoding: [0x1b] ref_select_test: .functype ref_select_test () -> () ref.null_func @@ -51,15 +64,24 @@ ref_select_test: i32.const 0 externref.select drop + ref.null_exn + ref.null_exn + i32.const 0 + exnref.select + drop end_function # CHECK-LABEL: ref_block_test: # CHECK: block funcref # CHECK: block externref +# CHECK: block exnref ref_block_test: - .functype ref_block_test () -> (externref, funcref) + .functype ref_block_test () -> (exnref, externref, funcref) block funcref block externref + block exnref + ref.null_exn + end_block ref.null_extern end_block ref.null_func diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index d397188a9882ea..113a23da776fa9 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -851,4 +851,28 @@ main: # CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01] f16x8.extract_lane 1 + # CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02] + f16x8.add + + # CHECK: f16x8.sub # encoding: [0xfd,0xb5,0x02] + f16x8.sub + + # CHECK: f16x8.mul # encoding: [0xfd,0xb6,0x02] + f16x8.mul + + # CHECK: f16x8.div # encoding: [0xfd,0xb7,0x02] + f16x8.div + + # CHECK: f16x8.min # encoding: [0xfd,0xb8,0x02] + f16x8.min + + # CHECK: f16x8.max # 
encoding: [0xfd,0xb9,0x02] + f16x8.max + + # CHECK: f16x8.pmin # encoding: [0xfd,0xba,0x02] + f16x8.pmin + + # CHECK: f16x8.pmax # encoding: [0xfd,0xbb,0x02] + f16x8.pmax + end_function diff --git a/llvm/test/MC/WebAssembly/type-checker-errors.s b/llvm/test/MC/WebAssembly/type-checker-errors.s index 5e28d117501e98..d2841250137a8c 100644 --- a/llvm/test/MC/WebAssembly/type-checker-errors.s +++ b/llvm/test/MC/WebAssembly/type-checker-errors.s @@ -215,6 +215,22 @@ table_fill_type_mismatch_3: table.fill valid_table end_function +table_fill_type_mismatch_4: + .functype table_fill_type_mismatch_4 () -> () + ref.null_exn + i32.const 1 +# CHECK: [[@LINE+1]]:3: error: popped exnref, expected externref + table.fill valid_table + end_function + +table_fill_type_mismatch_5: + .functype table_fill_type_mismatch_5 () -> () + ref.null_exn + i32.const 1 +# CHECK: [[@LINE+1]]:3: error: popped exnref, expected externref + table.fill valid_table + end_function + table_grow_non_exist_table: .functype table_grow_non_exist_table (externref, i32) -> (i32) local.get 0 diff --git a/llvm/test/Transforms/Reassociate/local-cse.ll b/llvm/test/Transforms/Reassociate/local-cse.ll index 4d0467e263f553..d0d609f022b46b 100644 --- a/llvm/test/Transforms/Reassociate/local-cse.ll +++ b/llvm/test/Transforms/Reassociate/local-cse.ll @@ -26,16 +26,16 @@ define void @chain_spanning_several_blocks(i64 %inv1, i64 %inv2, i64 %inv3, i64 ; LOCAL_CSE-LABEL: define void @chain_spanning_several_blocks ; LOCAL_CSE-SAME: (i64 [[INV1:%.*]], i64 [[INV2:%.*]], i64 [[INV3:%.*]], i64 [[INV4:%.*]], i64 [[INV5:%.*]]) { ; LOCAL_CSE-NEXT: bb1: -; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[INV2]], [[INV1]] +; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[INV2]], [[INV1]] ; LOCAL_CSE-NEXT: br label [[BB2:%.*]] ; LOCAL_CSE: bb2: ; LOCAL_CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV4]] -; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw i64 
[[CHAIN_A1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV5]] -; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw i64 [[CHAIN_B1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw i64 [[INV3]], [[INV1]] -; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_C0]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV4]] +; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV5]] +; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_B1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw nsw i64 [[INV3]], [[INV1]] +; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_C0]], [[VAL_BB2]] ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -47,11 +47,11 @@ define void @chain_spanning_several_blocks(i64 %inv1, i64 %inv2, i64 %inv3, i64 ; CSE-NEXT: br label [[BB2:%.*]] ; CSE: bb2: ; CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1]] -; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV2]] +; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1]] +; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV2]] ; CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV4]] ; CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV5]] -; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV3]] +; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV3]] ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -90,19 +90,19 @@ define void @chain_spanning_several_blocks_no_entry_anchor() { ; LOCAL_CSE-NEXT: br label [[BB1:%.*]] ; LOCAL_CSE: 
bb1: ; LOCAL_CSE-NEXT: [[INV1_BB1:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[INV1_BB1]], [[INV2_BB0]] +; LOCAL_CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[INV1_BB1]], [[INV2_BB0]] ; LOCAL_CSE-NEXT: br label [[BB2:%.*]] ; LOCAL_CSE: bb2: ; LOCAL_CSE-NEXT: [[INV3_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[INV4_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[INV5_BB2:%.*]] = call i64 @get_val() ; LOCAL_CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV4_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw i64 [[CHAIN_A1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV5_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw i64 [[CHAIN_B1]], [[VAL_BB2]] -; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1_BB1]] -; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_C0]], [[INV3_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV4_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV5_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_B1]], [[VAL_BB2]] +; LOCAL_CSE-NEXT: [[CHAIN_C0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1_BB1]] +; LOCAL_CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_C0]], [[INV3_BB2]] ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; LOCAL_CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) @@ -120,11 +120,11 @@ define void @chain_spanning_several_blocks_no_entry_anchor() { ; CSE-NEXT: [[INV4_BB2:%.*]] = call i64 @get_val() ; CSE-NEXT: [[INV5_BB2:%.*]] = call i64 @get_val() ; CSE-NEXT: [[VAL_BB2:%.*]] = call i64 @get_val() -; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw i64 [[VAL_BB2]], [[INV1_BB1]] -; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV2_BB0]] 
+; CSE-NEXT: [[CHAIN_A0:%.*]] = add nuw nsw i64 [[VAL_BB2]], [[INV1_BB1]] +; CSE-NEXT: [[CHAIN_A1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV2_BB0]] ; CSE-NEXT: [[CHAIN_A2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV4_BB2]] ; CSE-NEXT: [[CHAIN_B2:%.*]] = add nuw nsw i64 [[CHAIN_A1]], [[INV5_BB2]] -; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw i64 [[CHAIN_A0]], [[INV3_BB2]] +; CSE-NEXT: [[CHAIN_C1:%.*]] = add nuw nsw i64 [[CHAIN_A0]], [[INV3_BB2]] ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_A2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_B2]]) ; CSE-NEXT: call void @keep_alive(i64 [[CHAIN_C1]]) diff --git a/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll b/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll new file mode 100644 index 00000000000000..fcebc4980e6d7d --- /dev/null +++ b/llvm/test/Transforms/Reassociate/reassoc-add-nsw.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=reassociate -S | FileCheck %s +define i32 @nsw_preserve_nonnegative(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_preserve_nonnegative( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add nsw i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0, !range !1 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_preserve_nuw_nsw(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_preserve_nuw_nsw( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; 
CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4 +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4 +; CHECK-NEXT: [[ADD0:%.*]] = add nuw nsw i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0 + %v1 = load i32, ptr %ptr1 + %v2 = load i32, ptr %ptr2 + %add0 = add nuw nsw i32 %v1, %v2 + %add1 = add nuw nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_dont_preserve_negative(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_dont_preserve_negative( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 = add nsw i32 %add0, %v0 + ret i32 %add1 +} + +define i32 @nsw_nopreserve_notallnsw(ptr %ptr0, ptr %ptr1, ptr %ptr2) { +; CHECK-LABEL: define i32 @nsw_nopreserve_notallnsw( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], ptr [[PTR2:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[PTR0]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[PTR1]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[PTR2]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[ADD0:%.*]] = add i32 [[V1]], [[V0]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ADD0]], [[V2]] +; CHECK-NEXT: ret i32 [[ADD1]] +; + %v0 = load i32, ptr %ptr0, !range !1 + %v1 = load i32, ptr %ptr1, !range !1 + %v2 = load i32, ptr %ptr2, !range !1 + %add0 = add nsw i32 %v1, %v2 + %add1 
= add i32 %add0, %v0 + ret i32 %add1 +} + +; Positive 32 bit integers +!1 = !{i32 0, i32 2147483648} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 -2147483648} +;. diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof new file mode 100644 index 00000000000000..d1c0408210f498 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof @@ -0,0 +1,3 @@ +foo:100:100 + 1: bar:100 + 1:100 diff --git a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll new file mode 100644 index 00000000000000..914ab4f1e3da58 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=100 2>&1 | FileCheck %s + +; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100) +; CHECK: define dso_local noundef i32 @foo(i32 noundef %0) +; CHECK-NOT: %2 = tail call noundef i32 @bar(i32 noundef %0) +; CHECK-NEXT: %2 = icmp sgt i32 %0, 1 +; CHECK-NEXT: br i1 %2, label %3, label %bar.exit + +; Manually lower cost threshold for hot function inlining, so that the function +; is not inlined even profile indicates it as hot. 
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -sample-profile-hot-inline-threshold=1 2>&1 | FileCheck %s --check-prefix=COST + +; COST-NOT: remark +; COST: define dso_local noundef i32 @foo(i32 noundef %0) +; COST-NEXT: %2 = tail call noundef i32 @bar(i32 noundef %0) + +define dso_local noundef i32 @bar(i32 noundef %0) #0 !dbg !10 { + %2 = icmp sgt i32 %0, 1 + br i1 %2, label %3, label %15 +3: ; preds = %1 + %4 = add nsw i32 %0, -2 + %5 = mul i32 %4, %4 + %6 = add i32 %5, %0 + %7 = zext nneg i32 %4 to i33 + %8 = add nsw i32 %0, -3 + %9 = zext i32 %8 to i33 + %10 = mul i33 %7, %9 + %11 = lshr i33 %10, 1 + %12 = trunc nuw i33 %11 to i32 + %13 = xor i32 %12, -1 + %14 = add i32 %6, %13 + br label %15 +15: ; preds = %3, %1 + %16 = phi i32 [ 0, %1 ], [ %14, %3 ] + ret i32 %16 +} + +define dso_local noundef i32 @foo(i32 noundef %0) #1 !dbg !20 { + %2 = tail call noundef i32 @bar(i32 noundef %0), !dbg !24 + ret i32 %2 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" } +attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug) +!1 = !DIFile(filename: "a.cc", directory: ".") +!2 = !{i32 2, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 1, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!11 = !DIFile(filename: "a.cc", directory: ".") +!12 = !DISubroutineType(types: !13) +!13 = !{!14, !14} +!14 = 
!DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !11, file: !11, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!23 = !DILocation(line: 0, scope: !20) +!24 = !DILocation(line: 6, column: 12, scope: !20) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll index 18cbd857d97bb2..2cd9abf0e11e94 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -98,7 +98,7 @@ if.end: ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '15' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '2147483647' +;YAML-NEXT: - Threshold: '3000' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: foo diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll index 997e02bb5b5444..9c0143ae65ca77 100644 --- a/llvm/test/Transforms/SampleProfile/remarks.ll +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -22,7 +22,7 @@ ; We are expecting foo() to be inlined in main() (almost all the cycles are ; spent inside foo). -; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21; +; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=3000) at callsite main:0:21; ; CHECK: remark: remarks.cc:9:19: 'rand' inlined into 'main' to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21; ; The back edge for the loop is the hottest edge in the loop subgraph. 
@@ -51,7 +51,7 @@ ;YAML-NEXT: - String: '(cost=' ;YAML-NEXT: - Cost: '130' ;YAML-NEXT: - String: ', threshold=' -;YAML-NEXT: - Threshold: '2147483647' +;YAML-NEXT: - Threshold: '3000' ;YAML-NEXT: - String: ')' ;YAML-NEXT: - String: ' at callsite ' ;YAML-NEXT: - String: main diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-versions.test similarity index 77% rename from llvm/test/tools/llvm-profdata/memprof-merge-v0.test rename to llvm/test/tools/llvm-profdata/memprof-merge-versions.test index 28f65e0781bc63..aa7d0329425dc5 100644 --- a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test +++ b/llvm/test/tools/llvm-profdata/memprof-merge-versions.test @@ -19,6 +19,12 @@ RUN: llvm-profdata show %t.prof.v2 | FileCheck %s RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=2 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v2 RUN: llvm-profdata show %t.prof.v2 | FileCheck %s +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=3 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v3 +RUN: llvm-profdata show %t.prof.v3 | FileCheck %s + +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=3 --memprof-full-schema --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v3 +RUN: llvm-profdata show %t.prof.v3 | FileCheck %s + For now we only check the validity of the instrumented profile since we don't have a way to display the contents of the memprof indexed format yet. 
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 28c3afa1016473..fae6d1e989ab5a 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -306,7 +306,8 @@ cl::opt MemProfVersionRequested( cl::init(memprof::Version0), cl::values(clEnumValN(memprof::Version0, "0", "version 0"), clEnumValN(memprof::Version1, "1", "version 1"), - clEnumValN(memprof::Version2, "2", "version 2"))); + clEnumValN(memprof::Version2, "2", "version 2"), + clEnumValN(memprof::Version3, "3", "version 3"))); cl::opt MemProfFullSchema( "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand), diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp index a8970d8bcbacdc..d89a1f078328b5 100644 --- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp +++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp @@ -277,8 +277,22 @@ void X86InstrMappingEmitter::emitNFTransformTable( if (Pos == std::string::npos) continue; - if (auto *NewRec = Records.getDef(Name.erase(Pos, 3))) + if (auto *NewRec = Records.getDef(Name.erase(Pos, 3))) { +#ifndef NDEBUG + auto ClobberEFLAGS = [](const Record *R) { + return llvm::any_of( + R->getValueAsListOfDefs("Defs"), + [](const Record *Def) { return Def->getName() == "EFLAGS"; }); + }; + if (ClobberEFLAGS(Rec)) + report_fatal_error("EFLAGS should not be clobbered by " + + Rec->getName()); + if (!ClobberEFLAGS(NewRec)) + report_fatal_error("EFLAGS should be clobbered by " + + NewRec->getName()); +#endif Table.push_back(std::pair(&Target.getInstruction(NewRec), Inst)); + } } printTable(Table, "X86NFTransformTable", "GET_X86_NF_TRANSFORM_TABLE", OS); } diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b642b2c82e6d8d..6bd56dd4117b03 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -384,6 
+384,9 @@ if (current_toolchain == default_toolchain) { "__concepts/totally_ordered.h", "__condition_variable/condition_variable.h", "__config", + "__configuration/abi.h", + "__configuration/compiler.h", + "__configuration/platform.h", "__coroutine/coroutine_handle.h", "__coroutine/coroutine_traits.h", "__coroutine/noop_coroutine_handle.h", diff --git a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn index c99c1b5483355b..f0bf6a8f3dbaf8 100644 --- a/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/API/BUILD.gn @@ -40,6 +40,8 @@ target(liblldb_type, "liblldb") { include_dirs = [ ".." ] sources = [ "SBAddress.cpp", + "SBAddressRange.cpp", + "SBAddressRangeList.cpp", "SBAttachInfo.cpp", "SBBlock.cpp", "SBBreakpoint.cpp", diff --git a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn index 30a9fb3ecceaa0..0c9632a0a1915f 100644 --- a/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Core/BUILD.gn @@ -45,6 +45,7 @@ static_library("Core") { sources = [ "Address.cpp", "AddressRange.cpp", + "AddressRangeListImpl.cpp", "AddressResolver.cpp", "AddressResolverFileLine.cpp", "Communication.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn index 78a9d20812ef9b..8264f6d73e791e 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn @@ -12,9 +12,9 @@ tablegen("X86GenDAGISel") { td_file = "X86.td" } -tablegen("X86GenCompressEVEXTables") { +tablegen("X86GenInstrMapping") { visibility = [ ":LLVMX86CodeGen" ] - args = [ "-gen-x86-compress-evex-tables" ] + args = [ "-gen-x86-instr-mapping" ] td_file = "X86.td" } @@ -48,11 +48,11 @@ tablegen("X86GenRegisterBank") { static_library("LLVMX86CodeGen") { deps = [ ":X86GenCallingConv", - 
":X86GenCompressEVEXTables", ":X86GenDAGISel", ":X86GenFastISel", ":X86GenFoldTables", ":X86GenGlobalISel", + ":X86GenInstrMapping", ":X86GenRegisterBank", "MCTargetDesc", "TargetInfo", diff --git a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn index f3ae5b5899ac6a..2e11d25767cd00 100644 --- a/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/TableGen/BUILD.gn @@ -64,7 +64,7 @@ executable("llvm-tblgen") { "SearchableTableEmitter.cpp", "SubtargetEmitter.cpp", "WebAssemblyDisassemblerEmitter.cpp", - "X86CompressEVEXTablesEmitter.cpp", + "X86InstrMappingEmitter.cpp", "X86DisassemblerTables.cpp", "X86FoldTablesEmitter.cpp", "X86MnemonicTables.cpp", diff --git a/llvm/utils/vim/syntax/llvm.vim b/llvm/utils/vim/syntax/llvm.vim index d86e3d1ddbc27f..905d696400ca37 100644 --- a/llvm/utils/vim/syntax/llvm.vim +++ b/llvm/utils/vim/syntax/llvm.vim @@ -150,6 +150,7 @@ syn keyword llvmKeyword \ preallocated \ private \ protected + \ ptrauth \ ptx_device \ ptx_kernel \ readnone diff --git a/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h index 8bd7cf880c6afb..191c023fb642cb 100644 --- a/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h +++ b/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h @@ -24,51 +24,6 @@ namespace mlir { namespace dataflow { -/// This lattice value represents the integer range of an SSA value. -class IntegerValueRange { -public: - /// Create a maximal range ([0, uint_max(t)] / [int_min(t), int_max(t)]) - /// range that is used to mark the value as unable to be analyzed further, - /// where `t` is the type of `value`. - static IntegerValueRange getMaxRange(Value value); - - /// Create an integer value range lattice value. - IntegerValueRange(std::optional value = std::nullopt) - : value(std::move(value)) {} - - /// Whether the range is uninitialized. 
This happens when the state hasn't - /// been set during the analysis. - bool isUninitialized() const { return !value.has_value(); } - - /// Get the known integer value range. - const ConstantIntRanges &getValue() const { - assert(!isUninitialized()); - return *value; - } - - /// Compare two ranges. - bool operator==(const IntegerValueRange &rhs) const { - return value == rhs.value; - } - - /// Take the union of two ranges. - static IntegerValueRange join(const IntegerValueRange &lhs, - const IntegerValueRange &rhs) { - if (lhs.isUninitialized()) - return rhs; - if (rhs.isUninitialized()) - return lhs; - return IntegerValueRange{lhs.getValue().rangeUnion(rhs.getValue())}; - } - - /// Print the integer value range. - void print(raw_ostream &os) const { os << value; } - -private: - /// The known integer value range. - std::optional value; -}; - /// This lattice element represents the integer value range of an SSA value. /// When this lattice is updated, it automatically updates the constant value /// of the SSA value (if the range can be narrowed to one). 
diff --git a/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h b/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h index 3953c83f3aa106..76a4b1b1563366 100644 --- a/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h +++ b/mlir/include/mlir/Conversion/TosaToTensor/TosaToTensor.h @@ -16,6 +16,7 @@ #include "mlir/Pass/Pass.h" namespace mlir { +class TypeConverter; #define GEN_PASS_DECL_TOSATOTENSOR #include "mlir/Conversion/Passes.h.inc" @@ -24,7 +25,8 @@ namespace tosa { std::unique_ptr createTosaToTensor(); -void populateTosaToTensorConversionPatterns(RewritePatternSet *patterns); +void populateTosaToTensorConversionPatterns(TypeConverter &converter, + RewritePatternSet *patterns); } // namespace tosa } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index ead52332e8eec3..81ed0f924a2e2c 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -49,7 +49,7 @@ class Arith_BinaryOp traits = []> : // Base class for integer binary operations. class Arith_IntBinaryOp traits = []> : Arith_BinaryOp]>, + [DeclareOpInterfaceMethods]>, Arguments<(ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs)>, Results<(outs SignlessIntegerLike:$result)>; @@ -83,12 +83,25 @@ class Arith_FloatBinaryOp traits = []> : attr-dict `:` type($result) }]; } +// Checks that tensor input and outputs have identical shapes. This is stricker +// than the verification done in `SameOperandsAndResultShape` that allows for +// tensor dimensions to be 'compatible' (e.g., dynamic dimensions being +// compatible with static ones). +def SameInputOutputTensorDims : PredOpTrait< + "input and output have the same tensor dimensions", + AllMatchSameOperatorPred<["in", "out"], + "(::llvm::isa<::mlir::TensorType>($_self.getType()) ?" 
+ " ::llvm::cast<::mlir::TensorType>($_self.getType()).getShape() :" + " ::llvm::ArrayRef{})">>; + // Base class for arithmetic cast operations. Requires a single operand and -// result. If either is a shaped type, then the other must be of the same shape. +// result. If either is a shaped type, then the other must be of the same +// shape. In the case of tensor types, this also includes the corresponding +// operand/result dimensions being equal. class Arith_CastOp traits = []> : Arith_Op]>, + SameInputOutputTensorDims, DeclareOpInterfaceMethods]>, Arguments<(ins From:$in)>, Results<(outs To:$out)> { let assemblyFormat = "$in attr-dict `:` type($in) `to` type($out)"; @@ -107,7 +120,7 @@ class Arith_IToICastOp traits = []> : Arith_CastOp]>; + [DeclareOpInterfaceMethods]>; // Cast from an integer type to a floating point type. class Arith_IToFCastOp traits = []> : Arith_CastOp; @@ -139,7 +152,7 @@ class Arith_CompareOpOfAnyRank traits = []> : class Arith_IntBinaryOpWithOverflowFlags traits = []> : Arith_BinaryOp, + [Pure, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]>, Arguments<(ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs, DefaultValuedAttr< @@ -159,7 +172,7 @@ def Arith_ConstantOp : Op, AllTypesMatch<["value", "result"]>, - DeclareOpInterfaceMethods]> { + DeclareOpInterfaceMethods]> { let summary = "integer or floating point constant"; let description = [{ The `constant` operation produces an SSA value equal to some integer or @@ -1231,7 +1244,7 @@ def Arith_TruncIOp : Arith_IToICastOp<"trunci"> { def Arith_TruncFOp : Arith_Op<"truncf", - [Pure, SameOperandsAndResultShape, + [Pure, SameOperandsAndResultShape, SameInputOutputTensorDims, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]>, Arguments<(ins FloatLike:$in, @@ -1327,7 +1340,7 @@ def IndexCastTypeConstraint : TypeConstraint]> { + [DeclareOpInterfaceMethods]> { let summary = "cast between index and integer types"; let description = [{ Casts between scalar or vector integers and 
corresponding 'index' scalar or @@ -1346,7 +1359,7 @@ def Arith_IndexCastOp def Arith_IndexCastUIOp : Arith_CastOp<"index_castui", IndexCastTypeConstraint, IndexCastTypeConstraint, - [DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods]> { let summary = "unsigned cast between index and integer types"; let description = [{ Casts between scalar or vector integers and corresponding 'index' scalar or @@ -1400,7 +1413,7 @@ def Arith_BitcastOp : Arith_CastOp<"bitcast", BitcastTypeConstraint, def Arith_CmpIOp : Arith_CompareOpOfAnyRank<"cmpi", - [DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods]> { let summary = "integer comparison operation"; let description = [{ The `cmpi` operation is a generic comparison for integer-like types. Its two @@ -1555,7 +1568,7 @@ class ScalarConditionOrMatchingShape names> : def SelectOp : Arith_Op<"select", [Pure, AllTypesMatch<["true_value", "false_value", "result"]>, ScalarConditionOrMatchingShape<["condition", "result"]>, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, ] # ElementwiseMappable.traits> { let summary = "select operation"; let description = [{ diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h index cbc6147cb81e22..9dc262cc72ed00 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.h @@ -24,9 +24,6 @@ namespace arith { class WideIntEmulationConverter; class NarrowTypeEmulationConverter; -/// Create a pass to bufferize arith.constant ops. -std::unique_ptr createConstantBufferizePass(uint64_t alignment = 0); - /// Adds patterns to emulate wide Arith and Function ops over integer /// types into supported ones. This is done by splitting original power-of-two /// i2N integer types into two iN halves. 
diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td index 4096e309199e98..550c5c0cf4f60f 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td @@ -11,22 +11,6 @@ include "mlir/Pass/PassBase.td" -def ArithBufferizePass : Pass<"arith-bufferize", "ModuleOp"> { - let summary = "Bufferize Arith dialect ops."; - let description = [{ - This pass bufferizes arith dialect ops. - - This pass needs to be a module pass because it inserts memref.global - ops into the module, which cannot be done safely from a function pass due to - multi-threading. Most other bufferization passes can run in parallel at - function granularity. - }]; - let options = [ - Option<"alignment", "alignment", "unsigned", /*default=*/"0", - "Create global memrefs with a specified alignment">, - ]; -} - def ArithExpandOpsPass : Pass<"arith-expand"> { let summary = "Legalize Arith ops to be convertible to LLVM."; let dependentDialects = ["vector::VectorDialect"]; diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index 459c252b707121..e053e6c97e1430 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -221,9 +221,6 @@ createPromoteBuffersToStackPass(std::function isSmallAlloc); /// insert_slice ops. std::unique_ptr createEmptyTensorEliminationPass(); -/// Create a pass that bufferizes ops from the bufferization dialect. 
-std::unique_ptr createBufferizationBufferizePass(); - //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td index 75ce85c9128c94..8f8826b9ad56b4 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td @@ -350,11 +350,6 @@ def FinalizingBufferize : Pass<"finalizing-bufferize", "func::FuncOp"> { let constructor = "mlir::bufferization::createFinalizingBufferizePass()"; } -def BufferizationBufferize : Pass<"bufferization-bufferize", "func::FuncOp"> { - let summary = "Bufferize the `bufferization` dialect"; - let constructor = "mlir::bufferization::createBufferizationBufferizePass()"; -} - def DropEquivalentBufferResults : Pass<"drop-equivalent-buffer-results", "ModuleOp"> { let summary = "Remove MemRef return values that are equivalent to a bbArg"; let description = [{ diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 1da68ed2176d8f..10719aae5c8b46 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -52,7 +52,7 @@ def GPU_DimensionAttr : EnumAttr; class GPU_IndexOp traits = []> : GPU_Op, + DeclareOpInterfaceMethods, DeclareOpInterfaceMethods])>, Arguments<(ins GPU_DimensionAttr:$dimension)>, Results<(outs Index)> { let assemblyFormat = "$dimension attr-dict"; @@ -144,7 +144,7 @@ def GPU_ThreadIdOp : GPU_IndexOp<"thread_id"> { } def GPU_LaneIdOp : GPU_Op<"lane_id", [ - Pure, DeclareOpInterfaceMethods]> { + Pure, DeclareOpInterfaceMethods]> { let description = [{ Returns the lane id within the subgroup (warp/wave). 
@@ -158,7 +158,7 @@ def GPU_LaneIdOp : GPU_Op<"lane_id", [ } def GPU_SubgroupIdOp : GPU_Op<"subgroup_id", [ - Pure, DeclareOpInterfaceMethods]>, + Pure, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the subgroup id, i.e., the index of the current subgroup within the @@ -190,7 +190,7 @@ def GPU_GlobalIdOp : GPU_IndexOp<"global_id"> { def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [ - Pure, DeclareOpInterfaceMethods]>, + Pure, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the number of subgroups within a workgroup. @@ -206,7 +206,7 @@ def GPU_NumSubgroupsOp : GPU_Op<"num_subgroups", [ } def GPU_SubgroupSizeOp : GPU_Op<"subgroup_size", [ - Pure, DeclareOpInterfaceMethods]>, + Pure, DeclareOpInterfaceMethods]>, Arguments<(ins)>, Results<(outs Index:$result)> { let description = [{ Returns the number of threads within a subgroup. @@ -687,7 +687,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [ def GPU_LaunchOp : GPU_Op<"launch", [ AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, RecursiveMemoryEffects]>, Arguments<(ins Variadic:$asyncDependencies, Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ, diff --git a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td index c6079cb8a98c81..a30ae9f739cbc6 100644 --- a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td +++ b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td @@ -25,7 +25,7 @@ include "mlir/IR/OpBase.td" /// Base class for Index dialect operations. 
class IndexOp traits = []> : Op] # traits>; + [DeclareOpInterfaceMethods] # traits>; //===----------------------------------------------------------------------===// // IndexBinaryOp diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index d36d1e70f0b14d..f2955d55e59eca 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -22,10 +22,6 @@ namespace func { class FuncOp; } // namespace func -namespace bufferization { -struct OneShotBufferizationOptions; -} // namespace bufferization - #define GEN_PASS_DECL #include "mlir/Dialect/Linalg/Passes.h.inc" // IWYU pragma: keep diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 0a4ce8953136dd..0621a9f33ba1e8 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -89,16 +89,6 @@ def LinalgInlineScalarOperandsPass : Pass<"linalg-inline-scalar-operands"> { ]; } -def LinalgBufferizePass : Pass<"linalg-bufferize"> { - let summary = "Bufferize the linalg dialect"; - let dependentDialects = [ - "affine::AffineDialect", - "bufferization::BufferizationDialect", - "linalg::LinalgDialect", - "memref::MemRefDialect", - ]; -} - def LinalgGeneralizeNamedOpsPass : Pass<"linalg-generalize-named-ops"> { let summary = "Convert named ops into generic ops"; let dependentDialects = ["linalg::LinalgDialect"]; diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h index cfb637f133f54c..28e17459ff9625 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h @@ -47,13 +47,6 @@ void populateShapeRewritePatterns(RewritePatternSet &patterns); void populateRemoveShapeConstraintsPatterns(RewritePatternSet &patterns); std::unique_ptr> createRemoveShapeConstraintsPass(); -// Bufferizes shape dialect ops. 
-// -// Note that most shape dialect ops must be converted to std before -// bufferization happens, as they are intended to be bufferized at the std -// level. -std::unique_ptr> createShapeBufferizePass(); - /// Outline the shape computation part by adding shape.func and populate /// conrresponding mapping infomation into ShapeMappingAnalysis. std::unique_ptr> createOutlineShapeComputationPass(); diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td index 9dfda9ea336153..83834509b4a35a 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.td @@ -103,11 +103,4 @@ def ShapeToShapeLowering : Pass<"shape-to-shape-lowering", "func::FuncOp"> { let constructor = "mlir::createShapeToShapeLowering()"; } -// TODO: Generalize this to allow any type conversions desired. -def ShapeBufferize : Pass<"shape-bufferize", "func::FuncOp"> { - let summary = "Bufferize the shape dialect."; - let constructor = "mlir::createShapeBufferizePass()"; - let dependentDialects = ["bufferization::BufferizationDialect", - "memref::MemRefDialect"]; -} #endif // MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index bb49d6c256f21b..d6d038ef65bdf4 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -65,12 +65,6 @@ void populateSparseAssembler(RewritePatternSet &patterns, bool directOut); std::unique_ptr createSparseAssembler(); std::unique_ptr createSparseAssembler(bool directOut); -//===----------------------------------------------------------------------===// -// The SparseEncodingPropagation pass. 
-//===----------------------------------------------------------------------===// - -std::unique_ptr createSparseEncodingPropagationPass(); - //===----------------------------------------------------------------------===// // The SparseReinterpretMap pass. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td index 94c3ca60030eeb..2f844cee5ff528 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -40,42 +40,6 @@ def SparseAssembler : Pass<"sparse-assembler", "ModuleOp"> { ]; } -def SparseEncodingPropagation : Pass<"sparse-encoding-propagation", "func::FuncOp"> { - let summary = "Propagate sparse tensor encodings"; - let description = [{ - A pass that propagates sparse tensor encodings. - - Background: To avoid introducing repetitive operations, sparse tensors - in MLIR try to reuse tensor operations whenever available. However, most - tensor operations are canonicalized/transformed without the knowledge - of sparsity. The pass tries to propagate missing sparse encodings. 
- - For example: - ```mlir - %s = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2x1xf32, #sparse> - - // After rank reducing (by tensor dialect transformation) - %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2xf32> - %s = tensor.expand_shape [[0, 1]] %t - : tensor<2xf32> to tensor<2x1xf32, #sparse> - - // After sparsity propagation - %t = tensor.extract_slice %input[0, 0,] [2, 1] [1, 1] - : tensor<2x3xf32, #sparse> to tensor<2xf32, #sparse1> - %s = tensor.expand_shape [[0, 1]] %t - : tensor<2xf32, #sparse1> to tensor<2x1xf32, #sparse> - ``` - }]; - - let constructor = "mlir::createSparseEncodingPropagationPass()"; - let dependentDialects = [ - "sparse_tensor::SparseTensorDialect", - "tensor::TensorDialect", - ]; -} - def SparseReinterpretMap : Pass<"sparse-reinterpret-map", "ModuleOp"> { let summary = "Reinterprets sparse tensor type mappings"; let description = [{ diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h index 48f9066934a25e..964c35b3f15b80 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h @@ -21,9 +21,6 @@ namespace tensor { /// Creates an instance of the `tensor` subset folding pass. std::unique_ptr createFoldTensorSubsetOpsPass(); -/// Creates an instance of the `tensor` dialect bufferization pass. 
-std::unique_ptr createTensorBufferizePass(); - //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td index 4cc3844f29120b..be4c333836ec07 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td @@ -27,9 +27,4 @@ def FoldTensorSubsetOps : Pass<"fold-tensor-subset-ops"> { ]; } -def TensorBufferize : Pass<"tensor-bufferize", "func::FuncOp"> { - let summary = "Bufferize the `tensor` dialect"; - let constructor = "mlir::tensor::createTensorBufferizePass()"; -} - #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h index fbfc56dfe2cf4f..1f9522b51a4cf5 100644 --- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h @@ -18,6 +18,7 @@ #include "mlir/Pass/Pass.h" namespace mlir { +class TypeConverter; namespace tosa { #define GEN_PASS_DECL @@ -38,6 +39,8 @@ void populateTosaConstantReduction(MLIRContext *ctx, RewritePatternSet &patterns, bool aggressiveReduceConstant); +void populateTosaTypeConversion(TypeConverter &converter); + std::unique_ptr createTosaLayerwiseConstantFoldPass(); std::unique_ptr createTosaLayerwiseConstantFoldPass( const TosaLayerwiseConstantFoldPassOptions &options); diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h index 911402551e14d4..5667f4fa95ace4 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h @@ -17,9 +17,6 @@ namespace vector { #define GEN_PASS_DECL #include "mlir/Dialect/Vector/Transforms/Passes.h.inc" -/// Creates an 
instance of the `vector` dialect bufferization pass. -std::unique_ptr createVectorBufferizePass(); - /// Creates an instance of the `vector.mask` lowering pass. std::unique_ptr createLowerVectorMaskPass(); diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td index 31a0b3b2f0c53d..74369987497910 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td @@ -11,11 +11,6 @@ include "mlir/Pass/PassBase.td" -def VectorBufferize : Pass<"vector-bufferize", "func::FuncOp"> { - let summary = "Bufferize Vector dialect ops"; - let constructor = "mlir::vector::createVectorBufferizePass()"; -} - def LowerVectorMaskPass : Pass<"lower-vector-mask", "func::FuncOp"> { let summary = "Lower 'vector.mask' operations"; let constructor = "mlir::vector::createLowerVectorMaskPass()"; diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h index 05064a72ef02e7..0e107e88f5232f 100644 --- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h +++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h @@ -105,10 +105,83 @@ class ConstantIntRanges { raw_ostream &operator<<(raw_ostream &, const ConstantIntRanges &); +/// This lattice value represents the integer range of an SSA value. +class IntegerValueRange { +public: + /// Create a maximal range ([0, uint_max(t)] / [int_min(t), int_max(t)]) + /// range that is used to mark the value as unable to be analyzed further, + /// where `t` is the type of `value`. + static IntegerValueRange getMaxRange(Value value); + + /// Create an integer value range lattice value. + IntegerValueRange(ConstantIntRanges value) : value(std::move(value)) {} + + /// Create an integer value range lattice value. + IntegerValueRange(std::optional value = std::nullopt) + : value(std::move(value)) {} + + /// Whether the range is uninitialized. 
This happens when the state hasn't + /// been set during the analysis. + bool isUninitialized() const { return !value.has_value(); } + + /// Get the known integer value range. + const ConstantIntRanges &getValue() const { + assert(!isUninitialized()); + return *value; + } + + /// Compare two ranges. + bool operator==(const IntegerValueRange &rhs) const { + return value == rhs.value; + } + + /// Compute the least upper bound of two ranges. + static IntegerValueRange join(const IntegerValueRange &lhs, + const IntegerValueRange &rhs) { + if (lhs.isUninitialized()) + return rhs; + if (rhs.isUninitialized()) + return lhs; + return IntegerValueRange{lhs.getValue().rangeUnion(rhs.getValue())}; + } + + /// Print the integer value range. + void print(raw_ostream &os) const { os << value; } + +private: + /// The known integer value range. + std::optional value; +}; + +raw_ostream &operator<<(raw_ostream &, const IntegerValueRange &); + /// The type of the `setResultRanges` callback provided to ops implementing /// InferIntRangeInterface. It should be called once for each integer result /// value and be passed the ConstantIntRanges corresponding to that value. -using SetIntRangeFn = function_ref; +using SetIntRangeFn = + llvm::function_ref; + +/// Similar to SetIntRangeFn, but operating on IntegerValueRange lattice values. +/// This is the `setResultRanges` callback for the IntegerValueRange based +/// interface method. +using SetIntLatticeFn = + llvm::function_ref; + +class InferIntRangeInterface; + +namespace intrange::detail { +/// Default implementation of `inferResultRanges` which dispatches to the +/// `inferResultRangesFromOptional`. +void defaultInferResultRanges(InferIntRangeInterface interface, + ArrayRef argRanges, + SetIntLatticeFn setResultRanges); + +/// Default implementation of `inferResultRangesFromOptional` which dispatches +/// to the `inferResultRanges`. 
+void defaultInferResultRangesFromOptional(InferIntRangeInterface interface, + ArrayRef argRanges, + SetIntRangeFn setResultRanges); +} // end namespace intrange::detail } // end namespace mlir #include "mlir/Interfaces/InferIntRangeInterface.h.inc" diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.td b/mlir/include/mlir/Interfaces/InferIntRangeInterface.td index dbdc526c6f10b6..6ee436ce4d6c2f 100644 --- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.td +++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.td @@ -28,9 +28,10 @@ def InferIntRangeInterface : OpInterface<"InferIntRangeInterface"> { Infer the bounds on the results of this op given the bounds on its arguments. For each result value or block argument (that isn't a branch argument, since the dataflow analysis handles those case), the method should call - `setValueRange` with that `Value` as an argument. When `setValueRange` - is not called for some value, it will recieve a default value of the mimimum - and maximum values for its type (the unbounded range). + `setValueRange` with that `Value` as an argument. When implemented, + `setValueRange` should be called on all result values for the operation. + When operations take non-integer inputs, the + `inferResultRangesFromOptional` method should be implemented instead. When called on an op that also implements the RegionBranchOpInterface or BranchOpInterface, this method should not attempt to infer the values @@ -39,14 +40,39 @@ def InferIntRangeInterface : OpInterface<"InferIntRangeInterface"> { This function will only be called when at least one result of the op is a scalar integer value or the op has a region. 
+ }], + /*retTy=*/"void", + /*methodName=*/"inferResultRanges", + /*args=*/(ins "::llvm::ArrayRef<::mlir::ConstantIntRanges>":$argRanges, + "::mlir::SetIntRangeFn":$setResultRanges), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + ::mlir::intrange::detail::defaultInferResultRangesFromOptional($_op, + argRanges, + setResultRanges); + }]>, + + InterfaceMethod<[{ + Infer the bounds on the results of this op given the lattice representation + of the bounds for its arguments. For each result value or block argument + (that isn't a branch argument, since the dataflow analysis handles + those case), the method should call `setValueRange` with that `Value` + as an argument. When implemented, `setValueRange` should be called on + all result values for the operation. - `argRanges` contains one `IntRangeAttrs` for each argument to the op in ODS - order. Non-integer arguments will have the an unbounded range of width-0 - APInts in their `argRanges` element. + This method allows for more precise implementations when operations + want to reason about inputs which may be undefined during the analysis. 
}], - "void", "inferResultRanges", (ins - "::llvm::ArrayRef<::mlir::ConstantIntRanges>":$argRanges, - "::mlir::SetIntRangeFn":$setResultRanges) - >]; + /*retTy=*/"void", + /*methodName=*/"inferResultRangesFromOptional", + /*args=*/(ins "::llvm::ArrayRef<::mlir::IntegerValueRange>":$argRanges, + "::mlir::SetIntLatticeFn":$setResultRanges), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + ::mlir::intrange::detail::defaultInferResultRanges($_op, + argRanges, + setResultRanges); + }]> + ]; } #endif // MLIR_INTERFACES_INFERINTRANGEINTERFACE diff --git a/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h b/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h index 851bb534bc7ee1..3988a8826498a9 100644 --- a/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h +++ b/mlir/include/mlir/Interfaces/Utils/InferIntRangeCommon.h @@ -25,7 +25,11 @@ namespace intrange { /// abstracted away here to permit writing the function that handles both /// 64- and 32-bit index types. using InferRangeFn = - function_ref)>; + std::function)>; + +/// Function that performs inferrence on an array of `IntegerValueRange`. +using InferIntegerValueRangeFn = + std::function)>; static constexpr unsigned indexMinWidth = 32; static constexpr unsigned indexMaxWidth = 64; @@ -52,7 +56,7 @@ using InferRangeWithOvfFlagsFn = /// /// The `mode` argument specifies if the unsigned, signed, or both results of /// the inference computation should be used when comparing the results. 
-ConstantIntRanges inferIndexOp(InferRangeFn inferFn, +ConstantIntRanges inferIndexOp(const InferRangeFn &inferFn, ArrayRef argRanges, CmpMode mode); diff --git a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp index a82c30717e275b..9721620807a0f0 100644 --- a/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlow/IntegerRangeAnalysis.cpp @@ -36,17 +36,6 @@ using namespace mlir; using namespace mlir::dataflow; -IntegerValueRange IntegerValueRange::getMaxRange(Value value) { - unsigned width = ConstantIntRanges::getStorageBitwidth(value.getType()); - if (width == 0) - return {}; - APInt umin = APInt::getMinValue(width); - APInt umax = APInt::getMaxValue(width); - APInt smin = width != 0 ? APInt::getSignedMinValue(width) : umin; - APInt smax = width != 0 ? APInt::getSignedMaxValue(width) : umax; - return IntegerValueRange{ConstantIntRanges{umin, umax, smin, smax}}; -} - void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const { Lattice::onUpdate(solver); @@ -72,24 +61,17 @@ void IntegerValueRangeLattice::onUpdate(DataFlowSolver *solver) const { void IntegerRangeAnalysis::visitOperation( Operation *op, ArrayRef operands, ArrayRef results) { - // If the lattice on any operand is unitialized, bail out. 
- if (llvm::any_of(operands, [](const IntegerValueRangeLattice *lattice) { - return lattice->getValue().isUninitialized(); - })) { - return; - } - auto inferrable = dyn_cast(op); if (!inferrable) return setAllToEntryStates(results); LLVM_DEBUG(llvm::dbgs() << "Inferring ranges for " << *op << "\n"); - SmallVector argRanges( - llvm::map_range(operands, [](const IntegerValueRangeLattice *val) { - return val->getValue().getValue(); - })); + auto argRanges = llvm::map_to_vector( + operands, [](const IntegerValueRangeLattice *lattice) { + return lattice->getValue(); + }); - auto joinCallback = [&](Value v, const ConstantIntRanges &attrs) { + auto joinCallback = [&](Value v, const IntegerValueRange &attrs) { auto result = dyn_cast(v); if (!result) return; @@ -99,7 +81,7 @@ void IntegerRangeAnalysis::visitOperation( IntegerValueRangeLattice *lattice = results[result.getResultNumber()]; IntegerValueRange oldRange = lattice->getValue(); - ChangeResult changed = lattice->join(IntegerValueRange{attrs}); + ChangeResult changed = lattice->join(attrs); // Catch loop results with loop variant bounds and conservatively make // them [-inf, inf] so we don't circle around infinitely often (because @@ -116,7 +98,7 @@ void IntegerRangeAnalysis::visitOperation( propagateIfChanged(lattice, changed); }; - inferrable.inferResultRanges(argRanges, joinCallback); + inferrable.inferResultRangesFromOptional(argRanges, joinCallback); } void IntegerRangeAnalysis::visitNonControlFlowArguments( @@ -124,17 +106,12 @@ void IntegerRangeAnalysis::visitNonControlFlowArguments( ArrayRef argLattices, unsigned firstIndex) { if (auto inferrable = dyn_cast(op)) { LLVM_DEBUG(llvm::dbgs() << "Inferring ranges for " << *op << "\n"); - // If the lattice on any operand is unitialized, bail out. 
- if (llvm::any_of(op->getOperands(), [&](Value value) { - return getLatticeElementFor(op, value)->getValue().isUninitialized(); - })) - return; - SmallVector argRanges( - llvm::map_range(op->getOperands(), [&](Value value) { - return getLatticeElementFor(op, value)->getValue().getValue(); - })); - auto joinCallback = [&](Value v, const ConstantIntRanges &attrs) { + auto argRanges = llvm::map_to_vector(op->getOperands(), [&](Value value) { + return getLatticeElementFor(op, value)->getValue(); + }); + + auto joinCallback = [&](Value v, const IntegerValueRange &attrs) { auto arg = dyn_cast(v); if (!arg) return; @@ -145,7 +122,7 @@ void IntegerRangeAnalysis::visitNonControlFlowArguments( IntegerValueRangeLattice *lattice = argLattices[arg.getArgNumber()]; IntegerValueRange oldRange = lattice->getValue(); - ChangeResult changed = lattice->join(IntegerValueRange{attrs}); + ChangeResult changed = lattice->join(attrs); // Catch loop results with loop variant bounds and conservatively make // them [-inf, inf] so we don't circle around infinitely often (because @@ -162,7 +139,7 @@ void IntegerRangeAnalysis::visitNonControlFlowArguments( propagateIfChanged(lattice, changed); }; - inferrable.inferResultRanges(argRanges, joinCallback); + inferrable.inferResultRangesFromOptional(argRanges, joinCallback); return; } diff --git a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp index 0be3d76f556de9..388794ec122d21 100644 --- a/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp +++ b/mlir/lib/Conversion/ArithToEmitC/ArithToEmitC.cpp @@ -394,7 +394,9 @@ void mlir::populateArithToEmitCPatterns(TypeConverter &typeConverter, ArithConstantOpConversionPattern, ArithOpConversion, ArithOpConversion, + ArithOpConversion, ArithOpConversion, + ArithOpConversion, ArithOpConversion, IntegerOpConversion, IntegerOpConversion, diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp 
b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index f425b1f59d9940..70dcccf0a7307a 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -77,9 +77,9 @@ Value getLaneId(ConversionPatternRewriter &rewriter, Location loc, } static constexpr StringLiteral amdgcnDataLayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" - "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:" - "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-" - "G1-ni:7:8"; + "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" + "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:" + "64-S32-A5-G1-ni:7:8:9"; namespace { struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern { diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp index 89f956a5e70175..c0c015ab34aab0 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensor.cpp @@ -224,8 +224,17 @@ class ReshapeConverter : public OpConversionPattern { matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { auto loc = reshape.getLoc(); - auto resultType = reshape.getResult().getType(); - auto input = reshape.getInput1(); + auto resultType = cast_if_present( + getTypeConverter()->convertType(reshape.getType())); + if (!resultType) { + return rewriter.notifyMatchFailure(reshape.getLoc(), + "could not convert result type"); + } + auto input = dyn_cast>(adaptor.getInput1()); + if (!input) { + return rewriter.notifyMatchFailure(reshape.getLoc(), + "expected input type to be tensor"); + } auto newShape = reshape.getNewShape(); // Infer all intermediate types @@ -288,12 +297,13 @@ class SliceConverter : public OpConversionPattern { } }; -class PadConverter : public OpRewritePattern { +class PadConverter 
: public OpConversionPattern { public: - using OpRewritePattern::OpRewritePattern; + using OpConversionPattern::OpConversionPattern; - LogicalResult matchAndRewrite(tosa::PadOp padOp, - PatternRewriter &rewriter) const final { + LogicalResult + matchAndRewrite(tosa::PadOp padOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { auto loc = padOp.getLoc(); auto input = padOp.getInput1(); auto padding = padOp.getPadding(); @@ -428,11 +438,8 @@ struct ConcatConverter : public OpConversionPattern { } // namespace void mlir::tosa::populateTosaToTensorConversionPatterns( - RewritePatternSet *patterns) { - patterns->add< - ConcatConverter, - PadConverter, - ReshapeConverter, - SliceConverter - >(patterns->getContext()); + TypeConverter &converter, RewritePatternSet *patterns) { + patterns + ->add( + converter, patterns->getContext()); } diff --git a/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp b/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp index 50dc55667fb94e..fa1c2cf7fba986 100644 --- a/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp +++ b/mlir/lib/Conversion/TosaToTensor/TosaToTensorPass.cpp @@ -42,7 +42,10 @@ struct TosaToTensor : public impl::TosaToTensorBase { target.addLegalDialect(); target.addLegalDialect(); - mlir::tosa::populateTosaToTensorConversionPatterns(&patterns); + TypeConverter converter; + mlir::tosa::populateTosaTypeConversion(converter); + + mlir::tosa::populateTosaToTensorConversionPatterns(converter, &patterns); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) diff --git a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp index fbe2ecab8adcaa..462044417b5fb8 100644 --- a/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp +++ b/mlir/lib/Dialect/Arith/IR/InferIntRangeInterfaceImpls.cpp @@ -295,18 +295,24 @@ void arith::CmpIOp::inferResultRanges(ArrayRef argRanges, // SelectOp 
//===----------------------------------------------------------------------===// -void arith::SelectOp::inferResultRanges(ArrayRef argRanges, - SetIntRangeFn setResultRange) { - std::optional mbCondVal = argRanges[0].getConstantValue(); +void arith::SelectOp::inferResultRangesFromOptional( + ArrayRef argRanges, SetIntLatticeFn setResultRange) { + std::optional mbCondVal = + argRanges[0].isUninitialized() + ? std::nullopt + : argRanges[0].getValue().getConstantValue(); + + const IntegerValueRange &trueCase = argRanges[1]; + const IntegerValueRange &falseCase = argRanges[2]; if (mbCondVal) { if (mbCondVal->isZero()) - setResultRange(getResult(), argRanges[2]); + setResultRange(getResult(), falseCase); else - setResultRange(getResult(), argRanges[1]); + setResultRange(getResult(), trueCase); return; } - setResultRange(getResult(), argRanges[1].rangeUnion(argRanges[2])); + setResultRange(getResult(), IntegerValueRange::join(trueCase, falseCase)); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Arith/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Arith/Transforms/Bufferize.cpp deleted file mode 100644 index 9a066756f429ca..00000000000000 --- a/mlir/lib/Dialect/Arith/Transforms/Bufferize.cpp +++ /dev/null @@ -1,67 +0,0 @@ -//===- Bufferize.cpp - Bufferization for Arith ops ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Arith/Transforms/Passes.h" - -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" - -namespace mlir { -namespace arith { -#define GEN_PASS_DEF_ARITHBUFFERIZEPASS -#include "mlir/Dialect/Arith/Transforms/Passes.h.inc" -} // namespace arith -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -/// Pass to bufferize Arith ops. -struct ArithBufferizePass - : public arith::impl::ArithBufferizePassBase { - using ArithBufferizePassBase::ArithBufferizePassBase; - - ArithBufferizePass(uint64_t alignment = 0, bool constantOpOnly = false) - : constantOpOnly(constantOpOnly) { - this->alignment = alignment; - } - - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - if (constantOpOnly) { - options.opFilter.allowOperation(); - } else { - options.opFilter.allowDialect(); - } - options.bufferAlignment = alignment; - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - arith::registerBufferizableOpInterfaceExternalModels(registry); - } - -private: - bool constantOpOnly; -}; -} // namespace - -std::unique_ptr -mlir::arith::createConstantBufferizePass(uint64_t alignment) { - return std::make_unique(alignment, - /*constantOpOnly=*/true); -} diff --git a/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt index 12659eaba1fa5e..6b8bde8dc2aaf3 100644 --- 
a/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Arith/Transforms/CMakeLists.txt @@ -1,7 +1,6 @@ add_mlir_dialect_library(MLIRArithTransforms BufferDeallocationOpInterfaceImpl.cpp BufferizableOpInterfaceImpl.cpp - Bufferize.cpp BufferViewFlowOpInterfaceImpl.cpp EmulateUnsupportedFloats.cpp EmulateWideInt.cpp diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 7ba347a1f15e47..0fddd60eb8140e 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -320,29 +320,6 @@ struct OneShotBufferizePass }; } // namespace -namespace { -struct BufferizationBufferizePass - : public bufferization::impl::BufferizationBufferizeBase< - BufferizationBufferizePass> { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); - } -}; -} // namespace - -std::unique_ptr mlir::bufferization::createBufferizationBufferizePass() { - return std::make_unique(); -} - std::unique_ptr mlir::bufferization::createOneShotBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp deleted file mode 100644 index 8812ca14ba6109..00000000000000 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===- Bufferize.cpp - Bufferization of linalg ops ------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Linalg/Passes.h" - -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/IR/BuiltinDialect.h" -#include "mlir/IR/Operation.h" -#include "mlir/Pass/Pass.h" - -namespace mlir { -#define GEN_PASS_DEF_LINALGBUFFERIZEPASS -#include "mlir/Dialect/Linalg/Passes.h.inc" -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -/// Converts Linalg operations that work on tensor-type operands or results to -/// work on buffers. 
-struct LinalgBufferizePass - : public impl::LinalgBufferizePassBase { - using impl::LinalgBufferizePassBase< - LinalgBufferizePass>::LinalgBufferizePassBase; - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - linalg::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index ed9f40089282a6..7e3dc56e0acdc9 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -2,7 +2,6 @@ add_mlir_dialect_library(MLIRLinalgTransforms AllInterfaces.cpp BubbleUpExtractSlice.cpp BufferizableOpInterfaceImpl.cpp - Bufferize.cpp ConstantFold.cpp ConvertToDestinationStyle.cpp ConvertConv2DToImg2Col.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp index 8fffabf11f3fdd..2e6079e1402e1d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConstantFold.cpp @@ -23,21 +23,21 @@ using namespace mlir; using namespace mlir::linalg; namespace { -/// Base class for constant folding linalg.generic ops with N inputs, 1 output, -/// and permutation indexing maps. +/// Base class for constant folding linalg structured ops with N inputs, 1 +/// output, and permutation indexing maps. 
/// /// `ConcreteType` should provide methods with signatures /// /// ```c++ -/// bool matchIndexingMaps(GenericOp genericOp) const; -/// RegionComputationFn getRegionComputeFn(GenericOp) const; +/// bool matchIndexingMaps(LinalgOp linalgOp) const; +/// RegionComputationFn getRegionComputeFn(LinalgOp) const; /// ``` /// /// The latter inspects the region and returns the computation inside as a /// functor. The functor will be invoked with constant elements for all inputs /// and should return the corresponding computed constant element for output. template -class FoldConstantBase : public OpRewritePattern { +class FoldConstantBase : public OpInterfaceRewritePattern { public: struct APIntOrFloat { std::optional apInt; @@ -52,25 +52,26 @@ class FoldConstantBase : public OpRewritePattern { FoldConstantBase(MLIRContext *context, const ControlFusionFn &controlFn, PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), controlFn(controlFn) {} + : OpInterfaceRewritePattern(context, benefit), + controlFn(controlFn) {} - LogicalResult matchAndRewrite(GenericOp genericOp, + LogicalResult matchAndRewrite(LinalgOp linalgOp, PatternRewriter &rewriter) const override { // Mixed and buffer sematics aren't supported. - if (!genericOp.hasPureTensorSemantics()) + if (!linalgOp.hasPureTensorSemantics()) return failure(); // Only support ops generating one output for now. - if (genericOp.getNumDpsInits() != 1) + if (linalgOp.getNumDpsInits() != 1) return failure(); - auto outputType = dyn_cast(genericOp.getResultTypes().front()); + auto outputType = dyn_cast(linalgOp->getResultTypes().front()); // Require the output types to be static given that we are generating // constants. 
if (!outputType || !outputType.hasStaticShape()) return failure(); - if (!llvm::all_of(genericOp.getInputs(), [](Value input) { + if (!llvm::all_of(linalgOp.getDpsInputs(), [](Value input) { return isa(input.getType()); })) return failure(); @@ -80,7 +81,7 @@ class FoldConstantBase : public OpRewritePattern { return cast(value.getType()).getElementType(); }; if (!llvm::all_equal( - llvm::map_range(genericOp->getOperands(), getOperandElementType))) + llvm::map_range(linalgOp->getOperands(), getOperandElementType))) return failure(); // We can only handle the case where we have int/float elements. @@ -93,30 +94,30 @@ class FoldConstantBase : public OpRewritePattern { // entirely in the compiler, without needing to turn all indices into // Values, and then do affine apply on them, and then match back the // constant again. - if (!llvm::all_of(genericOp.getIndexingMapsArray(), + if (!llvm::all_of(linalgOp.getIndexingMapsArray(), [](AffineMap map) { return map.isPermutation(); })) return failure(); - for (OpOperand &operand : genericOp.getDpsInitsMutable()) { - if (genericOp.payloadUsesValueFromOperand(&operand)) + for (OpOperand &operand : linalgOp.getDpsInitsMutable()) { + if (linalgOp.payloadUsesValueFromOperand(&operand)) return failure(); } // Further check the indexing maps are okay for the ConcreteType. - if (!static_cast(this)->matchIndexingMaps(genericOp)) + if (!static_cast(this)->matchIndexingMaps(linalgOp)) return failure(); // Defer to the concrete type to check the region and discover the // computation inside. RegionComputationFn computeFn = - static_cast(this)->getRegionComputeFn(genericOp); + static_cast(this)->getRegionComputeFn(linalgOp); if (!computeFn) return failure(); // All inputs should be constants. 
- int numInputs = genericOp.getNumDpsInputs(); + int numInputs = linalgOp.getNumDpsInputs(); SmallVector inputValues(numInputs); - for (const auto &en : llvm::enumerate(genericOp.getDpsInputOperands())) { + for (const auto &en : llvm::enumerate(linalgOp.getDpsInputOperands())) { if (!matchPattern(en.value()->get(), m_Constant(&inputValues[en.index()]))) return failure(); @@ -124,12 +125,11 @@ class FoldConstantBase : public OpRewritePattern { // Identified this as a potential candidate for folding. Now check the // policy to see whether we are allowed to proceed. - for (OpOperand *operand : genericOp.getDpsInputOperands()) { + for (OpOperand *operand : linalgOp.getDpsInputOperands()) { if (!controlFn(operand)) return failure(); } - auto linalgOp = cast(genericOp.getOperation()); SmallVector loopBounds = linalgOp.computeStaticLoopSizes(); int64_t numElements = outputType.getNumElements(); @@ -155,8 +155,8 @@ class FoldConstantBase : public OpRewritePattern { SmallVector> inputDims; for (int i = 0; i < numInputs; ++i) - inputDims.push_back(getDimPositions(genericOp.getIndexingMapsArray()[i])); - auto outputDims = getDimPositions(genericOp.getIndexingMapsArray().back()); + inputDims.push_back(getDimPositions(linalgOp.getIndexingMapsArray()[i])); + auto outputDims = getDimPositions(linalgOp.getIndexingMapsArray().back()); auto outputShape = outputType.getShape(); // Allocate small vectors for index delinearization. Initial values do not @@ -173,7 +173,7 @@ class FoldConstantBase : public OpRewritePattern { APIntOrFloatArray computeFnInputs; auto inputShapes = llvm::to_vector<4>( - llvm::map_range(genericOp.getInputs(), [](Value value) { + llvm::map_range(linalgOp.getDpsInputs(), [](Value value) { return cast(value.getType()).getShape(); })); @@ -254,7 +254,7 @@ class FoldConstantBase : public OpRewritePattern { isFloat ? 
DenseElementsAttr::get(outputType, fpOutputValues) : DenseElementsAttr::get(outputType, intOutputValues); - rewriter.replaceOpWithNewOp(genericOp, outputAttr); + rewriter.replaceOpWithNewOp(linalgOp, outputAttr); return success(); } @@ -262,18 +262,20 @@ class FoldConstantBase : public OpRewritePattern { ControlFusionFn controlFn; }; -// Folds linalg.generic ops that are actually transposes on constant values. +// Folds linalg.transpose (and linalg.generic ops that are actually transposes) +// on constant values. struct FoldConstantTranspose : public FoldConstantBase { + using FoldConstantBase::FoldConstantBase; - bool matchIndexingMaps(GenericOp genericOp) const { + bool matchIndexingMaps(LinalgOp linalgOp) const { // We should have one input and one output. - return genericOp.getIndexingMapsArray().size() == 2; + return linalgOp.getIndexingMapsArray().size() == 2; } - RegionComputationFn getRegionComputeFn(GenericOp genericOp) const { + RegionComputationFn getRegionComputeFn(LinalgOp linalgOp) const { // Make sure the region only contains a yield op. - Block &body = genericOp.getRegion().front(); + Block &body = linalgOp->getRegion(0).front(); if (!llvm::hasSingleElement(body)) return nullptr; auto yieldOp = dyn_cast(body.getTerminator()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp index 65efa18af18f65..c0829397f1f851 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -351,7 +351,8 @@ static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata( auto isUnitDim = [&](unsigned dim) { if (auto dimExpr = dyn_cast(exprs[dim])) { unsigned oldPosition = dimExpr.getPosition(); - return !oldDimsToNewDimsMap.count(oldPosition); + return !oldDimsToNewDimsMap.count(oldPosition) && + (operandShape[dim] == 1); } // Handle the other case where the shape is 1, and is accessed using a // constant 0. 
diff --git a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp deleted file mode 100644 index 9dadbdbc91eca9..00000000000000 --- a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//====----- Bufferize.cpp - Bufferization of shape ops ---------*- C++-*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Shape/Transforms/Passes.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Pass/Pass.h" - -namespace mlir { -#define GEN_PASS_DEF_SHAPEBUFFERIZE -#include "mlir/Dialect/Shape/Transforms/Passes.h.inc" -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -struct ShapeBufferizePass - : public impl::ShapeBufferizeBase { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - shape::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace - -std::unique_ptr> mlir::createShapeBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt index 
7c9b0d2e5e3a8e..a51c6780c28665 100644 --- a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_dialect_library(MLIRShapeOpsTransforms BufferizableOpInterfaceImpl.cpp - Bufferize.cpp OutlineShapeComputation.cpp RemoveShapeConstraints.cpp ShapeToShapeLowering.cpp diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index f57353b5892b5a..b42d58634a36c4 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -23,7 +23,6 @@ namespace mlir { #define GEN_PASS_DEF_SPARSEASSEMBLER -#define GEN_PASS_DEF_SPARSEENCODINGPROPAGATION #define GEN_PASS_DEF_SPARSEREINTERPRETMAP #define GEN_PASS_DEF_PRESPARSIFICATIONREWRITE #define GEN_PASS_DEF_SPARSIFICATIONPASS @@ -61,14 +60,6 @@ struct SparseAssembler : public impl::SparseAssemblerBase { } }; -struct SparseEncodingPropagation - : public impl::SparseEncodingPropagationBase { - SparseEncodingPropagation() = default; - SparseEncodingPropagation(const SparseEncodingPropagation &pass) = default; - - void runOnOperation() override {} -}; - struct SparseReinterpretMap : public impl::SparseReinterpretMapBase { SparseReinterpretMap() = default; @@ -407,10 +398,6 @@ std::unique_ptr mlir::createSparseAssembler() { return std::make_unique(); } -std::unique_ptr mlir::createSparseEncodingPropagationPass() { - return std::make_unique(); -} - std::unique_ptr mlir::createSparseReinterpretMapPass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp deleted file mode 100644 index d27c4576a8b7a9..00000000000000 --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===- Bufferize.cpp - Bufferization for `tensor` dialect ops -------------===// -// -// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements bufferization of `tensor` dialect ops -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/SCF/IR/SCF.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Tensor/Transforms/Passes.h" -#include "mlir/IR/ImplicitLocOpBuilder.h" -#include "mlir/Transforms/DialectConversion.h" - -namespace mlir { -namespace tensor { -#define GEN_PASS_DEF_TENSORBUFFERIZE -#include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" -} // namespace tensor -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -struct TensorBufferizePass - : public tensor::impl::TensorBufferizeBase { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); - tensor::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace - -std::unique_ptr mlir::tensor::createTensorBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt index 
0aabdaf667b9d8..ce32dea09bb0b5 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_dialect_library(MLIRTensorTransforms BufferizableOpInterfaceImpl.cpp - Bufferize.cpp ConcatOpPatterns.cpp EmptyOpPatterns.cpp ExtractSliceFromReshapeUtils.cpp diff --git a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt index 0e6510ba1e9255..c78a74b874aff1 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tosa/Transforms/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRTosaTransforms TosaLayerwiseConstantFoldPass.cpp TosaMakeBroadcastable.cpp TosaOptionalDecompositions.cpp + TosaTypeConverters.cpp TosaValidation.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaTypeConverters.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaTypeConverters.cpp new file mode 100644 index 00000000000000..d2650de8cd7f02 --- /dev/null +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaTypeConverters.cpp @@ -0,0 +1,52 @@ + +//===- TosaTypeConverters.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Type converters for lowering TOSA to linalg/arith. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tosa/Transforms/Passes.h" + +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +void mlir::tosa::populateTosaTypeConversion(TypeConverter &converter) { + converter.addConversion([&](Type type) -> std::optional { + if (type.isUnsignedInteger()) { + return IntegerType::get(type.getContext(), type.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + } + return type; + }); + converter.addConversion([&](TensorType type) -> std::optional { + auto converted = converter.convertType(type.getElementType()); + if (!converted) + return {}; + return type.clone(converted); + }); + converter.addSourceMaterialization([&](OpBuilder &builder, Type resultType, + ValueRange inputs, + Location loc) -> std::optional { + if (inputs.size() != 1) + return std::nullopt; + + return builder.create(loc, resultType, inputs) + .getResult(0); + }); + converter.addTargetMaterialization([&](OpBuilder &builder, Type resultType, + ValueRange inputs, + Location loc) -> std::optional { + if (inputs.size() != 1) + return std::nullopt; + + return builder.create(loc, resultType, inputs) + .getResult(0); + }); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp deleted file mode 100644 index ee99a99b561090..00000000000000 --- a/mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===- Bufferize.cpp - Bufferization for `vector` dialect ops -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements bufferization of `vector` dialect ops -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" - -#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Vector/IR/VectorOps.h" -#include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h" -#include "mlir/Dialect/Vector/Transforms/Passes.h" - -namespace mlir { -namespace vector { -#define GEN_PASS_DEF_VECTORBUFFERIZE -#include "mlir/Dialect/Vector/Transforms/Passes.h.inc" -} // namespace vector -} // namespace mlir - -using namespace mlir; -using namespace bufferization; - -namespace { -struct VectorBufferizePass - : public vector::impl::VectorBufferizeBase { - void runOnOperation() override { - BufferizationOptions options = getPartialBufferizationOptions(); - options.opFilter.allowDialect(); - - if (failed(bufferizeOp(getOperation(), options))) - signalPassFailure(); - } - - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - vector::registerBufferizableOpInterfaceExternalModels(registry); - } -}; -} // namespace - -std::unique_ptr mlir::vector::createVectorBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt index c4b6abd3e23615..4dbefdd376a8b9 100644 --- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt @@ -1,6 +1,5 @@ add_mlir_dialect_library(MLIRVectorTransforms BufferizableOpInterfaceImpl.cpp - Bufferize.cpp 
LowerVectorBroadcast.cpp LowerVectorContract.cpp LowerVectorGather.cpp diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp index 802a64b0805ee4..156bf742f6297a 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp @@ -44,6 +44,19 @@ static bool isLessThanTargetBitWidth(Operation *op, unsigned targetBitWidth) { return true; } +static bool isLessThanOrEqualTargetBitWidth(Type t, unsigned targetBitWidth) { + VectorType vecType = dyn_cast(t); + // Reject index since getElementTypeBitWidth will abort for Index types. + if (!vecType || vecType.getElementType().isIndex()) + return false; + // There are no dimension to fold if it is a 0-D vector. + if (vecType.getRank() == 0) + return false; + unsigned trailingVecDimBitWidth = + vecType.getShape().back() * vecType.getElementTypeBitWidth(); + return trailingVecDimBitWidth <= targetBitWidth; +} + namespace { struct LinearizeConstant final : OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -355,6 +368,88 @@ struct LinearizeVectorExtract final return success(); } +private: + unsigned targetVectorBitWidth; +}; + +/// This pattern converts the InsertOp to a ShuffleOp that works on a +/// linearized vector. +/// Following, +/// vector.insert %source %destination [ position ] +/// is converted to : +/// %source_1d = vector.shape_cast %source +/// %destination_1d = vector.shape_cast %destination +/// %out_1d = vector.shuffle %destination_1d, %source_1d [ shuffle_indices_1d +/// ] %out_nd = vector.shape_cast %out_1d +/// `shuffle_indices_1d` is computed using the position of the original insert. 
+struct LinearizeVectorInsert final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LinearizeVectorInsert( + const TypeConverter &typeConverter, MLIRContext *context, + unsigned targetVectBitWidth = std::numeric_limits::max(), + PatternBenefit benefit = 1) + : OpConversionPattern(typeConverter, context, benefit), + targetVectorBitWidth(targetVectBitWidth) {} + LogicalResult + matchAndRewrite(vector::InsertOp insertOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Type dstTy = getTypeConverter()->convertType(insertOp.getDestVectorType()); + assert(!(insertOp.getDestVectorType().isScalable() || + cast(dstTy).isScalable()) && + "scalable vectors are not supported."); + + if (!isLessThanOrEqualTargetBitWidth(insertOp.getSourceType(), + targetVectorBitWidth)) + return rewriter.notifyMatchFailure( + insertOp, "Can't flatten since targetBitWidth < OpSize"); + + // dynamic position is not supported + if (insertOp.hasDynamicPosition()) + return rewriter.notifyMatchFailure(insertOp, + "dynamic position is not supported."); + auto srcTy = insertOp.getSourceType(); + auto srcAsVec = dyn_cast(srcTy); + uint64_t srcSize = 0; + if (srcAsVec) { + srcSize = srcAsVec.getNumElements(); + } else { + return rewriter.notifyMatchFailure(insertOp, + "scalars are not supported."); + } + + auto dstShape = insertOp.getDestVectorType().getShape(); + const auto dstSize = insertOp.getDestVectorType().getNumElements(); + auto dstSizeForOffsets = dstSize; + + // compute linearized offset + int64_t linearizedOffset = 0; + auto offsetsNd = insertOp.getStaticPosition(); + for (auto [dim, offset] : llvm::enumerate(offsetsNd)) { + dstSizeForOffsets /= dstShape[dim]; + linearizedOffset += offset * dstSizeForOffsets; + } + + llvm::SmallVector indices(dstSize); + auto origValsUntil = indices.begin(); + std::advance(origValsUntil, linearizedOffset); + std::iota(indices.begin(), origValsUntil, + 0); // original values that remain [0, 
offset) + auto newValsUntil = origValsUntil; + std::advance(newValsUntil, srcSize); + std::iota(origValsUntil, newValsUntil, + dstSize); // new values [offset, offset+srcNumElements) + std::iota(newValsUntil, indices.end(), + linearizedOffset + srcSize); // the rest of original values + // [offset+srcNumElements, end) + + rewriter.replaceOpWithNewOp( + insertOp, dstTy, adaptor.getDest(), adaptor.getSource(), + rewriter.getI64ArrayAttr(indices)); + + return success(); + } + private: unsigned targetVectorBitWidth; }; @@ -410,6 +505,6 @@ void mlir::vector::populateVectorLinearizeShuffleLikeOpsPatterns( : true; }); patterns.add( + LinearizeVectorInsert, LinearizeVectorExtractStridedSlice>( typeConverter, patterns.getContext(), targetBitWidth); } diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp index b3f6c0ee3cc32d..d879b93586899b 100644 --- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp +++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp @@ -126,3 +126,51 @@ raw_ostream &mlir::operator<<(raw_ostream &os, const ConstantIntRanges &range) { return os << "unsigned : [" << range.umin() << ", " << range.umax() << "] signed : [" << range.smin() << ", " << range.smax() << "]"; } + +IntegerValueRange IntegerValueRange::getMaxRange(Value value) { + unsigned width = ConstantIntRanges::getStorageBitwidth(value.getType()); + if (width == 0) + return {}; + + APInt umin = APInt::getMinValue(width); + APInt umax = APInt::getMaxValue(width); + APInt smin = width != 0 ? APInt::getSignedMinValue(width) : umin; + APInt smax = width != 0 ? 
APInt::getSignedMaxValue(width) : umax; + return IntegerValueRange{ConstantIntRanges{umin, umax, smin, smax}}; +} + +raw_ostream &mlir::operator<<(raw_ostream &os, const IntegerValueRange &range) { + range.print(os); + return os; +} + +void mlir::intrange::detail::defaultInferResultRanges( + InferIntRangeInterface interface, ArrayRef argRanges, + SetIntLatticeFn setResultRanges) { + llvm::SmallVector unpacked; + unpacked.reserve(argRanges.size()); + + for (const IntegerValueRange &range : argRanges) { + if (range.isUninitialized()) + return; + unpacked.push_back(range.getValue()); + } + + interface.inferResultRanges( + unpacked, + [&setResultRanges](Value value, const ConstantIntRanges &argRanges) { + setResultRanges(value, IntegerValueRange{argRanges}); + }); +} + +void mlir::intrange::detail::defaultInferResultRangesFromOptional( + InferIntRangeInterface interface, ArrayRef argRanges, + SetIntRangeFn setResultRanges) { + auto ranges = llvm::to_vector_of(argRanges); + interface.inferResultRangesFromOptional( + ranges, + [&setResultRanges](Value value, const IntegerValueRange &argRanges) { + if (!argRanges.isUninitialized()) + setResultRanges(value, argRanges.getValue()); + }); +} diff --git a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp index fe1a67d6287386..5b8d35e7bd5197 100644 --- a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp +++ b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp @@ -76,7 +76,7 @@ static ConstantIntRanges minMaxBy(ConstArithFn op, ArrayRef lhs, //===----------------------------------------------------------------------===// ConstantIntRanges -mlir::intrange::inferIndexOp(InferRangeFn inferFn, +mlir::intrange::inferIndexOp(const InferRangeFn &inferFn, ArrayRef argRanges, intrange::CmpMode mode) { ConstantIntRanges sixtyFour = inferFn(argRanges); diff --git a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir index 
b453b69a214e86..dac3fd99b607ce 100644 --- a/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir +++ b/mlir/test/Conversion/ArithToEmitC/arith-to-emitc.mlir @@ -88,6 +88,17 @@ func.func @arith_index(%arg0: index, %arg1: index) { // ----- +// CHECK-LABEL: arith_signed_integer_div_rem +func.func @arith_signed_integer_div_rem(%arg0: i32, %arg1: i32) { + // CHECK: emitc.div %arg0, %arg1 : (i32, i32) -> i32 + %0 = arith.divsi %arg0, %arg1 : i32 + // CHECK: emitc.rem %arg0, %arg1 : (i32, i32) -> i32 + %1 = arith.remsi %arg0, %arg1 : i32 + return +} + +// ----- + func.func @arith_select(%arg0: i1, %arg1: tensor<8xi32>, %arg2: tensor<8xi32>) -> () { // CHECK: [[V0:[^ ]*]] = emitc.conditional %arg0, %arg1, %arg2 : tensor<8xi32> %0 = arith.select %arg0, %arg1, %arg2 : i1, tensor<8xi32> diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 8a2d8bd7967caf..a8d61a6a0f6fd9 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -2,7 +2,8 @@ // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module -// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" +// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" + gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() // CHECK32-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir index 72e7e4cc840886..1e62e25176a007 
100644 --- a/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir +++ b/mlir/test/Conversion/TosaToTensor/tosa-to-tensor.mlir @@ -420,6 +420,20 @@ func.func @test_reshape_6d_down_s2s_explicit(%arg0: tensor<1x2x3x5x7x11xf32>) -> // ----- +// CHECK-LABEL: @test_reshape_samerank_unsigned +// CHECK-SAME: (%[[ARG0:.*]]: tensor<3x2xui8>) +func.func @test_reshape_samerank_unsigned(%arg0: tensor<3x2xui8>) -> tensor<2x3xui8> { + // CHECK-NEXT: %[[CAST1:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : tensor<3x2xui8> to tensor<3x2xi8> + // CHECK-NEXT: %[[RESHAPE1:.*]] = tensor.collapse_shape %[[CAST1]] {{\[}}[0, 1]] : tensor<3x2xi8> into tensor<6xi8> + // CHECK-NEXT: %[[RESHAPE2:.*]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1]] output_shape {{\[}}2, 3] : tensor<6xi8> into tensor<2x3xi8> + // CHECK-NEXT: %[[CAST2:.*]] = builtin.unrealized_conversion_cast %[[RESHAPE2]] : tensor<2x3xi8> to tensor<2x3xui8> + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor<3x2xui8>) -> tensor<2x3xui8> + // CHECK-NEXT: return %[[CAST2]] + return %0 : tensor<2x3xui8> +} + +// ----- + // CHECK-LABEL: func @slice func.func @slice(%arg0: tensor<6xf32>) ->() { // CHECK: [[SLICE:%.+]] = tensor.extract_slice %arg0[2] [1] [1] diff --git a/mlir/test/Dialect/Arith/bufferize.mlir b/mlir/test/Dialect/Arith/bufferize.mlir index 944954e9e4edd8..a3b1454fb68f66 100644 --- a/mlir/test/Dialect/Arith/bufferize.mlir +++ b/mlir/test/Dialect/Arith/bufferize.mlir @@ -1,5 +1,4 @@ -// RUN: mlir-opt %s -arith-bufferize -split-input-file -verify-diagnostics | FileCheck %s -// RUN: mlir-opt %s -arith-bufferize=alignment=64 -split-input-file -verify-diagnostics | FileCheck --check-prefix=ALIGNED %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=arith,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file -verify-diagnostics | FileCheck %s // CHECK-LABEL: func @index_cast( // CHECK-SAME: %[[TENSOR:.*]]: tensor, %[[SCALAR:.*]]: i32 @@ -22,10 +21,7 @@
func.func @index_cast(%tensor: tensor, %scalar: i32) -> (tensor, ind // The name isn't load-bearing though. // CHECK: memref.global "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00> -// CHECK-NOT: alignment - -// ALIGNED: memref.global "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00> -// ALIGNED-SAME: {alignment = 64 : i64} +// CHECK-SAME: {alignment = 64 : i64} // CHECK: @basic func.func @basic() -> tensor<3x4xf32> { diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 1a387c20c4b297..e4f95bb0545a20 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -2950,14 +2950,6 @@ func.func @unsignedExtendConstantResource() -> tensor { return %ext : tensor } -// Just checks that this doesn't crash. -// CHECK-LABEL: @signedExtendSplatAsDynamicShape -func.func @signedExtendSplatAsDynamicShape() -> tensor { - %splat = arith.constant dense<5> : tensor<2xi16> - %extsplat = arith.extsi %splat : tensor<2xi16> to tensor - return %extsplat : tensor -} - // CHECK-LABEL: @extsi_i0 // CHECK: %[[ZERO:.*]] = arith.constant 0 : i16 // CHECK: return %[[ZERO]] : i16 diff --git a/mlir/test/Dialect/Arith/int-range-interface.mlir b/mlir/test/Dialect/Arith/int-range-interface.mlir index 5b538197a0c117..60f0ab41afa48d 100644 --- a/mlir/test/Dialect/Arith/int-range-interface.mlir +++ b/mlir/test/Dialect/Arith/int-range-interface.mlir @@ -899,3 +899,22 @@ func.func @test_shl_i8_nowrap() -> i8 { %2 = test.reflect_bounds %1 : i8 return %2: i8 } + +/// A test case to ensure that the ranges for unsupported ops are initialized +/// properly to maxRange, rather than left uninitialized. +/// In this test case, the previous behavior would leave the ranges for %a and +/// %b uninitialized, resulting in arith.cmpf's range not being updated, even +/// though it has an integer valued result. 
+ +// CHECK-LABEL: func @test_cmpf_propagates +// CHECK: test.reflect_bounds {smax = 2 : index, smin = 1 : index, umax = 2 : index, umin = 1 : index} +func.func @test_cmpf_propagates(%a: f32, %b: f32) -> index { + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + + %0 = arith.cmpf ueq, %a, %b : f32 + %1 = arith.select %0, %c1, %c2 : index + %2 = test.reflect_bounds %1 : index + func.return %2 : index +} + diff --git a/mlir/test/Dialect/Arith/invalid.mlir b/mlir/test/Dialect/Arith/invalid.mlir index ada849220bb839..652aa738ad3924 100644 --- a/mlir/test/Dialect/Arith/invalid.mlir +++ b/mlir/test/Dialect/Arith/invalid.mlir @@ -1,13 +1,21 @@ // RUN: mlir-opt -split-input-file %s -verify-diagnostics func.func @test_index_cast_shape_error(%arg0 : tensor) -> tensor<2xi64> { - // expected-error @+1 {{'arith.index_cast' op requires the same shape for all operands and results}} + // expected-error @+1 {{'arith.index_cast' op failed to verify that input and output have the same tensor dimensions}} %0 = arith.index_cast %arg0 : tensor to tensor<2xi64> return %0 : tensor<2xi64> } // ----- +func.func @test_index_cast_shape_dim_error(%arg0 : tensor<2xindex>) -> tensor { + // expected-error @+1 {{'arith.index_cast' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.index_cast %arg0 : tensor<2xindex> to tensor + return %0 : tensor +} + +// ----- + func.func @test_index_cast_tensor_error(%arg0 : tensor) -> i64 { // expected-error @+1 {{'arith.index_cast' op requires the same shape for all operands and results}} %0 = arith.index_cast %arg0 : tensor to i64 @@ -655,6 +663,14 @@ func.func @extsi_scalable_to_fl(%arg0 : vector<[4]xi32>) { // ----- +func.func @extsi_tensor_dim(%arg0 : tensor<4xi32>) { + // expected-error@+1 {{'arith.extsi' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.extsi %arg0 : tensor<4xi32> to tensor + return +} + +// ----- + func.func @extf_scalable_to_fl(%arg0 : 
vector<[4]xf32>) { // expected-error@+1 {{'arith.extf' op requires the same shape for all operands and results}} %0 = arith.extf %arg0 : vector<[4]xf32> to vector<4xf64> @@ -703,6 +719,22 @@ func.func @bitcast_scalable_to_fl(%arg0 : vector<[4]xf32>) { // ----- +func.func @bitcast_tensor_dim(%arg0 : tensor<4xf32>) { + // expected-error@+1 {{'arith.bitcast' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.bitcast %arg0 : tensor<4xf32> to tensor + return +} + +// ----- + +func.func @bitcast_tensor_dim(%arg0 : tensor) { + // expected-error@+1 {{'arith.bitcast' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.bitcast %arg0 : tensor to tensor<4xi32> + return +} + +// ----- + func.func @trunci_fl_to_scalable(%arg0 : vector<4xi32>) { // expected-error@+1 {{'arith.trunci' op requires the same shape for all operands and results}} %0 = arith.trunci %arg0 : vector<4xi32> to vector<[4]xi8> @@ -719,6 +751,14 @@ func.func @truncf_fl_to_scalable(%arg0 : vector<4xf64>) { // ----- +func.func @truncf_tensor_dim(%arg0 : tensor<4xf64>) { + // expected-error@+1 {{'arith.truncf' op failed to verify that input and output have the same tensor dimensions}} + %0 = arith.truncf %arg0 : tensor<4xf64> to tensor + return +} + +// ----- + func.func @extui_fl_to_scalable(%arg0 : vector<4xi32>) { // expected-error@+1 {{'arith.extui' op requires the same shape for all operands and results}} %0 = arith.extui %arg0 : vector<4xi32> to vector<[4]xi64> diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir index 29f27e6838e661..e8ab1184b1fd26 100644 --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -linalg-bufferize -canonicalize -cse -split-input-file %s | FileCheck %s +// RUN: mlir-opt --one-shot-bufferize="dialect-filter=linalg,bufferization copy-before-write unknown-type-conversion=identity-layout-map" 
-canonicalize -cse -split-input-file %s | FileCheck %s #map0 = affine_map<(d0) -> (d0)> @@ -189,31 +189,3 @@ func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor { // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[OUT_TENSOR]] } - -// ----- - -// This is a regression test. The linalg-bufferize pass should ignore all func -// dialect ops. - -// CHECK-LABEL: func private @csum(tensor<6xi64>) -> tensor<6xi64> -func.func private @csum(%arg0: tensor<6xi64>) -> tensor<6xi64> - -// CHECK: func public @main(%[[arg0:.*]]: tensor<2x3xi1>) -// CHECK: %[[collapse:.*]] = tensor.collapse_shape %[[arg0]] -// CHECK: %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]] -// CHECK: %[[alloc:.*]] = memref.alloc() -// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>) -// CHECK: %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]] -// CHECK: %[[call:.*]] = call @csum(%[[generic_t]]) -// CHECK: return %[[call]] -func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> { - %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1> - %1 = tensor.empty() : tensor<6xi64> - %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) { - ^bb0(%arg1: i1, %arg2: i64): - %4 = arith.extui %arg1 : i1 to i64 - linalg.yield %4 : i64 - } -> tensor<6xi64> - %3 = func.call @csum(%2) : (tensor<6xi64>) -> tensor<6xi64> - return %3 : tensor<6xi64> -} diff --git a/mlir/test/Dialect/Linalg/constant-fold.mlir b/mlir/test/Dialect/Linalg/constant-fold.mlir new file mode 100644 index 00000000000000..3929c26a3382f4 --- /dev/null +++ b/mlir/test/Dialect/Linalg/constant-fold.mlir @@ -0,0 +1,148 @@ +// RUN: mlir-opt %s -linalg-fuse-elementwise-ops -split-input-file | FileCheck %s + +// CHECK-LABEL: @transpose_fold_2d_fp32 +func.func @transpose_fold_2d_fp32(%init: 
tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + // CHECK: %[[CST:.+]] = arith.constant + // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf32> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %arg1 : f32 + } -> tensor<3x2xf32> + // CHECK: return %[[CST]] + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @transpose_fold_2d_fp64 +func.func @transpose_fold_2d_fp64(%init: tensor<3x2xf64>) -> tensor<3x2xf64> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf64> + // CHECK: %[[CST:.+]] = arith.constant + // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf64> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { + ^bb0(%arg1: f64, %arg2: f64): + linalg.yield %arg1 : f64 + } -> tensor<3x2xf64> + // CHECK: return %[[CST]] + return %1 : tensor<3x2xf64> +} + +// ----- + +// CHECK-LABEL: @transpose_fold_4d_i32 +func.func @transpose_fold_4d_i32(%init: tensor<3x1x4x2xi32>) -> tensor<3x1x4x2xi32> { + %input = arith.constant dense<[[ + [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], + [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] + ]]> : tensor<1x2x3x4xi32> + // CHECK: %[[CST:.+]] = arith.constant dense<[ + // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], + // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], + // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 
23]]] + // CHECK-SAME{LITERAL}: ]> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], + iterator_types = ["parallel", "parallel", "parallel", "parallel"] + } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { + ^bb0(%arg1: i32, %arg2: i32): + linalg.yield %arg1 : i32 + } -> tensor<3x1x4x2xi32> + // CHECK: return %[[CST]] + return %1 : tensor<3x1x4x2xi32> +} + +// ----- + +// CHECK-LABEL: @transpose_fold_4d_i16 +func.func @transpose_fold_4d_i16(%init: tensor<3x1x4x2xi16>) -> tensor<3x1x4x2xi16> { + %input = arith.constant dense<[[ + [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], + [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] + ]]> : tensor<1x2x3x4xi16> + // CHECK: %[[CST:.+]] = arith.constant dense<[ + // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], + // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], + // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] + // CHECK-SAME{LITERAL}: ]> + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], + iterator_types = ["parallel", "parallel", "parallel", "parallel"] + } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { + ^bb0(%arg1: i16, %arg2: i16): + linalg.yield %arg1 : i16 + } -> tensor<3x1x4x2xi16> + // CHECK: return %[[CST]] + return %1 : tensor<3x1x4x2xi16> +} + +// ----- + +// CHECK-LABEL: @transpose_nofold_non_cst_input +func.func @transpose_nofold_non_cst_input(%input: tensor<2x3xf32>, %init: tensor<3x2xf32>) -> tensor<3x2xf32> { + // CHECK: linalg.generic + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %arg1 : f32 + } -> 
tensor<3x2xf32> + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @transpose_nofold_yield_const +func.func @transpose_nofold_yield_const(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + %cst = arith.constant 8.0 : f32 + // CHECK: linalg.generic + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + linalg.yield %cst : f32 + } -> tensor<3x2xf32> + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @transpose_nofold_multi_ops_in_region +func.func @transpose_nofold_multi_ops_in_region(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + // CHECK: linalg.generic + %1 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"] + } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + ^bb0(%arg1: f32, %arg2: f32): + %add = arith.addf %arg1, %arg1 : f32 + linalg.yield %add : f32 + } -> tensor<3x2xf32> + return %1 : tensor<3x2xf32> +} + +// ----- + +// CHECK-LABEL: @named_transpose_fold_2d_fp32 +func.func @named_transpose_fold_2d_fp32(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { + %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> + // CHECK: %[[CST:.+]] = arith.constant + // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf32> + %1 = linalg.transpose ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) permutation = [1, 0] + // CHECK: return %[[CST]] + return %1 : tensor<3x2xf32> +} + +// ----- + + diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir 
b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index a9cbaaf7fdc485..8f9b12880adcf7 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -1087,3 +1087,46 @@ func.func @drop_known_unit_constant_low_high(%arg0: tensor<1x383x128xf32>) -> te // CHECK: } : tensor<383x128xf32> to tensor<384x128xf32> // CHECK: tensor.expand_shape %[[PADDED]] // CHECK-SAME: {{\[}}[0, 1], [2]] output_shape [1, 384, 128] : tensor<384x128xf32> into tensor<1x384x128xf32> + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> (0, d0)> +// CHECK: #[[$MAP2:.+]] = affine_map<(d0) -> ()> + +// CHECK-LABEL: func @drop_unit_dim_corresponding_to_dynamic_dim +// CHECK-SAME: %[[ARG0:.*]]: tensor<1x?x?x1xf32>, +// CHECK-SAME: %[[ARG1:.*]]: index) -> tensor { +// CHECK: %[[VAL_0:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_2:.*]] = arith.constant dense<1.000000e+00> : tensor +// CHECK: %[[VAL_3:.*]] = tensor.collapse_shape %[[ARG0]] {{\[\[}}0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor +// CHECK: %[[VAL_4:.*]] = tensor.empty(%[[ARG1]]) : tensor +// CHECK: %[[VAL_5:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[ARG1]], %[[VAL_1]]] +// CHECK: %[[VAL_6:.*]] = tensor.empty(%[[VAL_5]]) : tensor +// CHECK: %[[VAL_7:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins(%[[VAL_3]], %[[VAL_2]], %[[VAL_4]] : tensor, tensor, tensor) outs(%[[VAL_6]] : tensor) { +// CHECK: ^bb0(%[[VAL_8:.*]]: f32, %[[VAL_9:.*]]: f32, %[[VAL_10:.*]]: f32, %[[VAL_11:.*]]: f32): +// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_8]], %[[VAL_9]] : f32 +// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_10]], %[[VAL_12]] : f32 +// CHECK: linalg.yield %[[VAL_13]] : f32 +// CHECK: } -> tensor +// CHECK: %[[VAL_14:.*]] = tensor.expand_shape %[[VAL_7]] {{\[\[}}0, 1], [2, 3]] output_shape 
{{\[}}%[[VAL_0]], 1, 61, 1] : tensor into tensor +// CHECK: return %[[VAL_14]] : tensor +// CHECK: } + +#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)> +#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)> +#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)> +module { + func.func @drop_unit_dim_corresponding_to_dynamic_dim(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor { + %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32> + %0 = tensor.empty(%arg1) : tensor + %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor) { + ^bb0(%in: f32, %in_0: f32, %out: f32): + %2 = arith.mulf %in, %in_0 : f32 + %3 = arith.addf %out, %2 : f32 + linalg.yield %3 : f32 + } -> tensor + return %1 : tensor + } +} diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir index 15a4f6cdd3bbe4..e45a9fbb1052c1 100644 --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -777,139 +777,6 @@ func.func @fuse_scalar_constant(%arg0 : tensor) -> (tensor, te // ----- -// CHECK-LABEL: @transpose_fold_2d_fp32 -func.func @transpose_fold_2d_fp32(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> - // CHECK: %[[CST:.+]] = arith.constant - // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf32> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - 
linalg.yield %arg1 : f32 - } -> tensor<3x2xf32> - // CHECK: return %[[CST]] - return %1 : tensor<3x2xf32> -} - -// ----- - -// CHECK-LABEL: @transpose_fold_2d_fp64 -func.func @transpose_fold_2d_fp64(%init: tensor<3x2xf64>) -> tensor<3x2xf64> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf64> - // CHECK: %[[CST:.+]] = arith.constant - // CHECK-SAME{LITERAL}: dense<[[0.000000e+00, 3.000000e+00], [1.000000e+00, 4.000000e+00], [2.000000e+00, 5.000000e+00]]> : tensor<3x2xf64> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { - ^bb0(%arg1: f64, %arg2: f64): - linalg.yield %arg1 : f64 - } -> tensor<3x2xf64> - // CHECK: return %[[CST]] - return %1 : tensor<3x2xf64> -} - -// ----- - -// CHECK-LABEL: @transpose_fold_4d_i32 -func.func @transpose_fold_4d_i32(%init: tensor<3x1x4x2xi32>) -> tensor<3x1x4x2xi32> { - %input = arith.constant dense<[[ - [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], - [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] - ]]> : tensor<1x2x3x4xi32> - // CHECK: %[[CST:.+]] = arith.constant dense<[ - // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], - // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], - // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] - // CHECK-SAME{LITERAL}: ]> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], - iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { - ^bb0(%arg1: i32, %arg2: i32): - linalg.yield %arg1 : i32 - } -> tensor<3x1x4x2xi32> - // CHECK: return %[[CST]] - return %1 : tensor<3x1x4x2xi32> -} - -// ----- - -// CHECK-LABEL: @transpose_fold_4d_i16 -func.func @transpose_fold_4d_i16(%init: 
tensor<3x1x4x2xi16>) -> tensor<3x1x4x2xi16> { - %input = arith.constant dense<[[ - [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], - [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]] - ]]> : tensor<1x2x3x4xi16> - // CHECK: %[[CST:.+]] = arith.constant dense<[ - // CHECK-SAME{LITERAL}: [[[0, 12], [1, 13], [2, 14], [3, 15]]], - // CHECK-SAME{LITERAL}: [[[4, 16], [5, 17], [6, 18], [7, 19]]], - // CHECK-SAME{LITERAL}: [[[8, 20], [9, 21], [10, 22], [11, 23]]] - // CHECK-SAME{LITERAL}: ]> - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], - iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { - ^bb0(%arg1: i16, %arg2: i16): - linalg.yield %arg1 : i16 - } -> tensor<3x1x4x2xi16> - // CHECK: return %[[CST]] - return %1 : tensor<3x1x4x2xi16> -} - -// ----- - -// CHECK-LABEL: @transpose_nofold_non_cst_input -func.func @transpose_nofold_non_cst_input(%input: tensor<2x3xf32>, %init: tensor<3x2xf32>) -> tensor<3x2xf32> { - // CHECK: linalg.generic - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - linalg.yield %arg1 : f32 - } -> tensor<3x2xf32> - return %1 : tensor<3x2xf32> -} - -// ----- - -// CHECK-LABEL: @transpose_nofold_yield_const -func.func @transpose_nofold_yield_const(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> - %cst = arith.constant 8.0 : f32 - // CHECK: linalg.generic - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - 
^bb0(%arg1: f32, %arg2: f32): - linalg.yield %cst : f32 - } -> tensor<3x2xf32> - return %1 : tensor<3x2xf32> -} - -// ----- - -// CHECK-LABEL: @transpose_nofold_multi_ops_in_region -func.func @transpose_nofold_multi_ops_in_region(%init: tensor<3x2xf32>) -> tensor<3x2xf32> { - %input = arith.constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32> - // CHECK: linalg.generic - %1 = linalg.generic { - indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], - iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { - ^bb0(%arg1: f32, %arg2: f32): - %add = arith.addf %arg1, %arg1 : f32 - linalg.yield %add : f32 - } -> tensor<3x2xf32> - return %1 : tensor<3x2xf32> -} - -// ----- - // Fusing the broadcast into a reduction would require to insert extra knowledge // about the size of the reduction dimension. As long, as this is not // implemented, we check that two linalg operations remain. diff --git a/mlir/test/Dialect/Shape/bufferize.mlir b/mlir/test/Dialect/Shape/bufferize.mlir index 963a5e8bcf5787..9f30a052208f0b 100644 --- a/mlir/test/Dialect/Shape/bufferize.mlir +++ b/mlir/test/Dialect/Shape/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -shape-bufferize <%s | FileCheck %s +// RUN: mlir-opt -split-input-file --one-shot-bufferize="dialect-filter=shape,bufferization copy-before-write unknown-type-conversion=identity-layout-map allow-unknown-ops" <%s | FileCheck %s // ----- diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir index 6112856fbf2931..c27df00785522a 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -4,8 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK-MIR // // RUN: mlir-opt %s --sparse-reinterpret-map -sparsification --sparse-tensor-conversion --cse \ -// RUN: --func-bufferize --arith-bufferize \ -// RUN: 
--tensor-bufferize --finalizing-bufferize | \ +// RUN: --one-shot-bufferize="copy-before-write bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" | \ // RUN: FileCheck %s --check-prefix=CHECK-LIR #CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}> diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir index 401da152a8bdb8..9fbb9dd0a26d17 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -4,8 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK-MIR // // RUN: mlir-opt %s --sparse-reinterpret-map -sparsification --sparse-tensor-conversion --cse \ -// RUN: --func-bufferize --arith-bufferize \ -// RUN: --tensor-bufferize --finalizing-bufferize | \ +// RUN: --one-shot-bufferize="copy-before-write bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" | \ // RUN: FileCheck %s --check-prefix=CHECK-LIR #CSC = #sparse_tensor.encoding<{ diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir index d769876d8ee8e3..a827360abb4267 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -4,8 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK-MIR // // RUN: mlir-opt %s --sparse-reinterpret-map -sparsification --sparse-tensor-conversion --cse \ -// RUN: --func-bufferize --arith-bufferize \ -// RUN: --tensor-bufferize --finalizing-bufferize | \ +// RUN: --one-shot-bufferize="copy-before-write bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" | \ // RUN: FileCheck %s --check-prefix=CHECK-LIR #CSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}> diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir 
index 4f553adcc500fb..e85d9e740adf4e 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -tensor-bufferize -cse -split-input-file | FileCheck %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=tensor,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -cse -split-input-file | FileCheck %s // CHECK-LABEL: func @dim( // CHECK-SAME: %[[TENSOR:.*]]: tensor<*xf32>, diff --git a/mlir/test/Dialect/Vector/bufferize-invalid.mlir b/mlir/test/Dialect/Vector/bufferize-invalid.mlir index 1ae3e312c868f7..bcca50a0fe79a6 100644 --- a/mlir/test/Dialect/Vector/bufferize-invalid.mlir +++ b/mlir/test/Dialect/Vector/bufferize-invalid.mlir @@ -1,5 +1,4 @@ -// RUN: mlir-opt %s -vector-bufferize -split-input-file -verify-diagnostics -// | FileCheck %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=vector,bufferization copy-before-write unknown-type-conversion=identity-layout-map allow-unknown-ops" -split-input-file -verify-diagnostics // CHECK-LABEL: func @mask( func.func @mask(%t0: tensor, %val: vector<16xf32>, %idx: index, %m0: vector<16xi1>) -> tensor { diff --git a/mlir/test/Dialect/Vector/bufferize.mlir b/mlir/test/Dialect/Vector/bufferize.mlir index 6a6a8fa8938bc2..3399f60a2c3bf3 100644 --- a/mlir/test/Dialect/Vector/bufferize.mlir +++ b/mlir/test/Dialect/Vector/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -vector-bufferize -split-input-file | FileCheck %s +// RUN: mlir-opt %s --one-shot-bufferize="dialect-filter=vector,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s // CHECK-LABEL: func @transfer_read( // CHECK-SAME: %[[t:.*]]: tensor, %[[o1:.*]]: index, %[[o2:.*]]: index, %[[pad:.*]]: f32) diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index b29ceab5783d7a..31a59b809a74ba 100644 --- a/mlir/test/Dialect/Vector/linearize.mlir +++ 
b/mlir/test/Dialect/Vector/linearize.mlir @@ -245,3 +245,32 @@ func.func @test_vector_extract(%arg0: vector<2x8x2xf32>) -> vector<8x2xf32> { %0 = vector.extract %arg0[1]: vector<8x2xf32> from vector<2x8x2xf32> return %0 : vector<8x2xf32> } + +// ----- +// ALL-LABEL: test_vector_insert +// ALL-SAME: (%[[DEST:.*]]: vector<2x8x4xf32>, %[[SRC:.*]]: vector<8x4xf32>) -> vector<2x8x4xf32> { +func.func @test_vector_insert(%arg0: vector<2x8x4xf32>, %arg1: vector<8x4xf32>) -> vector<2x8x4xf32> { + // DEFAULT: %[[ARG_SRC:.*]] = vector.shape_cast %[[SRC]] : vector<8x4xf32> to vector<32xf32> + // DEFAULT: %[[ARG_DEST:.*]] = vector.shape_cast %[[DEST]] : vector<2x8x4xf32> to vector<64xf32> + // DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG_DEST]], %[[ARG_SRC]] + // DEFAULT-SAME: [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + // DEFAULT-SAME: 88, 89, 90, 91, 92, 93, 94, 95, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + // DEFAULT-SAME: 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63] : vector<64xf32>, vector<32xf32> + // DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<64xf32> to vector<2x8x4xf32> + // DEFAULT: return %[[RES]] : vector<2x8x4xf32> + + // BW-128: %[[ARG_SRC:.*]] = vector.shape_cast %[[SRC]] : vector<8x4xf32> to vector<32xf32> + // BW-128: %[[ARG_DEST:.*]] = vector.shape_cast %[[DEST]] : vector<2x8x4xf32> to vector<64xf32> + // BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG_DEST]], %[[ARG_SRC]] + // BW-128-SAME: [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + // BW-128-SAME: 88, 89, 90, 91, 92, 93, 94, 95, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + // BW-128-SAME: 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63] : vector<64xf32>, vector<32xf32> + // BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<64xf32> to vector<2x8x4xf32> + // BW-128: return %[[RES]] : 
vector<2x8x4xf32> + + // BW-0: %[[RES:.*]] = vector.insert %[[SRC]], %[[DEST]] [0] : vector<8x4xf32> into vector<2x8x4xf32> + // BW-0: return %[[RES]] : vector<2x8x4xf32> + + %0 = vector.insert %arg1, %arg0[0]: vector<8x4xf32> into vector<2x8x4xf32> + return %0 : vector<2x8x4xf32> +} diff --git a/mlir/test/Examples/NVGPU/tools/nvdsl.py b/mlir/test/Examples/NVGPU/tools/nvdsl.py index 600cae5b47eeec..90dbb2355e1c87 100644 --- a/mlir/test/Examples/NVGPU/tools/nvdsl.py +++ b/mlir/test/Examples/NVGPU/tools/nvdsl.py @@ -431,7 +431,7 @@ def __str__(self): # saveIR(module) # Verify the module - # module.operation.verify() + module.operation.verify() # Compile and JIT MLIR module options = f"cubin-chip=sm_90a cubin-features=+ptx80 opt-level=3" diff --git a/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir b/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir index b0e414d157268b..5d27c3e290d50c 100644 --- a/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir +++ b/mlir/test/Integration/Dialect/Complex/CPU/correctness.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -func-bufferize -tensor-bufferize -arith-bufferize --canonicalize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" --canonicalize \ // RUN: -convert-scf-to-cf --convert-complex-to-standard \ // RUN: -finalize-memref-to-llvm -convert-math-to-llvm -convert-math-to-libm \ // RUN: -convert-vector-to-llvm -convert-complex-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir index 43e423d4c3e8e1..734e09b7ed103d 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: 
-finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_runner_utils \ +// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils \ // RUN: | FileCheck %s diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir index 84dad567ced3ff..a323b0d9f876cf 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s -convert-elementwise-to-linalg \ -// RUN: -arith-bufferize -linalg-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -canonicalize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \ // RUN: -convert-scf-to-cf -convert-arith-to-llvm -convert-cf-to-llvm --finalize-memref-to-llvm \ // RUN: -convert-func-to-llvm -reconcile-unrealized-casts | \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir index db882f7a54d392..45283e173c9f02 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm 
-convert-arith-to-llvm \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ -// RUN: -shared-libs=%mlir_runner_utils \ +// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils \ // RUN: | FileCheck %s diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir index 54a2bbf8d46809..23a07464bb5be9 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir @@ -1,6 +1,5 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \ -// RUN: -empty-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \ -// RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \ // RUN: -lower-affine -convert-arith-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir index 98fce6c020c03d..01a0ba26fd7cda 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata 
\ // RUN: -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir index cf7d0c762ea36f..73d4aff73fb7a4 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-bufferize \ -// RUN: -arith-bufferize -tensor-bufferize -func-bufferize \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \ // RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \ // RUN: -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir index 38b49cd444df3c..ff9ddedf91e177 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir @@ -1,5 +1,6 @@ -// RUN: mlir-opt %s -arith-bufferize -linalg-bufferize \ -// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \ +// RUN: mlir-opt %s \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \ // RUN: -convert-arith-to-llvm -convert-scf-to-cf -convert-cf-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_runner_utils \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir index 41296cdfcb2d5a..698191577efe31 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -1,14 +1,14 @@ // UNSUPPORTED: asan -// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -linalg-bufferize -arith-bufferize \ -// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -expand-strided-metadata -lower-affine -convert-arith-to-llvm -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -transform-interpreter -test-transform-dialect-erase-schedule -linalg-bufferize \ -// RUN: -scf-bufferize -arith-bufferize -tensor-bufferize \ -// RUN: -func-bufferize \ +// RUN: mlir-opt %s -transform-interpreter -test-transform-dialect-erase-schedule \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ // RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-cf -convert-scf-to-cf \ // RUN: -expand-strided-metadata -lower-affine -convert-arith-to-llvm -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Memref/print-memref.mlir b/mlir/test/Integration/Dialect/Memref/print-memref.mlir index b83f3919efd83e..f59e220d7461e6 100644 --- 
a/mlir/test/Integration/Dialect/Memref/print-memref.mlir +++ b/mlir/test/Integration/Dialect/Memref/print-memref.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -func-bufferize -arith-bufferize --canonicalize \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" --canonicalize \ // RUN: -finalize-memref-to-llvm\ // RUN: -convert-func-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/Memref/verify-memref.mlir b/mlir/test/Integration/Dialect/Memref/verify-memref.mlir index b7e2a46688f475..431ae0a89d20c3 100644 --- a/mlir/test/Integration/Dialect/Memref/verify-memref.mlir +++ b/mlir/test/Integration/Dialect/Memref/verify-memref.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -func-bufferize -arith-bufferize --canonicalize \ +// RUN: -func-bufferize -one-shot-bufferize="bufferize-function-boundaries" --canonicalize \ // RUN: -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm -finalize-memref-to-llvm\ // RUN: -convert-func-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir index faa129efa63a91..a7c5b91273423b 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir @@ -1,5 +1,6 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \ -// RUN: -arith-bufferize -convert-vector-to-llvm="enable-amx" \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -convert-scf-to-cf -convert-vector-to-llvm="enable-amx" \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-translate -mlir-to-llvmir | \ // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" \ diff --git 
a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir index 3ed28fc68acb8f..7b7ee54db8c348 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir @@ -1,5 +1,7 @@ -// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-cf \ -// RUN: -arith-bufferize -convert-vector-to-llvm="enable-amx" \ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -convert-scf-to-cf \ +// RUN: -convert-vector-to-llvm="enable-amx" \ // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-translate -mlir-to-llvmir | \ // RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" \ diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 18324482153a54..9d7e0a7928ab8d 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2750,7 +2750,7 @@ def TestGraphLoopOp : TEST_Op<"graph_loop", def InferIntRangeType : AnyTypeOf<[AnyInteger, Index]>; def TestWithBoundsOp : TEST_Op<"with_bounds", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, NoMemoryEffect]> { let arguments = (ins APIntAttr:$umin, APIntAttr:$umax, @@ -2762,7 +2762,7 @@ def TestWithBoundsOp : TEST_Op<"with_bounds", } def TestWithBoundsRegionOp : TEST_Op<"with_bounds_region", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, SingleBlock, NoTerminator]> { let arguments = (ins APIntAttr:$umin, APIntAttr:$umax, @@ -2774,7 +2774,7 @@ def TestWithBoundsRegionOp : TEST_Op<"with_bounds_region", } def TestIncrementOp : TEST_Op<"increment", - [DeclareOpInterfaceMethods, + [DeclareOpInterfaceMethods, NoMemoryEffect, AllTypesMatch<["value", "result"]>]> { let arguments = (ins InferIntRangeType:$value); let results = (outs 
InferIntRangeType:$result); @@ -2783,7 +2783,8 @@ def TestIncrementOp : TEST_Op<"increment", } def TestReflectBoundsOp : TEST_Op<"reflect_bounds", - [DeclareOpInterfaceMethods, AllTypesMatch<["value", "result"]>]> { + [DeclareOpInterfaceMethods, + AllTypesMatch<["value", "result"]>]> { let arguments = (ins InferIntRangeType:$value, OptionalAttr:$umin, OptionalAttr:$umax, diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp index f72007849e36e4..13f08b142b8769 100644 --- a/offload/src/PluginManager.cpp +++ b/offload/src/PluginManager.cpp @@ -155,11 +155,11 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { // Initialize all the plugins that have associated images. for (auto &Plugin : Plugins) { - if (Plugin->is_initialized()) - continue; - // Extract the exectuable image and extra information if availible. for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) { + if (Plugin->is_initialized()) + continue; + if (!Plugin->is_valid_binary(&Desc->DeviceImages[i], /*Initialized=*/false)) continue; diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp index de77e25db2d399..d7658077e83ae0 100644 --- a/openmp/tools/archer/ompt-tsan.cpp +++ b/openmp/tools/archer/ompt-tsan.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include "omp-tools.h" @@ -146,18 +146,28 @@ void __attribute__((weak)) __tsan_flush_memory() {} static ArcherFlags *archer_flags; #ifndef TsanHappensBefore + +template static void __ompt_tsan_func(Args...) {} + +#define DECLARE_TSAN_FUNCTION(name, ...) \ + static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>; + // Thread Sanitizer is a tool that finds races in code. // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . // tsan detects these exact functions by name. 
extern "C" { -static void (*AnnotateHappensAfter)(const char *, int, const volatile void *); -static void (*AnnotateHappensBefore)(const char *, int, const volatile void *); -static void (*AnnotateIgnoreWritesBegin)(const char *, int); -static void (*AnnotateIgnoreWritesEnd)(const char *, int); -static void (*AnnotateNewMemory)(const char *, int, const volatile void *, - size_t); -static void (*__tsan_func_entry)(const void *); -static void (*__tsan_func_exit)(void); +DECLARE_TSAN_FUNCTION(AnnotateHappensAfter, const char *, int, + const volatile void *) +DECLARE_TSAN_FUNCTION(AnnotateHappensBefore, const char *, int, + const volatile void *) +DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesBegin, const char *, int) +DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesEnd, const char *, int) +DECLARE_TSAN_FUNCTION(AnnotateNewMemory, const char *, int, + const volatile void *, size_t) +DECLARE_TSAN_FUNCTION(__tsan_func_entry, const void *) +DECLARE_TSAN_FUNCTION(__tsan_func_exit) + +// RunningOnValgrind is used to detect absence of TSan and must intentionally be a nullptr. 
static int (*RunningOnValgrind)(void); } @@ -1142,7 +1152,10 @@ static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, #define findTsanFunction(f, fSig) \ do { \ - if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f))) \ + void *fp = dlsym(RTLD_DEFAULT, #f); \ + if (fp) \ + f = fSig fp; \ + else \ printf("Unable to find TSan function " #f ".\n"); \ } while (0) diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index ee504c4e5f5244..6bd5a3abf9ea28 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -47,9 +47,9 @@ struct ScheduleTreeVisitor { return getDerived().visitSequence(Node.as(), std::forward(args)...); case isl_schedule_node_set: + assert(isl_schedule_node_n_children(Node.get()) >= 2); return getDerived().visitSet(Node.as(), std::forward(args)...); - assert(isl_schedule_node_n_children(Node.get()) >= 2); case isl_schedule_node_leaf: assert(isl_schedule_node_n_children(Node.get()) == 0); return getDerived().visitLeaf(Node.as(), diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 446499cf15d7b4..70ec3a48a5e2e3 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -127,6 +127,11 @@ libc_support_library( hdrs = ["hdr/time_macros.h"], ) +libc_support_library( + name = "hdr_float_macros", + hdrs = ["hdr/float_macros.h"], +) + ############################ Type Proxy Header Files ########################### libc_support_library( @@ -189,7 +194,7 @@ libc_support_library( ":__support_macros_properties_compiler", ":__support_macros_properties_cpu_features", ":__support_macros_properties_os", - ":llvm_libc_macros_float_macros", + ":hdr_float_macros", ":llvm_libc_types_float128", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 
d1a2c6f11d98a7..a67f20533ae220 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -2232,7 +2232,7 @@ llvm_target_lib_list = [lib for lib in [ ("-gen-callingconv", "lib/Target/X86/X86GenCallingConv.inc"), ("-gen-subtarget", "lib/Target/X86/X86GenSubtargetInfo.inc"), ("-gen-x86-fold-tables -asmwriternum=1", "lib/Target/X86/X86GenFoldTables.inc"), - ("-gen-x86-compress-evex-tables", "lib/Target/X86/X86GenCompressEVEXTables.inc"), + ("-gen-x86-instr-mapping", "lib/Target/X86/X86GenInstrMapping.inc"), ("-gen-exegesis", "lib/Target/X86/X86GenExegesis.inc"), ("-gen-x86-mnemonic-tables -asmwriternum=1", "lib/Target/X86/X86GenMnemonicTables.inc"), ],