Skip to content

Commit 051f392

Browse files
authored
[NFC] Non-recursive wildcard matching (#7988)
Implement a non-recursive wildcard matching algorithm that pushes and pops the search stack as little as possible. It pushes a state only at a genuine choice point: when a wildcard reaches a character that it could consume, but that could equally well begin the match for the remainder of the pattern.
1 parent 04930ab commit 051f392

File tree

2 files changed

+179
-12
lines changed

2 files changed

+179
-12
lines changed

src/support/string.cpp

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,20 +83,66 @@ Split handleBracketingOperators(Split split) {
8383
}
8484

8585
bool wildcardMatch(const std::string& pattern, const std::string& value) {
86-
for (size_t i = 0; i < pattern.size(); i++) {
87-
if (pattern[i] == '*') {
88-
return wildcardMatch(pattern.substr(i + 1), value.substr(i)) ||
89-
(value.size() > 0 &&
90-
wildcardMatch(pattern.substr(i), value.substr(i + 1)));
91-
}
92-
if (i >= value.size()) {
93-
return false;
94-
}
95-
if (pattern[i] != value[i]) {
96-
return false;
86+
size_t psize = pattern.size(), vsize = value.size();
87+
// When we start looking at a potential match after a wildcard, we must stash
88+
// our current state in case we need to backtrack later. Store the positions
89+
// in the pattern and the value.
90+
std::vector<std::pair<size_t, size_t>> states;
91+
states.emplace_back(0, 0);
92+
while (!states.empty()) {
93+
auto [p, v] = states.back();
94+
states.pop_back();
95+
96+
// Consume input until we need to backtrack.
97+
while (true) {
98+
// Consume matching non-wildcard input from the pattern and value.
99+
while (p < psize && v < vsize && pattern[p] != '*' &&
100+
pattern[p] == value[v]) {
101+
++p;
102+
++v;
103+
}
104+
105+
// Handle wildcards.
106+
if (p < psize && pattern[p] == '*') {
107+
// Skip past the sequence of wildcards.
108+
while (p < psize && pattern[p] == '*') {
109+
++p;
110+
}
111+
if (p == psize) {
112+
// The pattern ended in a wildcard, so it matches the rest of the
113+
// value no matter what it is.
114+
return true;
115+
}
116+
// Find the next possible match.
117+
while (v < vsize && value[v] != pattern[p]) {
118+
++v;
119+
}
120+
if (v == vsize) {
121+
// No match. Backtrack if possible.
122+
break;
123+
}
124+
// We do lazy matching where the wildcard consumes as little as
125+
// possible. Try continuing the match after the wildcard from here, but
126+
// stash the alternative state where we still have a wildcard and it has
127+
// consumed this character in case we need to backtrack.
128+
states.emplace_back(p - 1, v + 1);
129+
continue;
130+
}
131+
132+
// Check end conditions.
133+
if (p == psize && v == vsize) {
134+
// Success! We've matched the full pattern against the full value.
135+
return true;
136+
}
137+
138+
// We're either out of pattern or out of value or we found a mismatch,
139+
// so we need to try to backtrack.
140+
assert(p == psize || v == vsize || pattern[p] != value[v]);
141+
break;
97142
}
98143
}
99-
return value.size() == pattern.size();
144+
// No match, but cannot backtrack any further.
145+
return false;
100146
}
101147

102148
std::string trim(const std::string& input) {

test/lit/passes/no-inline.wast

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
;; RUN: foreach %s %t wasm-opt --no-partial-inline=*maybe* --inlining --optimize-level=3 --partial-inlining-ifs=1 -S -o - | filecheck %s --check-prefix NO_PART
99
;; RUN: foreach %s %t wasm-opt --no-full-inline=*maybe* --inlining --optimize-level=3 --partial-inlining-ifs=1 -S -o - | filecheck %s --check-prefix NO_FULL
1010
;; RUN: foreach %s %t wasm-opt --no-inline=*maybe* --inlining --optimize-level=3 --partial-inlining-ifs=1 -S -o - | filecheck %s --check-prefix NO_BOTH
11+
;; RUN: foreach %s %t wasm-opt --no-inline=*****maybe***** --inlining --optimize-level=3 --partial-inlining-ifs=1 -S -o - | filecheck %s --check-prefix NO_BOTH
1112

1213
(module
1314
;; YES_ALL: (type $0 (func))
@@ -638,6 +639,42 @@
638639
)
639640
)
640641

642+
(func $very-long-name-we-should-not-error-on-even-though-it-is-very-very-long
643+
;; Test a long name.
644+
)
645+
646+
;; NO_FULL: (func $very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long
647+
;; NO_FULL-NEXT: )
648+
;; NO_BOTH: (func $very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long
649+
;; NO_BOTH-NEXT: )
650+
(func $very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long
651+
;; Test a long name with "maybe" in it.
652+
)
653+
654+
;; NO_FULL: (func $very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long
655+
;; NO_FULL-NEXT: )
656+
;; NO_BOTH: (func $very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long
657+
;; NO_BOTH-NEXT: )
658+
(func $very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long
659+
;; Test a long name with "maybe" in it, and a partial match earlier ("may").
660+
)
661+
662+
(func $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmayb
663+
;; Test a long name with many partial matches but no real match.
664+
)
665+
666+
;; NO_FULL: (func $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe
667+
;; NO_FULL-NEXT: )
668+
;; NO_BOTH: (func $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe
669+
;; NO_BOTH-NEXT: )
670+
(func $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe
671+
;; Test a long name with many partial matches and one real match right at the end.
672+
)
673+
674+
(func $mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
675+
;; Test a long name with even more tiny partial matches but no real match.
676+
)
677+
641678
;; YES_ALL: (func $caller
642679
;; YES_ALL-NEXT: (local $0 i32)
643680
;; YES_ALL-NEXT: (local $1 i32)
@@ -741,6 +778,30 @@
741778
;; YES_ALL-NEXT: )
742779
;; YES_ALL-NEXT: )
743780
;; YES_ALL-NEXT: )
781+
;; YES_ALL-NEXT: (block $__inlined_func$very-long-name-we-should-not-error-on-even-though-it-is-very-very-long$4
782+
;; YES_ALL-NEXT: (block
783+
;; YES_ALL-NEXT: )
784+
;; YES_ALL-NEXT: )
785+
;; YES_ALL-NEXT: (block $__inlined_func$very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long$5
786+
;; YES_ALL-NEXT: (block
787+
;; YES_ALL-NEXT: )
788+
;; YES_ALL-NEXT: )
789+
;; YES_ALL-NEXT: (block $__inlined_func$very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long$6
790+
;; YES_ALL-NEXT: (block
791+
;; YES_ALL-NEXT: )
792+
;; YES_ALL-NEXT: )
793+
;; YES_ALL-NEXT: (block $__inlined_func$maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmayb$7
794+
;; YES_ALL-NEXT: (block
795+
;; YES_ALL-NEXT: )
796+
;; YES_ALL-NEXT: )
797+
;; YES_ALL-NEXT: (block $__inlined_func$maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe$8
798+
;; YES_ALL-NEXT: (block
799+
;; YES_ALL-NEXT: )
800+
;; YES_ALL-NEXT: )
801+
;; YES_ALL-NEXT: (block $__inlined_func$mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm$9
802+
;; YES_ALL-NEXT: (block
803+
;; YES_ALL-NEXT: )
804+
;; YES_ALL-NEXT: )
744805
;; YES_ALL-NEXT: )
745806
;; NO_PART: (func $caller
746807
;; NO_PART-NEXT: (call $maybe-partial-or-full-1
@@ -755,6 +816,30 @@
755816
;; NO_PART-NEXT: (call $maybe-partial-or-full-2
756817
;; NO_PART-NEXT: (i32.const 1)
757818
;; NO_PART-NEXT: )
819+
;; NO_PART-NEXT: (block $__inlined_func$very-long-name-we-should-not-error-on-even-though-it-is-very-very-long
820+
;; NO_PART-NEXT: (block
821+
;; NO_PART-NEXT: )
822+
;; NO_PART-NEXT: )
823+
;; NO_PART-NEXT: (block $__inlined_func$very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long$1
824+
;; NO_PART-NEXT: (block
825+
;; NO_PART-NEXT: )
826+
;; NO_PART-NEXT: )
827+
;; NO_PART-NEXT: (block $__inlined_func$very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long$2
828+
;; NO_PART-NEXT: (block
829+
;; NO_PART-NEXT: )
830+
;; NO_PART-NEXT: )
831+
;; NO_PART-NEXT: (block $__inlined_func$maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmayb$3
832+
;; NO_PART-NEXT: (block
833+
;; NO_PART-NEXT: )
834+
;; NO_PART-NEXT: )
835+
;; NO_PART-NEXT: (block $__inlined_func$maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe$4
836+
;; NO_PART-NEXT: (block
837+
;; NO_PART-NEXT: )
838+
;; NO_PART-NEXT: )
839+
;; NO_PART-NEXT: (block $__inlined_func$mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm$5
840+
;; NO_PART-NEXT: (block
841+
;; NO_PART-NEXT: )
842+
;; NO_PART-NEXT: )
758843
;; NO_PART-NEXT: )
759844
;; NO_FULL: (func $caller
760845
;; NO_FULL-NEXT: (local $0 i32)
@@ -817,6 +902,21 @@
817902
;; NO_FULL-NEXT: )
818903
;; NO_FULL-NEXT: )
819904
;; NO_FULL-NEXT: )
905+
;; NO_FULL-NEXT: (block $__inlined_func$very-long-name-we-should-not-error-on-even-though-it-is-very-very-long$4
906+
;; NO_FULL-NEXT: (block
907+
;; NO_FULL-NEXT: )
908+
;; NO_FULL-NEXT: )
909+
;; NO_FULL-NEXT: (call $very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long)
910+
;; NO_FULL-NEXT: (call $very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long)
911+
;; NO_FULL-NEXT: (block $__inlined_func$maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmayb$5
912+
;; NO_FULL-NEXT: (block
913+
;; NO_FULL-NEXT: )
914+
;; NO_FULL-NEXT: )
915+
;; NO_FULL-NEXT: (call $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe)
916+
;; NO_FULL-NEXT: (block $__inlined_func$mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm$6
917+
;; NO_FULL-NEXT: (block
918+
;; NO_FULL-NEXT: )
919+
;; NO_FULL-NEXT: )
820920
;; NO_FULL-NEXT: )
821921
;; NO_BOTH: (func $caller
822922
;; NO_BOTH-NEXT: (call $maybe-partial-or-full-1
@@ -831,6 +931,21 @@
831931
;; NO_BOTH-NEXT: (call $maybe-partial-or-full-2
832932
;; NO_BOTH-NEXT: (i32.const 1)
833933
;; NO_BOTH-NEXT: )
934+
;; NO_BOTH-NEXT: (block $__inlined_func$very-long-name-we-should-not-error-on-even-though-it-is-very-very-long
935+
;; NO_BOTH-NEXT: (block
936+
;; NO_BOTH-NEXT: )
937+
;; NO_BOTH-NEXT: )
938+
;; NO_BOTH-NEXT: (call $very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long)
939+
;; NO_BOTH-NEXT: (call $very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long)
940+
;; NO_BOTH-NEXT: (block $__inlined_func$maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmayb$1
941+
;; NO_BOTH-NEXT: (block
942+
;; NO_BOTH-NEXT: )
943+
;; NO_BOTH-NEXT: )
944+
;; NO_BOTH-NEXT: (call $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe)
945+
;; NO_BOTH-NEXT: (block $__inlined_func$mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm$2
946+
;; NO_BOTH-NEXT: (block
947+
;; NO_BOTH-NEXT: )
948+
;; NO_BOTH-NEXT: )
834949
;; NO_BOTH-NEXT: )
835950
(func $caller
836951
;; In YES_ALL we will fully inline all of these. In NO_FULL we will partially
@@ -851,6 +966,12 @@
851966
(call $maybe-partial-or-full-2
852967
(i32.const 1)
853968
)
969+
(call $very-long-name-we-should-not-error-on-even-though-it-is-very-very-long)
970+
(call $very-long-name-we-should-not-error-on-maybe-even-though-it-is-very-very-long)
971+
(call $very-long-name-we-may-should-not-error-on-maybe-even-though-it-is-very-very-long)
972+
(call $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmayb)
973+
(call $maybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybmaybe)
974+
(call $mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm)
854975
)
855976
)
856977
;; NO_FULL: (func $byn-split-outlined-B$maybe-partial-or-full-1 (param $x i32)

0 commit comments

Comments
 (0)