Skip to content

Commit

Permalink
refactor: cleanup glob implementation, add tests (#420)
Browse files Browse the repository at this point in the history
Prefactor to #419
  • Loading branch information
jbedard authored May 5, 2023
1 parent 6f1f2fb commit c363420
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 55 deletions.
106 changes: 53 additions & 53 deletions lib/private/glob_match.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,43 @@ implementation and tests were used as a reference implementation:
https://github.com/pnpm/pnpm/blob/v7.4.0-2/packages/matcher/test/index.ts
"""

def _split_on(expr, splits):
# Splits an expression on the tokens in splits but keeps the tokens split in the result.
# Tokens are matched in order so a token such as `**` should come before `*`.
GLOB_SYMBOLS = ["**", "*", "?"]

# "forever" (2^30) for ~ while(true) loops
_FOREVER = range(1073741824)

def _split_expr(expr):
result = []
accumulator = ""
skip = 0
has_splits = False
for i in range(len(expr)):
j = i + skip
if j >= len(expr):

# Splits an expression on the tokens in GLOB_SYMBOLS but keeps the matched tokens in the result.
# Tokens are matched in order so a token such as `**` should come before `*`.
expr_len = len(expr)
accumulator = 0
i = 0
for _ in _FOREVER:
if i >= expr_len:
break
for split in splits:
if not split:
fail("empty split token")
if expr[j:].startswith(split):
if accumulator:
result.append(accumulator)
accumulator = ""
result.append(split)
skip = skip + len(split)
j = i + skip
has_splits = True

found_symb = None
for symb in GLOB_SYMBOLS:
if expr.startswith(symb, i):
found_symb = symb
break
if j >= len(expr):
break
accumulator = accumulator + expr[j]
if accumulator:
result.append(accumulator)
return result, has_splits

GLOB_SYMBOLS = ["**", "*", "?"]
if found_symb:
if accumulator != i:
result.append(expr[accumulator:i])

result.append(found_symb)
i = i + len(found_symb)
accumulator = i
else:
i = i + 1

if accumulator != i:
result.append(expr[accumulator:])

return result

def is_glob(expr):
"""Determine if the passed string is a glob expression
Expand Down Expand Up @@ -78,23 +84,19 @@ def glob_match(expr, path, match_path_separator = False):
if expr == "":
fail("glob_match: invalid empty glob expression")

if not is_glob(expr):
# the expression is not a glob (does not have any glob symbols) so the only match is an exact match
return expr == path

if expr.find("***") != -1:
fail("glob_match: invalid *** pattern found in glob expression")

if expr == "**":
# matches everything
return True

expr_parts, has_splits = _split_on(expr, GLOB_SYMBOLS[:])

# Quick exit for simple cases.
if not has_splits:
if not is_glob(expr):
# the expression is not a glob (does not have any glob symbols) so the only match is an exact match
return expr == path

expr_parts = _split_expr(expr)

for i, expr_part in enumerate(expr_parts):
if expr_part == "**":
if i > 0 and not expr_parts[i - 1].endswith("/"):
Expand All @@ -109,40 +111,46 @@ def glob_match(expr, path, match_path_separator = False):
expr_i = 0
path_i = 0

# Loop "forever" (2^30).
for _ in range(1073741824):
for _ in _FOREVER:
subpath = path[path_i:] if path_i < len(path) else None
subexpr = expr_parts[expr_i] if expr_i < len(expr_parts) else None

# The next part of the expression.
next_pp = expr_parts[expr_i + 1] if expr_i + 1 < len(expr_parts) else None
next_subexpr = expr_parts[expr_i + 1] if expr_i + 1 < len(expr_parts) else None

at_slash = subpath != None and subpath.startswith("/")

stop_at_leading_path_separator = not match_path_separator and subpath != None and subpath.startswith("/")
stop_at_contained_path_separator = not match_path_separator and subpath != None and subpath.find("/") != -1
# Reached the end of the expression and path.
if path_i >= len(path) and expr_i >= len(expr_parts):
return True

# Reached the end of the path on a final empty "*" or "**" expression
if path_i >= len(path) and expr_i == len(expr_parts) - 1 and (subexpr == "*" or subexpr == "**"):
return True

if (subexpr == "*" and subpath != None and not stop_at_leading_path_separator) or (subexpr == "**" and subpath != None):
if (subexpr == "*" and subpath != None and (match_path_separator or not at_slash)) or (subexpr == "**" and subpath != None):
# A wildcard or globstar in the expression and something to consume.
if next_pp == None and not stop_at_contained_path_separator:
if next_subexpr == None and (match_path_separator or subpath.find("/") == -1):
# This wildcard is the last and matches everything beyond here.
return True

# If the next part of the expression matches the current subpath
# then advance past the wildcard and consume that next expression.
if next_pp != None and subpath.startswith(next_pp):
if next_subexpr != None and subpath.startswith(next_subexpr):
# Persist the alternative of using the wildcard instead of advancing.
branches.append([expr_i, path_i + 1])
expr_i = expr_i + 1
else:
# Otherwise consume the next character.
path_i = path_i + 1

elif subexpr == "*" and subpath != None and stop_at_leading_path_separator and next_pp != None and subpath.startswith(next_pp):
elif subexpr == "*" and subpath != None and next_subexpr != None and subpath.startswith(next_subexpr):
# A wildcard that has hit a path separator but we can branch
# Persist the alternative of using the wildcard instead of advancing.
branches.append([expr_i, path_i + 1])
expr_i = expr_i + 1

elif subexpr == "?" and subpath != None and not stop_at_leading_path_separator:
elif subexpr == "?" and subpath != None and (match_path_separator or not at_slash):
# The string matches a ? wildcard at the current location in the path.
expr_i = expr_i + 1
path_i = path_i + 1
Expand All @@ -152,10 +160,6 @@ def glob_match(expr, path, match_path_separator = False):
expr_i = expr_i + 1
path_i = path_i + len(subexpr)

elif subpath == None and expr_i == len(expr_parts) - 1 and (subexpr == "*" or subexpr == "**"):
# Reached the package on a final empty "*" or "**" expression
return True

elif len(branches) > 0:
# The string does not match, backup to the previous branch.
[restored_pattern_i, restored_path_i] = branches.pop()
Expand All @@ -167,8 +171,4 @@ def glob_match(expr, path, match_path_separator = False):
# The string does not match, with no branches to rollback to, there is no match.
return False

if path_i == len(path) and expr_i == len(expr_parts):
# Reached the end of the expression and package.
return True

fail("glob_match: reached the end of the (in)finite loop")
63 changes: 61 additions & 2 deletions lib/tests/glob_match_test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ def _star(ctx):

star_test = unittest.make(_star)

def _trailing_star(ctx):
    """Exercises the `x/*` pattern (a star directly after a path separator).

    The `mps_*` expectations are presumably evaluated with
    `match_path_separator = True` -- confirm against `_glob_match_test`.
    """
    pattern = "x/*"

    # Default expectations: one extra segment matches, zero or two do not.
    expected_matches = ["x/y", "x/y.z"]
    expected_non_matches = ["x", "x/y/z"]

    # Expectations for the `mps_*` variant of the check.
    expected_mps_matches = ["x/y/z"]
    expected_mps_non_matches = ["x"]

    return _glob_match_test(
        ctx,
        pattern,
        matches = expected_matches,
        non_matches = expected_non_matches,
        mps_matches = expected_mps_matches,
        mps_non_matches = expected_mps_non_matches,
    )

trailing_star_test = unittest.make(_trailing_star)

def _globstar(ctx):
return _glob_match_test(ctx, "**", ["@eslint/plugin-foo", "express"], [])

Expand Down Expand Up @@ -109,6 +121,18 @@ def _mixed_wrapped_qmark(ctx):

mixed_wrapped_qmark_test = unittest.make(_mixed_wrapped_qmark)

def _leading_star_test(ctx):
    """Exercises the `*/foo.*` pattern (a star as the leading path segment).

    The `mps_*` expectations are presumably evaluated with
    `match_path_separator = True` -- confirm against `_glob_match_test`.
    """
    pattern = "*/foo.*"

    # Default expectations: exactly one leading segment followed by `foo.<something>`.
    expected_matches = ["fum/foo.x", "a/foo.bcd"]
    expected_non_matches = ["foo.x", "a/b/foo.x", "a/foo"]

    # Expectations for the `mps_*` variant of the check.
    expected_mps_matches = ["fum/foo.x", "a/b/foo.x", "a/foo.bcd"]
    expected_mps_non_matches = ["foo.x", "a/foo"]

    return _glob_match_test(
        ctx,
        pattern,
        matches = expected_matches,
        non_matches = expected_non_matches,
        mps_matches = expected_mps_matches,
        mps_non_matches = expected_mps_non_matches,
    )

leading_star_test = unittest.make(_leading_star_test)

def _ending_star(ctx):
return _glob_match_test(ctx, "eslint-*", ["eslint-plugin-foo"], ["@eslint/plugin-foo", "express", "eslint", "-eslint"])

Expand Down Expand Up @@ -140,8 +164,8 @@ def _mixed_trailing_globstar(ctx):
return _glob_match_test(
ctx,
"foo*/**",
matches = ["foo/fum/bar", "foostar/fum/bar"],
non_matches = ["fo/fum/bar", "fostar/fum/bar", "foo", "foostar"],
matches = ["foo/fum/bar", "foostar/fum/bar", "foo/a", "foob/c"],
non_matches = ["fo/fum/bar", "fostar/fum/bar", "foo", "foostar", "afoo", "b/foo/c"],
)

mixed_trailing_globstar_test = unittest.make(_mixed_trailing_globstar)
Expand All @@ -156,6 +180,16 @@ def _mixed_leading_globstar(ctx):

mixed_leading_globstar_test = unittest.make(_mixed_leading_globstar)

def _mixed_leading_globstar2(ctx):
    """Exercises the `**/*foo` pattern (a leading globstar followed by a star-prefixed name)."""
    pattern = "**/*foo"

    # Paths whose final segment ends in `foo` are expected to match.
    expected_matches = ["fum/bar/foo", "fum/bar/starfoo"]

    # A trailing character or an extra segment after `foo` must not match.
    expected_non_matches = ["fum/bar/foox", "fum/bar/foo/y"]

    return _glob_match_test(
        ctx,
        pattern,
        matches = expected_matches,
        non_matches = expected_non_matches,
    )

mixed_leading_globstar2_test = unittest.make(_mixed_leading_globstar2)

def _mixed_wrapping_globstar(ctx):
return _glob_match_test(
ctx,
Expand All @@ -166,6 +200,26 @@ def _mixed_wrapping_globstar(ctx):

mixed_wrapper_globstar_test = unittest.make(_mixed_wrapping_globstar)

def _all_of_ext(ctx):
    """Exercises the `**/*.tf` pattern (files with a given extension under a directory)."""
    pattern = "**/*.tf"

    # Paths whose final segment ends in `.tf` are expected to match.
    expected_matches = ["a/b.tf", "ab/cd/e.tf"]

    # A longer extension, a missing dot, or a further segment must not match.
    expected_non_matches = ["a/b.tfg", "a/tf", "a/b.tf/g"]

    return _glob_match_test(
        ctx,
        pattern,
        matches = expected_matches,
        non_matches = expected_non_matches,
    )

all_of_ext_test = unittest.make(_all_of_ext)

def _all_of_name(ctx):
    """Exercises the `**/foo` pattern (a fixed file name under a directory)."""
    pattern = "**/foo"

    # Paths whose final segment is exactly `foo` are expected to match.
    expected_matches = ["a/b/c/foo", "foo/foo", "a/foo/foo"]

    # A suffix on the name, or `foo` used only as a directory, must not match.
    expected_non_matches = ["foox", "foo/x"]

    return _glob_match_test(
        ctx,
        pattern,
        matches = expected_matches,
        non_matches = expected_non_matches,
    )

all_of_name_test = unittest.make(_all_of_name)

def _is_glob(ctx):
env = unittest.begin(ctx)

Expand Down Expand Up @@ -201,17 +255,22 @@ def glob_match_test_suite():
"glob_match",
partial.make(basic_test, timeout = "short"),
partial.make(star_test, timeout = "short"),
partial.make(trailing_star_test, timeout = "short"),
partial.make(globstar_test, timeout = "short"),
partial.make(qmark_test, timeout = "short"),
partial.make(qmark_qmark_test, timeout = "short"),
partial.make(wrapped_qmark_test, timeout = "short"),
partial.make(mixed_wrapped_qmark_test, timeout = "short"),
partial.make(leading_star_test, timeout = "short"),
partial.make(ending_star_test, timeout = "short"),
partial.make(wrapping_star_test, timeout = "short"),
partial.make(wrapped_star_test, timeout = "short"),
partial.make(all_of_ext_test, timeout = "short"),
partial.make(all_of_name_test, timeout = "short"),
partial.make(starting_star_test, timeout = "short"),
partial.make(mixed_trailing_globstar_test, timeout = "short"),
partial.make(mixed_leading_globstar_test, timeout = "short"),
partial.make(mixed_leading_globstar2_test, timeout = "short"),
partial.make(mixed_wrapper_globstar_test, timeout = "short"),
)

Expand Down

0 comments on commit c363420

Please sign in to comment.