-
-
Notifications
You must be signed in to change notification settings - Fork 13
Description
Python Version
3.10.6
Django Version
4.1.3
Package Version
0.1.1
Description
Intro
Let me start with an example. Here is an example Django app configuration:
my_app/apps.py:
from django.apps import AppConfig
from django.conf import settings
from django.utils.autoreload import BaseReloader, autoreload_started
# my_app
_name = __name__[: __name__.index(".")]
class MyApp(AppConfig):
name = _name
def ready(self):
autoreload_started.connect(receiver=self._configure_autoreload, dispatch_uid=_name)
def _configure_autoreload(self, *, sender: BaseReloader, **kwargs):
if not hasattr(settings, "BASE_DIR"):
print(f"{_name}: Please provide BASE_DIR Django setting pointing to root folder of your project")
exit_with_load_error()
basedir = os.path.abspath(settings.BASE_DIR)
sender.watch_dir(basedir, "**/.env")
sender.watch_dir(basedir, "**/.env.*")How are these patterns handled by Django's default StatReloader:
https://github.com/django/django/blob/f030236a86a64a4befd3cc8093e2bbeceef52a31/django/utils/autoreload.py#L411
class StatReloader(BaseReloader):
# ...
def snapshot_files(self):
# ...
for file in self.watched_files():
# ...check if file changedclass BaseReloader:
# ...
def watched_files(self, include_globs=True):
# ...
if include_globs:
for directory, patterns in self.directory_globs.items():
for pattern in patterns:
yield from directory.glob(pattern)TLDR: directory.glob is used, so a pattern like **/.env would, for example, match a file named .env placed directly in the given directory.
Here is what django-watchfiles is doing:
| for directory, globs in self.directory_globs.items(): |
class WatchfilesReloader(autoreload.BaseReloader):
# ...
def file_filter(self, change: watchfiles.Change, filename: str) -> bool:
# ...
for directory, globs in self.directory_globs.items():
try:
relative_path = path.relative_to(directory)
except ValueError:
pass
else:
# ...
for glob in globs:
if fnmatch.fnmatch(str(relative_path), glob):
# ...
return TrueTLDR: It attempts to check if the relative path from directory to a changed file satisfies a glob pattern with fnmatch
The problem
Using fnmatch like this is no replacement for directory.glob(...). You can get more intuition, for example, here: https://stackoverflow.com/questions/27726545/. For sake of illustration:
-
fnmatchdoes not handle**at all: https://docs.python.org/3/library/fnmatch.html -
fnmatchmatches the given path right to left and reports a successful match even if the path does not match fully. So, whilePath('/home/user/').glob('a.txt')would only returna.txtplaced directly in the given directory,fnmatch('a.txt', 'a.txt')andfnmatch('some/nested/folder/a.txt', 'a.txt')would both return true.
So, user gets different results when using the same pattern depending on whether StatReloader or WatchfilesReloader is used.
Possible solutions
Ideally Python should offer some equivalent to directory.glob(pattern) -> filenames, which would allow one to validate a filename against a glob within a given directory or, to the same extent, to validate a relative path against a glob, something like globmatch(relative_path, glob) -> bool.
Unfortunately, there is no such thing in Python's standard library. Also, as far as I discovered, there are no third-party packages that specifically aim to offer such an equivalent to Path.glob. However, there are packages that offer similar glob-matching.
I have prepared a test project comparing Path.glob with fnmatch, globmatch and globber:
import os
# Example:
#
#test_tree = [
# "abc/def/ghi/a.txt",
# "abc/def/a.txt",
# "abc/a.txt",
# "abc/c.txt",
# "a.txt",
# "b.txt"
#]
def mktree(root: str, tree: list[str]) -> None:
for path in tree:
if path.endswith('/'):
os.makedirs(os.path.join(root, path), exist_ok=True)
else:
os.makedirs(os.path.dirname(os.path.join(root, path)), exist_ok=True)
_touch(os.path.join(root, path))
def _touch(filename: str) -> None:
open(filename, 'a+').close()
``
```python
from fnmatch import fnmatch
import itertools
from pathlib import Path
import sys
import tempfile
from typing import Iterable, Protocol
import globber
from globmatch import glob_match
import polars as pl
class _Glob(Protocol):
def __call__(root: str, glob: str) -> Iterable[str]: ...
class _GlobImplementation:
@staticmethod
def glob(root: str, glob: str) -> Iterable[str]:
return (str(name.relative_to(root)) for name in Path(root).glob(glob))
@staticmethod
def fnmatch(root: str, glob: str) -> Iterable[str]:
for dir, dirnames, filenames in os.walk(root):
for name in itertools.chain(dirnames, filenames):
name = str(Path(os.path.join(dir, name)).relative_to(root))
print(f"fnmatch({name!r}, {glob!r}) => {fnmatch(name, glob)!r}", file=sys.stderr)
if fnmatch(name, glob):
yield name
@staticmethod
def globmatch(root: str, glob: str) -> Iterable[str]:
for dir, dirnames, filenames in os.walk(root):
for name in itertools.chain(dirnames, filenames):
name = str(Path(os.path.join(dir, name)).relative_to(root))
print(f"globmatch({name!r}, [{glob!r}]) => {glob_match(name, [glob])!r}", file=sys.stderr)
if glob_match(name, [glob]):
yield name
@staticmethod
def globber(root: str, glob: str) -> Iterable[str]:
for dir, dirnames, filenames in os.walk(root):
for name in itertools.chain(dirnames, filenames):
name = str(Path(os.path.join(dir, name)).relative_to(root))
print(f"globber.match({glob!r}, {name!r}) => {globber.match(glob, name)!r}", file=sys.stderr)
if globber.match(glob, name):
yield name
def _test_glob_implementation(root: str, globs: list[str], impl: _Glob) -> Iterable[str]:
for glob in globs:
yield "; ".join(impl(root, glob))
def gather_globs(tree: list[str], globs: list[str]) -> pl.DataFrame:
with tempfile.TemporaryDirectory() as root:
mktree(root, tree)
return pl.DataFrame(dict(
globs=globs,
glob=list(_test_glob_implementation(root, globs, _GlobImplementation.glob)),
fnmatch=list(_test_glob_implementation(root, globs, _GlobImplementation.fnmatch)),
globmatch=list(_test_glob_implementation(root, globs, _GlobImplementation.globmatch)),
globber=list(_test_glob_implementation(root, globs, _GlobImplementation.globber)),
))def _gather_basic():
tree = [
"abc/def/ghi/a.txt",
"abc/def/a.txt",
"abc/a.txt",
"abc/c.txt",
"a.txt",
"b.txt"
]
globs = [
"a.txt",
"*/a.txt",
"*/*/a.txt",
"**/a.txt",
"**/**/a.txt",
# Path(...).glob() - non-relative paths are unsupported
#"/a.txt",
#"/*/a.txt",
#"/*/*/a.txt",
#"/**/a.txt",
#"/**/**/a.txt",
"abc/a.txt",
"abc/*/a.txt",
"abc/*/*/a.txt",
"abc/**/a.txt",
"abc/**/**/a.txt",
"**/*.txt",
]
return gather_globs(tree, globs)
def _gather_hidden():
tree = [
".git/index"
]
globs = [
"**/index"
]
return gather_globs(tree, globs)
def gather():
df_basic = _gather_basic()
df_hidden = _gather_hidden()
os.makedirs("../data/globmatch", exist_ok=True)
df_basic.write_csv("../data/globmatch/basic.csv")
df_hidden.write_csv("../data/globmatch/hidden.csv")
return [df_basic, df_hidden]dfs = gather()fnmatch and globmatch are junk, but globber did well, matching the behavior of Path.glob in my test cases exacltly. Here is a Google sheet with the results (those matching Path.glob highlighted with green):
https://docs.google.com/spreadsheets/d/1M2fcYlW19n1HACQi_MbYgBIMtbopnGur6mVjyNaowqc/
Based on that, I would suggest replacing fnmatch with globber.
globber is not a mature project and is not widely known, but it certainly improves the situation. Given that people haven't discovered that additional globs, basically, just don't work as expected with django-watchfiles, they probably won't have issues with globber either. If / when they do, globber's source code is only some 30 lines, so it should be fairly easy to tweak it if needed: https://github.com/asharov/globber/blob/main/globber/globber.py
