Skip to content

Commit 56e16c5

Browse files
committed
Preserve extension on cached refs
Rather than naming cached refs with a pure MD5 hash, also preserve the file extension from the ref URL. This supports `.json5` and `.yaml` files, whose extension tells the parser which format to use.
1 parent 18b7bac commit 56e16c5

File tree

2 files changed

+22
-8
lines changed

2 files changed

+22
-8
lines changed

src/check_jsonschema/schema_loader/resolver.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@
1212
from ..utils import filename2path
1313

1414

15+
def ref_url_to_cache_filename(ref_url: str) -> str:
    """
    Given a $ref URL, convert it to the filename in the refs/ cache dir.

    Rules are as follows:
    - the base filename is an md5 hash of the full URL (query string and
      fragment included, so URLs which differ only there still get
      different names)
    - if the last segment of the URL path ends in an extension (.json, .yaml,
      etc) that extension is appended to the hash
    - any '?query' or '#fragment' suffix is ignored when looking for the
      extension, and an empty extension (trailing '.') is not appended
    """
    filename = hashlib.md5(ref_url.encode()).hexdigest()

    # Strip the fragment first, then the query: a '?' after '#' belongs to
    # the fragment. Without this, 'foo.yaml?v=1.2' would yield the bogus
    # extension 'yaml?v=1.2', hiding the real filetype from the parser and
    # putting characters like '?' into the cache filename.
    url_path = ref_url.partition("#")[0].partition("?")[0]

    # Only the final path segment may contribute an extension; dots in the
    # host name or in earlier path segments are irrelevant.
    last_part = url_path.rpartition("/")[-1]
    _, dot, extension = last_part.rpartition(".")
    if dot and extension:
        filename = f"{filename}.{extension}"
    return filename
28+
29+
1530
def make_reference_registry(
1631
parsers: ParserSet, retrieval_uri: str | None, schema: dict, disable_cache: bool
1732
) -> referencing.Registry:
@@ -75,7 +90,7 @@ def validation_callback(content: bytes) -> None:
7590

7691
bound_downloader = downloader.bind(
7792
full_uri,
78-
hashlib.md5(full_uri.encode()).hexdigest(),
93+
ref_url_to_cache_filename(full_uri),
7994
validation_callback,
8095
)
8196
with bound_downloader.open() as fp:

tests/acceptance/test_remote_ref_resolution.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import hashlib
21
import json
32

43
import pytest
54
import responses
65

6+
from check_jsonschema.schema_loader.resolver import ref_url_to_cache_filename
7+
78
CASES = {
89
"case1": {
910
"main_schema": {
@@ -36,10 +37,6 @@
3637
}
3738

3839

39-
def _md5(s):
40-
return hashlib.md5(s.encode()).hexdigest()
41-
42-
4340
@pytest.mark.parametrize("check_passes", (True, False))
4441
@pytest.mark.parametrize("casename", ("case1", "case2"))
4542
def test_remote_ref_resolution_simple_case(run_line, check_passes, casename, tmp_path):
@@ -95,7 +92,9 @@ def test_remote_ref_resolution_cache_control(
9592

9693
cache_locs = []
9794
for ref_loc in ref_locs:
98-
cache_locs.append(cache_dir / "check_jsonschema" / "refs" / _md5(ref_loc))
95+
cache_locs.append(
96+
cache_dir / "check_jsonschema" / "refs" / ref_url_to_cache_filename(ref_loc)
97+
)
9998
assert cache_locs # sanity check
10099
if disable_cache:
101100
for loc in cache_locs:
@@ -126,7 +125,7 @@ def test_remote_ref_resolution_loads_from_cache(
126125
ref_locs.append(other_schema_loc)
127126

128127
# but populate the cache with "good data"
129-
cache_loc = ref_cache_dir / _md5(other_schema_loc)
128+
cache_loc = ref_cache_dir / ref_url_to_cache_filename(other_schema_loc)
130129
cache_locs.append(cache_loc)
131130
cache_loc.write_text(json.dumps(subschema))
132131

0 commit comments

Comments
 (0)