Skip to content

Commit

Permalink
Preserve extension on cached refs
Browse files Browse the repository at this point in the history
Rather than naming cached refs by a pure MD5 hash, also capture the file
extension used in the URL. This allows for `.json5` and `.yaml` files, whose
extensions indicate the filetype to the parser.
  • Loading branch information
sirosen committed Jul 1, 2024
1 parent 18b7bac commit 56e16c5
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
17 changes: 16 additions & 1 deletion src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,21 @@
from ..utils import filename2path


def ref_url_to_cache_filename(ref_url: str) -> str:
    """
    Given a $ref URL, convert it to the filename in the refs/ cache dir.

    Rules are as follows:
    - the base filename is an md5 hash of the full URL (query string and
      fragment included, so distinct URLs still map to distinct names)
    - if the last path segment of the URL ends in an extension (.json, .yaml,
      etc) that extension is appended to the hash
    - any query string or fragment is ignored when looking for the extension,
      so "schema.json?v=1.2" yields ".json" rather than ".2"
    - a last segment ending in a bare "." contributes no extension

    :param ref_url: the URL of the referenced schema
    :returns: the cache filename (no directory component)
    """
    filename = hashlib.md5(ref_url.encode()).hexdigest()

    # Drop the fragment first, then the query: in a URL the "?" that starts
    # the query always precedes the "#" that starts the fragment, and either
    # part may itself contain "/" or "." characters that are not part of the
    # path.
    path_only = ref_url.partition("#")[0].partition("?")[0]
    last_part = path_only.rpartition("/")[-1]

    _, dot, extension = last_part.rpartition(".")
    # `dot` is empty when the segment has no "."; `extension` is empty when
    # the segment ends in ".". In both cases the bare hash is used.
    if dot and extension:
        filename = f"{filename}.{extension}"
    return filename


def make_reference_registry(
parsers: ParserSet, retrieval_uri: str | None, schema: dict, disable_cache: bool
) -> referencing.Registry:
Expand Down Expand Up @@ -75,7 +90,7 @@ def validation_callback(content: bytes) -> None:

bound_downloader = downloader.bind(
full_uri,
hashlib.md5(full_uri.encode()).hexdigest(),
ref_url_to_cache_filename(full_uri),
validation_callback,
)
with bound_downloader.open() as fp:
Expand Down
13 changes: 6 additions & 7 deletions tests/acceptance/test_remote_ref_resolution.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import hashlib
import json

import pytest
import responses

from check_jsonschema.schema_loader.resolver import ref_url_to_cache_filename

CASES = {
"case1": {
"main_schema": {
Expand Down Expand Up @@ -36,10 +37,6 @@
}


def _md5(s):
    """Return the hexadecimal MD5 digest of the encoded form of string *s*."""
    encoded = s.encode()
    digest = hashlib.md5(encoded)
    return digest.hexdigest()


@pytest.mark.parametrize("check_passes", (True, False))
@pytest.mark.parametrize("casename", ("case1", "case2"))
def test_remote_ref_resolution_simple_case(run_line, check_passes, casename, tmp_path):
Expand Down Expand Up @@ -95,7 +92,9 @@ def test_remote_ref_resolution_cache_control(

cache_locs = []
for ref_loc in ref_locs:
cache_locs.append(cache_dir / "check_jsonschema" / "refs" / _md5(ref_loc))
cache_locs.append(
cache_dir / "check_jsonschema" / "refs" / ref_url_to_cache_filename(ref_loc)
)
assert cache_locs # sanity check
if disable_cache:
for loc in cache_locs:
Expand Down Expand Up @@ -126,7 +125,7 @@ def test_remote_ref_resolution_loads_from_cache(
ref_locs.append(other_schema_loc)

# but populate the cache with "good data"
cache_loc = ref_cache_dir / _md5(other_schema_loc)
cache_loc = ref_cache_dir / ref_url_to_cache_filename(other_schema_loc)
cache_locs.append(cache_loc)
cache_loc.write_text(json.dumps(subschema))

Expand Down

0 comments on commit 56e16c5

Please sign in to comment.