Skip to content

Commit 76b19fe

Browse files
authored
Merge pull request #289 from python-jsonschema/use-referencing
Use the new 'referencing' implementation in 'jsonschema'
2 parents 0486f68 + 547f86b commit 76b19fe

File tree

15 files changed

+376
-82
lines changed

15 files changed

+376
-82
lines changed

.github/workflows/build.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
py: ["3.x"]
1212
include:
1313
- toxenv: py-mindeps
14-
py: "3.7"
14+
py: "3.8"
1515

1616
runs-on: ubuntu-latest
1717
name: "Run '${{ matrix.toxenv }}' on python ${{ matrix.py }}"
@@ -40,7 +40,7 @@ jobs:
4040
strategy:
4141
matrix:
4242
os: [ubuntu-latest, windows-latest, macos-latest]
43-
py: ['3.7', '3.8', '3.9', '3.10', '3.11']
43+
py: ['3.8', '3.9', '3.10', '3.11']
4444
name: "Run tests on ${{ matrix.os }}, py${{ matrix.py }}"
4545
runs-on: ${{ matrix.os }}
4646
steps:

CHANGELOG.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@ Unreleased
1111
.. vendor-insert-here
1212
1313
- Update vendored schemas (2023-07-18)
14+
- Remove support for python3.7
15+
- The minimum supported version of the `jsonschema` library is now `4.18.0`,
16+
which introduces new `$ref` resolution behavior and fixes. That behavior is
17+
used in all cases, which should result in faster evaluation especially on
18+
large schemas.
19+
- `$ref` usage may now refer to YAML, TOML, or JSON5 files, or any other
20+
non-JSON format supported by `check-jsonschema`. The file type is inferred
21+
only from the file extension in these cases and defaults to JSON if there is
22+
no recognizable extension.
1423

1524
0.23.3
1625
------

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ python_requires = >=3.7
2020
install_requires =
2121
importlib-resources>=1.4.0;python_version<"3.9"
2222
ruamel.yaml==0.17.32
23-
jsonschema>=4.5.1,<5.0
23+
jsonschema>=4.18.0,<5.0
2424
requests<3.0
2525
click>=8,<9
2626
package_dir=

src/check_jsonschema/checker.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import click
77
import jsonschema
8+
import referencing.exceptions
89

910
from . import utils
1011
from .formats import FormatOptions
@@ -75,7 +76,11 @@ def _build_result(self) -> CheckResult:
7576
def _run(self) -> None:
7677
try:
7778
result = self._build_result()
78-
except jsonschema.RefResolutionError as e:
79+
except (
80+
referencing.exceptions.NoSuchResource,
81+
referencing.exceptions.Unretrievable,
82+
referencing.exceptions.Unresolvable,
83+
) as e:
7984
self._fail("Failure resolving $ref within schema\n", e)
8085

8186
self._reporter.report_result(result)

src/check_jsonschema/identify_filetype.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
}
1919

2020

21-
def path_to_type(path: pathlib.Path, *, default_type: str = "json") -> str:
22-
ext = path.suffix.lstrip(".")
21+
def path_to_type(path: str | pathlib.Path, *, default_type: str = "json") -> str:
22+
if isinstance(path, str):
23+
ext = path.rpartition(".")[2]
24+
else:
25+
ext = path.suffix.lstrip(".")
2326

2427
if ext in _EXTENSION_MAP:
2528
return _EXTENSION_MAP[ext]

src/check_jsonschema/parsers/__init__.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def __init__(
6565
}
6666

6767
def get(
68-
self, path: pathlib.Path, default_filetype: str
68+
self, path: pathlib.Path | str, default_filetype: str
6969
) -> t.Callable[[t.BinaryIO], t.Any]:
7070
filetype = path_to_type(path, default_type=default_filetype)
7171

@@ -82,10 +82,15 @@ def get(
8282
+ ",".join(self._by_tag.keys())
8383
)
8484

85-
def parse_file(self, path: pathlib.Path, default_filetype: str) -> t.Any:
85+
def parse_data_with_path(
86+
self, data: t.BinaryIO, path: pathlib.Path | str, default_filetype: str
87+
) -> t.Any:
8688
loadfunc = self.get(path, default_filetype)
8789
try:
88-
with open(path, "rb") as fp:
89-
return loadfunc(fp)
90+
return loadfunc(data)
9091
except LOADING_FAILURE_ERROR_TYPES as e:
9192
raise FailedFileLoadError(f"Failed to parse {path}") from e
93+
94+
def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any:
95+
with open(path, "rb") as fp:
96+
return self.parse_data_with_path(fp, path, default_filetype)

src/check_jsonschema/schema_loader/main.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99

1010
from ..builtin_schemas import get_builtin_schema
1111
from ..formats import FormatOptions, make_format_checker
12+
from ..parsers import ParserSet
1213
from ..utils import is_url_ish
1314
from .errors import UnsupportedUrlScheme
1415
from .readers import HttpSchemaReader, LocalSchemaReader
15-
from .resolver import make_ref_resolver
16+
from .resolver import make_reference_registry
1617

1718

1819
def _extend_with_default(
@@ -71,6 +72,9 @@ def __init__(
7172
if is_url_ish(self.schemafile):
7273
self.url_info = urllib.parse.urlparse(self.schemafile)
7374

75+
# setup a parser collection
76+
self._parsers = ParserSet()
77+
7478
# setup a schema reader lazily, when needed
7579
self._reader: LocalSchemaReader | HttpSchemaReader | None = None
7680

@@ -96,8 +100,8 @@ def _get_schema_reader(self) -> LocalSchemaReader | HttpSchemaReader:
96100
f"detected parsed URL had an unrecognized scheme: {self.url_info}"
97101
)
98102

99-
def get_schema_ref_base(self) -> str | None:
100-
return self.reader.get_ref_base()
103+
def get_schema_retrieval_uri(self) -> str | None:
104+
return self.reader.get_retrieval_uri()
101105

102106
def get_schema(self) -> dict[str, t.Any]:
103107
return self.reader.read_schema()
@@ -109,19 +113,19 @@ def get_validator(
109113
format_opts: FormatOptions,
110114
fill_defaults: bool,
111115
) -> jsonschema.Validator:
112-
schema_uri = self.get_schema_ref_base()
116+
retrieval_uri = self.get_schema_retrieval_uri()
113117
schema = self.get_schema()
114118

115119
schema_dialect = schema.get("$schema")
116120

117121
# format checker (which may be None)
118122
format_checker = make_format_checker(format_opts, schema_dialect)
119123

120-
# ref resolver which may be built from the schema path
121-
# if the location is a URL, there's no change, but if it's a file path
122-
# it's made absolute and URI-ized
123-
# the resolver should use `$id` if there is one present in the schema
124-
ref_resolver = make_ref_resolver(schema_uri, schema)
124+
# reference resolution
125+
# with support for YAML, TOML, and other formats from the parsers
126+
reference_registry = make_reference_registry(
127+
self._parsers, retrieval_uri, schema
128+
)
125129

126130
# get the correct validator class and check the schema under its metaschema
127131
validator_cls = jsonschema.validators.validator_for(schema)
@@ -134,7 +138,7 @@ def get_validator(
134138
# now that we know it's safe to try to create the validator instance, do it
135139
validator = validator_cls(
136140
schema,
137-
resolver=ref_resolver,
141+
registry=reference_registry,
138142
format_checker=format_checker,
139143
)
140144
return t.cast(jsonschema.Validator, validator)
@@ -143,8 +147,9 @@ def get_validator(
143147
class BuiltinSchemaLoader(SchemaLoader):
144148
def __init__(self, schema_name: str) -> None:
145149
self.schema_name = schema_name
150+
self._parsers = ParserSet()
146151

147-
def get_schema_ref_base(self) -> str | None:
152+
def get_schema_retrieval_uri(self) -> str | None:
148153
return None
149154

150155
def get_schema(self) -> dict[str, t.Any]:

src/check_jsonschema/schema_loader/readers.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,12 @@ def _run_load_callback(schema_location: str, callback: t.Callable) -> dict:
2525

2626

2727
class LocalSchemaReader:
28-
FORMATS = ("json", "json5", "yaml")
29-
3028
def __init__(self, filename: str) -> None:
3129
self.path = filename2path(filename)
3230
self.filename = str(self.path)
33-
self.parsers = ParserSet(supported_formats=self.FORMATS)
31+
self.parsers = ParserSet()
3432

35-
def get_ref_base(self) -> str:
33+
def get_retrieval_uri(self) -> str:
3634
return self.path.as_uri()
3735

3836
def _read_impl(self) -> t.Any:
@@ -57,7 +55,7 @@ def __init__(
5755
validation_callback=json.loads,
5856
)
5957

60-
def get_ref_base(self) -> str:
58+
def get_retrieval_uri(self) -> str:
6159
return self.url
6260

6361
def _read_impl(self) -> t.Any:
Lines changed: 65 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,69 @@
11
from __future__ import annotations
22

33
import typing as t
4+
import urllib.parse
45

5-
import click
6-
import jsonschema
7-
8-
9-
class _CliRefResolver(jsonschema.RefResolver):
10-
def resolve_remote(self, uri: str) -> t.Any:
11-
if uri.endswith(".yaml") or uri.endswith(".yml"):
12-
click.secho(
13-
"""\
14-
WARNING: You appear to be using a schema which references a YAML file.
15-
16-
This is not supported by check-jsonschema and may result in errors.
17-
""",
18-
err=True,
19-
fg="yellow",
20-
)
21-
elif uri.endswith(".json5"):
22-
click.secho(
23-
"""\
24-
WARNING: You appear to be using a schema which references a JSON5 file.
25-
26-
This is not supported by check-jsonschema and may result in errors.
27-
""",
28-
err=True,
29-
fg="yellow",
30-
)
31-
return super().resolve_remote(uri)
32-
33-
34-
def make_ref_resolver(
35-
schema_uri: str | None, schema: dict
36-
) -> jsonschema.RefResolver | None:
37-
if not schema_uri:
38-
return None
39-
40-
base_uri = schema.get("$id", schema_uri)
41-
# FIXME: temporary type-ignore because typeshed has the type wrong
42-
return _CliRefResolver(base_uri, schema) # type: ignore[arg-type]
6+
import referencing
7+
import requests
8+
from referencing.jsonschema import DRAFT202012, Schema
9+
10+
from ..parsers import ParserSet
11+
from ..utils import filename2path
12+
13+
14+
def make_reference_registry(
    parsers: ParserSet, retrieval_uri: str | None, schema: dict
) -> referencing.Registry:
    """
    Build a ``referencing.Registry`` pre-populated with the given schema.

    :param parsers: parser collection handed to the retrieval callback
    :param retrieval_uri: the URI the schema was loaded from, or ``None``
    :param schema: the already-parsed schema document
    :returns: a registry in which the schema is registered under its retrieval
        URI (when given) and under its ``$id`` (when that is a string)
    """
    # only a string "$id" is usable as a URI; anything else is treated as absent
    declared_id: t.Any = schema.get("$id")
    id_attribute: str | None = declared_id if isinstance(declared_id, str) else None

    schema_resource = referencing.Resource.from_contents(
        schema, default_specification=DRAFT202012
    )

    retrieve = create_retrieve_callable(parsers, retrieval_uri, id_attribute)
    # mypy does not recognize that Registry is an `attrs` class and has `retrieve` as an
    # argument to its implicit initializer
    registry: referencing.Registry = referencing.Registry(  # type: ignore[call-arg]
        retrieve=retrieve
    )

    # register the schema under every URI it is known by: the retrieval URI
    # first, then the "$id"
    for known_uri in (retrieval_uri, id_attribute):
        if known_uri is not None:
            registry = registry.with_resource(uri=known_uri, resource=schema_resource)

    return registry
38+
39+
40+
def create_retrieve_callable(
    parser_set: ParserSet, retrieval_uri: str | None, id_attribute: str | None
) -> t.Callable[[str], referencing.Resource[Schema]]:
    """
    Create the callback used to load documents referenced via ``$ref``.

    :param parser_set: parsers used to decode the referenced document; the
        format is chosen from the URI/path, defaulting to JSON
    :param retrieval_uri: the URI the root schema was loaded from, or ``None``
    :param id_attribute: the root schema's ``$id`` if it is a string, else ``None``
    :returns: a callable mapping a URI to a ``referencing.Resource``
    """
    # local import: keeps this function self-contained without touching the
    # module-level import block
    import io

    # the schema's own "$id" takes precedence over the location it was loaded from
    base_uri = id_attribute
    if base_uri is None:
        base_uri = retrieval_uri

    def get_local_file(uri: str) -> t.Any:
        path = filename2path(uri)
        return parser_set.parse_file(path, "json")

    def get_remote_file(uri: str) -> t.Any:
        # the context manager guarantees the connection is released, even if
        # the status check or the parse fails
        with requests.get(uri) as response:
            # refuse to parse an HTTP error page as though it were a schema
            response.raise_for_status()
            # `response.content` has any Content-Encoding (gzip/deflate) undone,
            # unlike `response.raw`, which yields the bytes as sent on the wire
            return parser_set.parse_data_with_path(
                io.BytesIO(response.content), uri, "json"
            )

    def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
        # a URI without a scheme is relative: resolve it against the base URI
        scheme = urllib.parse.urlsplit(uri).scheme
        if scheme == "" and base_uri is not None:
            full_uri = urllib.parse.urljoin(base_uri, uri)
        else:
            full_uri = uri

        full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
        if full_uri_scheme in ("http", "https"):
            parsed_object = get_remote_file(full_uri)
        else:
            parsed_object = get_local_file(full_uri)

        return referencing.Resource.from_contents(
            parsed_object, default_specification=DRAFT202012
        )

    return retrieve_reference

tests/acceptance/conftest.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
1+
import textwrap
2+
13
import pytest
24
from click.testing import CliRunner
35

46
from check_jsonschema import main as cli_main
57

68

9+
def _render_result(result):
    """
    Format a CLI result's captured output and stderr for an assertion message.

    Each stream is indented beneath its own heading so the two are easy to
    tell apart when a test fails.
    """
    shown_output = textwrap.indent(result.output, " ")
    shown_stderr = textwrap.indent(result.stderr, " ")
    return f"""
output:
{shown_output}

stderr:
{shown_stderr}
"""
17+
18+
719
@pytest.fixture
820
def cli_runner():
921
return CliRunner(mix_stderr=False)
@@ -22,8 +34,14 @@ def func(cli_args, *args, **kwargs):
2234

2335
@pytest.fixture
2436
def run_line_simple(run_line):
25-
def func(cli_args, *args, **kwargs):
26-
res = run_line(["check-jsonschema"] + cli_args, *args, **kwargs)
27-
assert res.exit_code == 0
37+
def func(cli_args, *args, full_traceback: bool = True, **kwargs):
38+
res = run_line(
39+
["check-jsonschema"]
40+
+ (["--traceback-mode", "full"] if full_traceback else [])
41+
+ cli_args,
42+
*args,
43+
**kwargs,
44+
)
45+
assert res.exit_code == 0, _render_result(res)
2846

2947
return func

tests/acceptance/test_nonjson_schema_handling.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@
3131

3232

3333
@pytest.mark.parametrize("passing_data", [True, False])
34-
def test_warning_on_yaml_reference_passes(run_line, tmp_path, passing_data):
34+
def test_yaml_reference(run_line, tmp_path, passing_data):
3535
main_schemafile = tmp_path / "main_schema.json"
3636
main_schemafile.write_text(json.dumps(YAML_REF_MAIN_SCHEMA))
37+
# JSON is a subset of YAML, so this works for generated YAML
3738
ref_schema = tmp_path / "title_schema.yaml"
3839
ref_schema.write_text(json.dumps(TITLE_SCHEMA))
3940

@@ -47,14 +48,11 @@ def test_warning_on_yaml_reference_passes(run_line, tmp_path, passing_data):
4748
["check-jsonschema", "--schemafile", str(main_schemafile), str(doc)]
4849
)
4950
assert result.exit_code == (0 if passing_data else 1)
50-
assert (
51-
"WARNING: You appear to be using a schema which references a YAML file"
52-
in result.stderr
53-
)
5451

5552

53+
@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5")
5654
@pytest.mark.parametrize("passing_data", [True, False])
57-
def test_warning_on_json5_reference(run_line, tmp_path, passing_data):
55+
def test_json5_reference(run_line, tmp_path, passing_data):
5856
main_schemafile = tmp_path / "main_schema.json"
5957
main_schemafile.write_text(json.dumps(JSON5_REF_MAIN_SCHEMA))
6058
ref_schema = tmp_path / "title_schema.json5"
@@ -70,10 +68,6 @@ def test_warning_on_json5_reference(run_line, tmp_path, passing_data):
7068
["check-jsonschema", "--schemafile", str(main_schemafile), str(doc)]
7169
)
7270
assert result.exit_code == (0 if passing_data else 1)
73-
assert (
74-
"WARNING: You appear to be using a schema which references a JSON5 file"
75-
in result.stderr
76-
)
7771

7872

7973
@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5")

0 commit comments

Comments
 (0)