Skip to content

Commit

Permalink
Merge pull request #289 from python-jsonschema/use-referencing
Browse files Browse the repository at this point in the history
Use the new 'referencing' implementation in 'jsonschema'
  • Loading branch information
sirosen authored Aug 8, 2023
2 parents 0486f68 + 547f86b commit 76b19fe
Show file tree
Hide file tree
Showing 15 changed files with 376 additions and 82 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
py: ["3.x"]
include:
- toxenv: py-mindeps
py: "3.7"
py: "3.8"

runs-on: ubuntu-latest
name: "Run '${{ matrix.toxenv }}' on python ${{ matrix.py }}"
Expand Down Expand Up @@ -40,7 +40,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
py: ['3.7', '3.8', '3.9', '3.10', '3.11']
py: ['3.8', '3.9', '3.10', '3.11']
name: "Run tests on ${{ matrix.os }}, py${{ matrix.py }}"
runs-on: ${{ matrix.os }}
steps:
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ Unreleased
.. vendor-insert-here
- Update vendored schemas (2023-07-18)
- Remove support for python3.7
- The minimum supported version of the `jsonschema` library is now `4.18.0`,
which introduces new `$ref` resolution behavior and fixes. That behavior is
used in all cases, which should result in faster evaluation, especially on
large schemas.
- `$ref` usage may now refer to YAML, TOML, or JSON5 files, or any other
non-JSON format supported by `check-jsonschema`. The file type is inferred
only from the file extension in these cases and defaults to JSON if there is
no recognizable extension.

0.23.3
------
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python_requires = >=3.7
install_requires =
importlib-resources>=1.4.0;python_version<"3.9"
ruamel.yaml==0.17.32
jsonschema>=4.5.1,<5.0
jsonschema>=4.18.0,<5.0
requests<3.0
click>=8,<9
package_dir=
Expand Down
7 changes: 6 additions & 1 deletion src/check_jsonschema/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import click
import jsonschema
import referencing.exceptions

from . import utils
from .formats import FormatOptions
Expand Down Expand Up @@ -75,7 +76,11 @@ def _build_result(self) -> CheckResult:
def _run(self) -> None:
try:
result = self._build_result()
except jsonschema.RefResolutionError as e:
except (
referencing.exceptions.NoSuchResource,
referencing.exceptions.Unretrievable,
referencing.exceptions.Unresolvable,
) as e:
self._fail("Failure resolving $ref within schema\n", e)

self._reporter.report_result(result)
Expand Down
7 changes: 5 additions & 2 deletions src/check_jsonschema/identify_filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
}


def path_to_type(path: pathlib.Path, *, default_type: str = "json") -> str:
ext = path.suffix.lstrip(".")
def path_to_type(path: str | pathlib.Path, *, default_type: str = "json") -> str:
if isinstance(path, str):
ext = path.rpartition(".")[2]
else:
ext = path.suffix.lstrip(".")

if ext in _EXTENSION_MAP:
return _EXTENSION_MAP[ext]
Expand Down
13 changes: 9 additions & 4 deletions src/check_jsonschema/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(
}

def get(
self, path: pathlib.Path, default_filetype: str
self, path: pathlib.Path | str, default_filetype: str
) -> t.Callable[[t.BinaryIO], t.Any]:
filetype = path_to_type(path, default_type=default_filetype)

Expand All @@ -82,10 +82,15 @@ def get(
+ ",".join(self._by_tag.keys())
)

def parse_file(self, path: pathlib.Path, default_filetype: str) -> t.Any:
def parse_data_with_path(
self, data: t.BinaryIO, path: pathlib.Path | str, default_filetype: str
) -> t.Any:
loadfunc = self.get(path, default_filetype)
try:
with open(path, "rb") as fp:
return loadfunc(fp)
return loadfunc(data)
except LOADING_FAILURE_ERROR_TYPES as e:
raise FailedFileLoadError(f"Failed to parse {path}") from e

def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any:
with open(path, "rb") as fp:
return self.parse_data_with_path(fp, path, default_filetype)
27 changes: 16 additions & 11 deletions src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@

from ..builtin_schemas import get_builtin_schema
from ..formats import FormatOptions, make_format_checker
from ..parsers import ParserSet
from ..utils import is_url_ish
from .errors import UnsupportedUrlScheme
from .readers import HttpSchemaReader, LocalSchemaReader
from .resolver import make_ref_resolver
from .resolver import make_reference_registry


def _extend_with_default(
Expand Down Expand Up @@ -71,6 +72,9 @@ def __init__(
if is_url_ish(self.schemafile):
self.url_info = urllib.parse.urlparse(self.schemafile)

# setup a parser collection
self._parsers = ParserSet()

# setup a schema reader lazily, when needed
self._reader: LocalSchemaReader | HttpSchemaReader | None = None

Expand All @@ -96,8 +100,8 @@ def _get_schema_reader(self) -> LocalSchemaReader | HttpSchemaReader:
f"detected parsed URL had an unrecognized scheme: {self.url_info}"
)

def get_schema_ref_base(self) -> str | None:
return self.reader.get_ref_base()
def get_schema_retrieval_uri(self) -> str | None:
return self.reader.get_retrieval_uri()

def get_schema(self) -> dict[str, t.Any]:
return self.reader.read_schema()
Expand All @@ -109,19 +113,19 @@ def get_validator(
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.Validator:
schema_uri = self.get_schema_ref_base()
retrieval_uri = self.get_schema_retrieval_uri()
schema = self.get_schema()

schema_dialect = schema.get("$schema")

# format checker (which may be None)
format_checker = make_format_checker(format_opts, schema_dialect)

# ref resolver which may be built from the schema path
# if the location is a URL, there's no change, but if it's a file path
# it's made absolute and URI-ized
# the resolver should use `$id` if there is one present in the schema
ref_resolver = make_ref_resolver(schema_uri, schema)
# reference resolution
# with support for YAML, TOML, and other formats from the parsers
reference_registry = make_reference_registry(
self._parsers, retrieval_uri, schema
)

# get the correct validator class and check the schema under its metaschema
validator_cls = jsonschema.validators.validator_for(schema)
Expand All @@ -134,7 +138,7 @@ def get_validator(
# now that we know it's safe to try to create the validator instance, do it
validator = validator_cls(
schema,
resolver=ref_resolver,
registry=reference_registry,
format_checker=format_checker,
)
return t.cast(jsonschema.Validator, validator)
Expand All @@ -143,8 +147,9 @@ def get_validator(
class BuiltinSchemaLoader(SchemaLoader):
def __init__(self, schema_name: str) -> None:
self.schema_name = schema_name
self._parsers = ParserSet()

def get_schema_ref_base(self) -> str | None:
def get_schema_retrieval_uri(self) -> str | None:
return None

def get_schema(self) -> dict[str, t.Any]:
Expand Down
8 changes: 3 additions & 5 deletions src/check_jsonschema/schema_loader/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,12 @@ def _run_load_callback(schema_location: str, callback: t.Callable) -> dict:


class LocalSchemaReader:
FORMATS = ("json", "json5", "yaml")

def __init__(self, filename: str) -> None:
self.path = filename2path(filename)
self.filename = str(self.path)
self.parsers = ParserSet(supported_formats=self.FORMATS)
self.parsers = ParserSet()

def get_ref_base(self) -> str:
def get_retrieval_uri(self) -> str:
return self.path.as_uri()

def _read_impl(self) -> t.Any:
Expand All @@ -57,7 +55,7 @@ def __init__(
validation_callback=json.loads,
)

def get_ref_base(self) -> str:
def get_retrieval_uri(self) -> str:
return self.url

def _read_impl(self) -> t.Any:
Expand Down
103 changes: 65 additions & 38 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,69 @@
from __future__ import annotations

import typing as t
import urllib.parse

import click
import jsonschema


class _CliRefResolver(jsonschema.RefResolver):
    """A ``RefResolver`` which warns about YAML and JSON5 references.

    The warning is written to stderr in yellow; resolution itself is always
    delegated to the parent class afterwards.
    """

    def resolve_remote(self, uri: str) -> t.Any:
        """Emit a warning for ``.yaml``/``.yml``/``.json5`` URIs, then resolve.

        :param uri: the URI of the referenced document
        :returns: whatever the parent ``resolve_remote`` returns for ``uri``
        """
        # pick the message first so that there is a single output call below
        if uri.endswith(".yaml") or uri.endswith(".yml"):
            warning: str | None = """\
WARNING: You appear to be using a schema which references a YAML file.
This is not supported by check-jsonschema and may result in errors.
"""
        elif uri.endswith(".json5"):
            warning = """\
WARNING: You appear to be using a schema which references a JSON5 file.
This is not supported by check-jsonschema and may result in errors.
"""
        else:
            warning = None

        if warning is not None:
            click.secho(warning, err=True, fg="yellow")

        return super().resolve_remote(uri)


def make_ref_resolver(
    schema_uri: str | None, schema: dict
) -> jsonschema.RefResolver | None:
    """Build a ref resolver for ``schema``, or return ``None``.

    :param schema_uri: the location of the schema; when it is ``None`` or
        empty, no resolver is built
    :param schema: the loaded schema document
    :returns: a ``_CliRefResolver`` whose base URI is the schema's ``$id``
        when that key is present and ``schema_uri`` otherwise, or ``None``
    """
    if schema_uri:
        # a declared `$id` takes precedence over the schema's own location
        resolver_base = schema.get("$id", schema_uri)
        # FIXME: temporary type-ignore because typeshed has the type wrong
        return _CliRefResolver(resolver_base, schema)  # type: ignore[arg-type]

    return None
import referencing
import requests
from referencing.jsonschema import DRAFT202012, Schema

from ..parsers import ParserSet
from ..utils import filename2path


def make_reference_registry(
    parsers: ParserSet, retrieval_uri: str | None, schema: dict
) -> referencing.Registry:
    """Create a ``referencing.Registry`` which already contains ``schema``.

    :param parsers: parser collection passed on to the retrieval callback
    :param retrieval_uri: the URI the schema was loaded from, if there is one
    :param schema: the loaded schema document
    :returns: a registry whose ``retrieve`` callback comes from
        ``create_retrieve_callable`` and in which the schema is registered
        under ``retrieval_uri`` and under its string ``$id`` (each only when
        available)
    """
    # only a string-valued `$id` is usable as an identifier
    raw_id: t.Any = schema.get("$id")
    id_attribute: str | None = raw_id if isinstance(raw_id, str) else None

    schema_resource = referencing.Resource.from_contents(
        schema, default_specification=DRAFT202012
    )

    # mypy does not recognize that Registry is an `attrs` class and has `retrieve` as an
    # argument to its implicit initializer
    registry: referencing.Registry = referencing.Registry(  # type: ignore[call-arg]
        retrieve=create_retrieve_callable(parsers, retrieval_uri, id_attribute)
    )

    # register under the retrieval URI first, then under `$id`
    for known_uri in (retrieval_uri, id_attribute):
        if known_uri is not None:
            registry = registry.with_resource(uri=known_uri, resource=schema_resource)

    return registry


def create_retrieve_callable(
    parser_set: ParserSet, retrieval_uri: str | None, id_attribute: str | None
) -> t.Callable[[str], referencing.Resource[Schema]]:
    """Build the ``retrieve`` callback used by the reference registry.

    The returned callable takes a URI, loads the document it names, and wraps
    it in a ``referencing.Resource``. Documents are parsed via ``parser_set``
    with ``"json"`` as the default file type.

    :param parser_set: the parsers used to load referenced documents
    :param retrieval_uri: the URI the root schema was loaded from, if any
    :param id_attribute: the root schema's ``$id``, if any; it takes
        precedence over ``retrieval_uri`` as the base for relative references
    :returns: a callable mapping a URI to a ``referencing.Resource``
    :raises requests.RequestException: raised by the returned callable (not by
        this factory) when an ``http``/``https`` fetch fails, times out, or
        returns an error status
    """
    # function-scope import: only needed to wrap downloaded bytes for parsing
    import io

    # Without a timeout `requests` waits indefinitely on a stalled server.
    # NOTE(review): 30 seconds is a judgment call -- adjust if refs are slow.
    http_timeout_seconds = 30

    base_uri = id_attribute if id_attribute is not None else retrieval_uri

    def get_local_file(uri: str) -> t.Any:
        path = filename2path(uri)
        return parser_set.parse_file(path, "json")

    def get_remote_file(uri: str) -> t.Any:
        # the context manager closes the response and releases the connection
        with requests.get(uri, timeout=http_timeout_seconds) as response:
            # fail on 4xx/5xx rather than parsing an error page as a schema
            response.raise_for_status()
            # `.content` has any gzip/deflate transfer encoding already
            # decoded, which reading from `response.raw` does not guarantee
            return parser_set.parse_data_with_path(
                io.BytesIO(response.content), uri, "json"
            )

    def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
        # a URI without a scheme is relative: join it onto the base, if any
        scheme = urllib.parse.urlsplit(uri).scheme
        if scheme == "" and base_uri is not None:
            full_uri = urllib.parse.urljoin(base_uri, uri)
        else:
            full_uri = uri

        full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
        if full_uri_scheme in ("http", "https"):
            parsed_object = get_remote_file(full_uri)
        else:
            parsed_object = get_local_file(full_uri)

        return referencing.Resource.from_contents(
            parsed_object, default_specification=DRAFT202012
        )

    return retrieve_reference
24 changes: 21 additions & 3 deletions tests/acceptance/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
import textwrap

import pytest
from click.testing import CliRunner

from check_jsonschema import main as cli_main


def _render_result(result):
return f"""
output:
{textwrap.indent(result.output, " ")}
stderr:
{textwrap.indent(result.stderr, " ")}
"""


@pytest.fixture
def cli_runner():
    """Provide a click ``CliRunner`` constructed with ``mix_stderr=False``."""
    runner = CliRunner(mix_stderr=False)
    return runner
Expand All @@ -22,8 +34,14 @@ def func(cli_args, *args, **kwargs):

@pytest.fixture
def run_line_simple(run_line):
def func(cli_args, *args, **kwargs):
res = run_line(["check-jsonschema"] + cli_args, *args, **kwargs)
assert res.exit_code == 0
def func(cli_args, *args, full_traceback: bool = True, **kwargs):
res = run_line(
["check-jsonschema"]
+ (["--traceback-mode", "full"] if full_traceback else [])
+ cli_args,
*args,
**kwargs,
)
assert res.exit_code == 0, _render_result(res)

return func
14 changes: 4 additions & 10 deletions tests/acceptance/test_nonjson_schema_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@


@pytest.mark.parametrize("passing_data", [True, False])
def test_warning_on_yaml_reference_passes(run_line, tmp_path, passing_data):
def test_yaml_reference(run_line, tmp_path, passing_data):
main_schemafile = tmp_path / "main_schema.json"
main_schemafile.write_text(json.dumps(YAML_REF_MAIN_SCHEMA))
# JSON is a subset of YAML, so this works for generated YAML
ref_schema = tmp_path / "title_schema.yaml"
ref_schema.write_text(json.dumps(TITLE_SCHEMA))

Expand All @@ -47,14 +48,11 @@ def test_warning_on_yaml_reference_passes(run_line, tmp_path, passing_data):
["check-jsonschema", "--schemafile", str(main_schemafile), str(doc)]
)
assert result.exit_code == (0 if passing_data else 1)
assert (
"WARNING: You appear to be using a schema which references a YAML file"
in result.stderr
)


@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5")
@pytest.mark.parametrize("passing_data", [True, False])
def test_warning_on_json5_reference(run_line, tmp_path, passing_data):
def test_json5_reference(run_line, tmp_path, passing_data):
main_schemafile = tmp_path / "main_schema.json"
main_schemafile.write_text(json.dumps(JSON5_REF_MAIN_SCHEMA))
ref_schema = tmp_path / "title_schema.json5"
Expand All @@ -70,10 +68,6 @@ def test_warning_on_json5_reference(run_line, tmp_path, passing_data):
["check-jsonschema", "--schemafile", str(main_schemafile), str(doc)]
)
assert result.exit_code == (0 if passing_data else 1)
assert (
"WARNING: You appear to be using a schema which references a JSON5 file"
in result.stderr
)


@pytest.mark.skipif(not JSON5_ENABLED, reason="test requires json5")
Expand Down
Loading

0 comments on commit 76b19fe

Please sign in to comment.