Skip to content

Commit

Permalink
Merge branch 'main' into vendor-schemas-auto
Browse files Browse the repository at this point in the history
  • Loading branch information
sirosen authored Aug 8, 2023
2 parents da36ef8 + 2d6f144 commit 3eb94d3
Show file tree
Hide file tree
Showing 16 changed files with 410 additions and 97 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
py: ["3.x"]
include:
- toxenv: py-mindeps
py: "3.7"
py: "3.8"

runs-on: ubuntu-latest
name: "Run '${{ matrix.toxenv }}' on python ${{ matrix.py }}"
Expand Down Expand Up @@ -40,7 +40,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
py: ['3.7', '3.8', '3.9', '3.10', '3.11']
py: ['3.8', '3.9', '3.10', '3.11']
name: "Run tests on ${{ matrix.os }}, py${{ matrix.py }}"
runs-on: ${{ matrix.os }}
steps:
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@ Unreleased
.. vendor-insert-here
- Update vendored schemas (2023-08-08)
- Remove support for Python 3.7
- The minimum supported version of the `jsonschema` library is now `4.18.0`,
which introduces new `$ref` resolution behavior and fixes. That behavior is
used in all cases, which should result in faster evaluation, especially on
large schemas.
- `$ref` usage may now refer to YAML, TOML, or JSON5 files, or any other
non-JSON format supported by `check-jsonschema`. The file type is inferred
only from the file extension in these cases and defaults to JSON if there is
no recognizable extension.
- Remote schemafiles (http/s) now support YAML, TOML, and JSON5 formats, if the
URL ends with the appropriate extension and the matching parser is available.
Extensionless URLs are treated as JSON.

0.23.3
------
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ python_requires = >=3.7
install_requires =
importlib-resources>=1.4.0;python_version<"3.9"
ruamel.yaml==0.17.32
jsonschema>=4.5.1,<5.0
jsonschema>=4.18.0,<5.0
requests<3.0
click>=8,<9
package_dir=
Expand Down
9 changes: 7 additions & 2 deletions src/check_jsonschema/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import click
import jsonschema
import referencing.exceptions

from . import utils
from .formats import FormatOptions
Expand Down Expand Up @@ -47,7 +48,7 @@ def _fail(self, msg: str, err: Exception | None = None) -> t.NoReturn:

def get_validator(
self, path: pathlib.Path, doc: dict[str, t.Any]
) -> jsonschema.Validator:
) -> jsonschema.protocols.Validator:
try:
return self._schema_loader.get_validator(
path, doc, self._format_opts, self._fill_defaults
Expand Down Expand Up @@ -75,7 +76,11 @@ def _build_result(self) -> CheckResult:
def _run(self) -> None:
try:
result = self._build_result()
except jsonschema.RefResolutionError as e:
except (
referencing.exceptions.NoSuchResource,
referencing.exceptions.Unretrievable,
referencing.exceptions.Unresolvable,
) as e:
self._fail("Failure resolving $ref within schema\n", e)

self._reporter.report_result(result)
Expand Down
7 changes: 5 additions & 2 deletions src/check_jsonschema/identify_filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
}


def path_to_type(path: pathlib.Path, *, default_type: str = "json") -> str:
ext = path.suffix.lstrip(".")
def path_to_type(path: str | pathlib.Path, *, default_type: str = "json") -> str:
if isinstance(path, str):
ext = path.rpartition(".")[2]
else:
ext = path.suffix.lstrip(".")

if ext in _EXTENSION_MAP:
return _EXTENSION_MAP[ext]
Expand Down
13 changes: 9 additions & 4 deletions src/check_jsonschema/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(
}

def get(
self, path: pathlib.Path, default_filetype: str
self, path: pathlib.Path | str, default_filetype: str
) -> t.Callable[[t.BinaryIO], t.Any]:
filetype = path_to_type(path, default_type=default_filetype)

Expand All @@ -82,10 +82,15 @@ def get(
+ ",".join(self._by_tag.keys())
)

def parse_file(self, path: pathlib.Path, default_filetype: str) -> t.Any:
def parse_data_with_path(
self, data: t.BinaryIO, path: pathlib.Path | str, default_filetype: str
) -> t.Any:
loadfunc = self.get(path, default_filetype)
try:
with open(path, "rb") as fp:
return loadfunc(fp)
return loadfunc(data)
except LOADING_FAILURE_ERROR_TYPES as e:
raise FailedFileLoadError(f"Failed to parse {path}") from e

def parse_file(self, path: pathlib.Path | str, default_filetype: str) -> t.Any:
with open(path, "rb") as fp:
return self.parse_data_with_path(fp, path, default_filetype)
37 changes: 21 additions & 16 deletions src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@

from ..builtin_schemas import get_builtin_schema
from ..formats import FormatOptions, make_format_checker
from ..parsers import ParserSet
from ..utils import is_url_ish
from .errors import UnsupportedUrlScheme
from .readers import HttpSchemaReader, LocalSchemaReader
from .resolver import make_ref_resolver
from .resolver import make_reference_registry


def _extend_with_default(
validator_class: type[jsonschema.Validator],
validator_class: type[jsonschema.protocols.Validator],
) -> type[jsonschema.Validator]:
validate_properties = validator_class.VALIDATORS["properties"]

Expand Down Expand Up @@ -50,7 +51,7 @@ def get_validator(
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.Validator:
) -> jsonschema.protocols.Validator:
raise NotImplementedError


Expand All @@ -71,6 +72,9 @@ def __init__(
if is_url_ish(self.schemafile):
self.url_info = urllib.parse.urlparse(self.schemafile)

# setup a parser collection
self._parsers = ParserSet()

# setup a schema reader lazily, when needed
self._reader: LocalSchemaReader | HttpSchemaReader | None = None

Expand All @@ -96,8 +100,8 @@ def _get_schema_reader(self) -> LocalSchemaReader | HttpSchemaReader:
f"detected parsed URL had an unrecognized scheme: {self.url_info}"
)

def get_schema_ref_base(self) -> str | None:
return self.reader.get_ref_base()
def get_schema_retrieval_uri(self) -> str | None:
return self.reader.get_retrieval_uri()

def get_schema(self) -> dict[str, t.Any]:
return self.reader.read_schema()
Expand All @@ -108,20 +112,20 @@ def get_validator(
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.Validator:
schema_uri = self.get_schema_ref_base()
) -> jsonschema.protocols.Validator:
retrieval_uri = self.get_schema_retrieval_uri()
schema = self.get_schema()

schema_dialect = schema.get("$schema")

# format checker (which may be None)
format_checker = make_format_checker(format_opts, schema_dialect)

# ref resolver which may be built from the schema path
# if the location is a URL, there's no change, but if it's a file path
# it's made absolute and URI-ized
# the resolver should use `$id` if there is one present in the schema
ref_resolver = make_ref_resolver(schema_uri, schema)
# reference resolution
# with support for YAML, TOML, and other formats from the parsers
reference_registry = make_reference_registry(
self._parsers, retrieval_uri, schema
)

# get the correct validator class and check the schema under its metaschema
validator_cls = jsonschema.validators.validator_for(schema)
Expand All @@ -134,17 +138,18 @@ def get_validator(
# now that we know it's safe to try to create the validator instance, do it
validator = validator_cls(
schema,
resolver=ref_resolver,
registry=reference_registry,
format_checker=format_checker,
)
return t.cast(jsonschema.Validator, validator)
return t.cast(jsonschema.protocols.Validator, validator)


class BuiltinSchemaLoader(SchemaLoader):
def __init__(self, schema_name: str) -> None:
self.schema_name = schema_name
self._parsers = ParserSet()

def get_schema_ref_base(self) -> str | None:
def get_schema_retrieval_uri(self) -> str | None:
return None

def get_schema(self) -> dict[str, t.Any]:
Expand All @@ -158,7 +163,7 @@ def get_validator(
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.Validator:
) -> jsonschema.protocols.Validator:
schema_validator = jsonschema.validators.validator_for(instance_doc)
meta_validator_class = jsonschema.validators.validator_for(
schema_validator.META_SCHEMA, default=schema_validator
Expand Down
23 changes: 15 additions & 8 deletions src/check_jsonschema/schema_loader/readers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

import json
import io
import typing as t

import ruamel.yaml
Expand All @@ -25,14 +25,12 @@ def _run_load_callback(schema_location: str, callback: t.Callable) -> dict:


class LocalSchemaReader:
FORMATS = ("json", "json5", "yaml")

def __init__(self, filename: str) -> None:
self.path = filename2path(filename)
self.filename = str(self.path)
self.parsers = ParserSet(supported_formats=self.FORMATS)
self.parsers = ParserSet()

def get_ref_base(self) -> str:
def get_retrieval_uri(self) -> str:
return self.path.as_uri()

def _read_impl(self) -> t.Any:
Expand All @@ -50,19 +48,28 @@ def __init__(
disable_cache: bool,
) -> None:
self.url = url
self.parsers = ParserSet()
self.downloader = CacheDownloader(
url,
cache_filename,
disable_cache=disable_cache,
validation_callback=json.loads,
validation_callback=self._parse,
)
self._parsed_schema: t.Any | None = None

def _parse(self, schema_bytes: bytes) -> t.Any:
if self._parsed_schema is None:
self._parsed_schema = self.parsers.parse_data_with_path(
io.BytesIO(schema_bytes), self.url, default_filetype="json"
)
return self._parsed_schema

def get_ref_base(self) -> str:
def get_retrieval_uri(self) -> str:
return self.url

def _read_impl(self) -> t.Any:
with self.downloader.open() as fp:
return json.load(fp)
return self._parse(fp.read())

def read_schema(self) -> dict:
return _run_load_callback(self.url, self._read_impl)
103 changes: 65 additions & 38 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,69 @@
from __future__ import annotations

import typing as t
import urllib.parse

import click
import jsonschema


class _CliRefResolver(jsonschema.RefResolver):
def resolve_remote(self, uri: str) -> t.Any:
if uri.endswith(".yaml") or uri.endswith(".yml"):
click.secho(
"""\
WARNING: You appear to be using a schema which references a YAML file.
This is not supported by check-jsonschema and may result in errors.
""",
err=True,
fg="yellow",
)
elif uri.endswith(".json5"):
click.secho(
"""\
WARNING: You appear to be using a schema which references a JSON5 file.
This is not supported by check-jsonschema and may result in errors.
""",
err=True,
fg="yellow",
)
return super().resolve_remote(uri)


def make_ref_resolver(
schema_uri: str | None, schema: dict
) -> jsonschema.RefResolver | None:
if not schema_uri:
return None

base_uri = schema.get("$id", schema_uri)
# FIXME: temporary type-ignore because typeshed has the type wrong
return _CliRefResolver(base_uri, schema) # type: ignore[arg-type]
import referencing
import requests
from referencing.jsonschema import DRAFT202012, Schema

from ..parsers import ParserSet
from ..utils import filename2path


def make_reference_registry(
    parsers: ParserSet, retrieval_uri: str | None, schema: dict
) -> referencing.Registry:
    """Build the ``referencing.Registry`` used to resolve ``$ref`` in *schema*.

    The root schema is registered under ``retrieval_uri`` and under its own
    ``$id`` (each only when available), and the registry is given a retrieve
    callback built by ``create_retrieve_callable`` for anything else.

    :param parsers: parser collection handed to the retrieve callback
    :param retrieval_uri: URI the schema was loaded from, or ``None``
    :param schema: the already-parsed root schema document
    :returns: a registry pre-populated with the root schema
    """
    # ``$id`` is only usable as a URI when it is a string; ignore anything else
    raw_id: t.Any = schema.get("$id")
    id_attribute: str | None = raw_id if isinstance(raw_id, str) else None

    # schemas which do not declare ``$schema`` are treated as draft 2020-12
    root_resource = referencing.Resource.from_contents(
        schema, default_specification=DRAFT202012
    )

    retrieve = create_retrieve_callable(parsers, retrieval_uri, id_attribute)
    # mypy does not see that the `attrs`-generated initializer of Registry
    # accepts `retrieve`, hence the ignore below
    registry: referencing.Registry = referencing.Registry(  # type: ignore[call-arg]
        retrieve=retrieve
    )

    # make the root schema reachable by every URI it is known under
    for known_uri in (retrieval_uri, id_attribute):
        if known_uri is not None:
            registry = registry.with_resource(uri=known_uri, resource=root_resource)

    return registry


def create_retrieve_callable(
    parser_set: ParserSet, retrieval_uri: str | None, id_attribute: str | None
) -> t.Callable[[str], referencing.Resource[Schema]]:
    """Build the ``retrieve`` callback handed to the reference registry.

    The returned callable receives the URI of a ``$ref`` target, loads and
    parses the document it names, and wraps it in a ``referencing.Resource``.

    :param parser_set: parsers used to decode the retrieved document; the
        format is chosen from the URI or path, falling back to ``"json"``
    :param retrieval_uri: URI the root schema was loaded from, or ``None``
    :param id_attribute: the root schema's ``$id``, or ``None``; when set it
        takes precedence over ``retrieval_uri`` as the base for relative refs
    :returns: a function mapping a URI string to a ``referencing.Resource``
    """
    # function-scope import: ``io`` is not part of this module's import block
    import io

    base_uri = id_attribute if id_attribute is not None else retrieval_uri

    def get_local_file(uri: str) -> t.Any:
        # NOTE(review): ``uri`` can be a ``file://`` URI when the base came
        # from a local schema path; this relies on ``filename2path`` accepting
        # that form -- confirm
        path = filename2path(uri)
        return parser_set.parse_file(path, "json")

    def get_remote_file(uri: str) -> t.Any:
        response = requests.get(uri)
        # fail on 4xx/5xx instead of trying to parse an error page as a schema
        response.raise_for_status()
        # ``response.content`` has content-encoding (gzip/deflate) undone,
        # which the undecoded ``response.raw`` stream does not; reading it
        # fully also lets the connection be released
        return parser_set.parse_data_with_path(
            io.BytesIO(response.content), uri, "json"
        )

    def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
        # scheme-less references are resolved relative to the schema's base
        # URI, when there is one
        if base_uri is not None and urllib.parse.urlsplit(uri).scheme == "":
            full_uri = urllib.parse.urljoin(base_uri, uri)
        else:
            full_uri = uri

        if urllib.parse.urlsplit(full_uri).scheme in ("http", "https"):
            parsed_object = get_remote_file(full_uri)
        else:
            parsed_object = get_local_file(full_uri)

        # documents which do not declare ``$schema`` are treated as 2020-12
        return referencing.Resource.from_contents(
            parsed_object, default_specification=DRAFT202012
        )

    return retrieve_reference
Loading

0 comments on commit 3eb94d3

Please sign in to comment.