From e23446b2810994fa5792a4783e0b796da8233ca8 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Wed, 18 Jun 2025 14:04:57 +0200 Subject: [PATCH 1/6] fix: typo _validata_data Signed-off-by: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> --- cyclonedx/validation/json.py | 4 ++-- cyclonedx/validation/xml.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cyclonedx/validation/json.py b/cyclonedx/validation/json.py index 7e6dcf0b..c4898ae0 100644 --- a/cyclonedx/validation/json.py +++ b/cyclonedx/validation/json.py @@ -64,10 +64,10 @@ def validate_str(self, data: str) -> Optional[ValidationError]: else: def validate_str(self, data: str) -> Optional[ValidationError]: - return self._validata_data( + return self._validate_data( json_loads(data)) - def _validata_data(self, data: Any) -> Optional[ValidationError]: + def _validate_data(self, data: Any) -> Optional[ValidationError]: validator = self._validator # may throw on error that MUST NOT be caught try: validator.validate(data) diff --git a/cyclonedx/validation/xml.py b/cyclonedx/validation/xml.py index 6df74244..2c0a6730 100644 --- a/cyclonedx/validation/xml.py +++ b/cyclonedx/validation/xml.py @@ -60,12 +60,12 @@ def validate_str(self, data: str) -> Optional[ValidationError]: raise self.__MDERROR[0] from self.__MDERROR[1] else: def validate_str(self, data: str) -> Optional[ValidationError]: - return self._validata_data( + return self._validate_data( xml_fromstring( # nosec B320 bytes(data, encoding='utf8'), parser=self.__xml_parser)) - def _validata_data(self, data: Any) -> Optional[ValidationError]: + def _validate_data(self, data: Any) -> Optional[ValidationError]: validator = self._validator # may throw on error that MUST NOT be caught if not validator.validate(data): return ValidationError(validator.error_log.last_error) From c7351a60f64b5ee5bc1a427f6cbedca9128a8659 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete 
<1246751+e3krisztian@users.noreply.github.com> Date: Thu, 19 Jun 2025 14:11:58 +0200 Subject: [PATCH 2/6] feat: add new properties to ValidationError ValidationError had a single untyped property: .data, which hold different objects in the XML and JSON cases, while they do have some common properties. The new properties introduced in this commit are - .message: the error message text - .path: pointer to the location of the error in the input Signed-off-by: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> --- cyclonedx/validation/__init__.py | 18 +++++++++++++++++- cyclonedx/validation/json.py | 16 +++++++++++++--- tests/test_validation_json.py | 2 ++ tests/test_validation_xml.py | 2 ++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/cyclonedx/validation/__init__.py b/cyclonedx/validation/__init__.py index 4f5c775f..fa87f30b 100644 --- a/cyclonedx/validation/__init__.py +++ b/cyclonedx/validation/__init__.py @@ -30,10 +30,26 @@ class ValidationError: """Validation failed with this specific error. - Use :attr:`~data` to access the content. + You can use :attr:`~data` to access the raw error object, but prefer + other properties and functions, if possible. """ data: Any + """Raw error data from one of the validation libraries.""" + + @property + def message(self) -> str: + """The error message.""" + return str(getattr(self.data, 'message', self)) + + @property + def path(self) -> str: + """Path to the location of the problem in the document. + + An XPath/JSONPath string. 
+ """ + # only subclasses know how to extract this info + return str(getattr(self.data, 'path', '')) def __init__(self, data: Any) -> None: self.data = data diff --git a/cyclonedx/validation/json.py b/cyclonedx/validation/json.py index c4898ae0..ff3d7b57 100644 --- a/cyclonedx/validation/json.py +++ b/cyclonedx/validation/json.py @@ -33,7 +33,7 @@ _missing_deps_error: Optional[tuple[MissingOptionalDependencyException, ImportError]] = None try: - from jsonschema.exceptions import ValidationError as JsonValidationError # type:ignore[import-untyped] + from jsonschema.exceptions import ValidationError as JsonSchemaValidationError # type:ignore[import-untyped] from jsonschema.validators import Draft7Validator # type:ignore[import-untyped] from referencing import Registry from referencing.jsonschema import DRAFT7 @@ -47,6 +47,16 @@ ), err +class _JsonValidationError(ValidationError): + @property + def path(self) -> str: + """Path to the location of the problem in the document. + + An XPath/JSONPath string. 
+ """ + return str(getattr(self.data, 'json_path', '')) + + class _BaseJsonValidator(BaseSchemabasedValidator, ABC): @property def output_format(self) -> Literal[OutputFormat.JSON]: @@ -71,8 +81,8 @@ def _validate_data(self, data: Any) -> Optional[ValidationError]: validator = self._validator # may throw on error that MUST NOT be caught try: validator.validate(data) - except JsonValidationError as error: - return ValidationError(error) + except JsonSchemaValidationError as error: + return _JsonValidationError(error) return None __validator: Optional['JsonSchemaValidator'] = None diff --git a/tests/test_validation_json.py b/tests/test_validation_json.py index 9bc1a7cc..c62fb85c 100644 --- a/tests/test_validation_json.py +++ b/tests/test_validation_json.py @@ -132,3 +132,5 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ self.skipTest('MissingOptionalDependencyException') self.assertIsNotNone(validation_error) self.assertIsNotNone(validation_error.data) + self.assertTrue(bool(validation_error.message)) + self.assertTrue(bool(validation_error.path)) diff --git a/tests/test_validation_xml.py b/tests/test_validation_xml.py index 81a56cce..f7a5e34c 100644 --- a/tests/test_validation_xml.py +++ b/tests/test_validation_xml.py @@ -92,3 +92,5 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ self.skipTest('MissingOptionalDependencyException') self.assertIsNotNone(validation_error) self.assertIsNotNone(validation_error.data) + self.assertTrue(bool(validation_error.message)) + self.assertTrue(bool(validation_error.path)) From d9da93dc7604c53ea29bc96f64733cd8e92e6612 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 19 Jun 2025 14:22:58 +0200 Subject: [PATCH 3/6] feat: safe ValidationError message access via .get_squeezed_message() ValidationError.data and .message is provided by third party libraries, and they can give a message of any length. E.g. 
jsonschema inserts its input in all of its messages, which could be arbitrarily big. To be able to show these errors to the user, some pre-processing is needed. The new method allows for squeezing these messages in a way that is least disruptive, and has special knowledge how to shorten jsonschema messages. It is definitely still a workaround, and ideally the libraries should not yield unlimited messages. Signed-off-by: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> --- cyclonedx/validation/__init__.py | 46 ++++++++++++ cyclonedx/validation/json.py | 37 ++++++++- tests/test_validation.py | 124 ++++++++++++++++++++++++++++++- tests/test_validation_json.py | 4 + tests/test_validation_xml.py | 4 + 5 files changed, 211 insertions(+), 4 deletions(-) diff --git a/cyclonedx/validation/__init__.py b/cyclonedx/validation/__init__.py index fa87f30b..ba95ccb7 100644 --- a/cyclonedx/validation/__init__.py +++ b/cyclonedx/validation/__init__.py @@ -27,6 +27,35 @@ from .xml import XmlValidator +def squeeze(text: str, size: int, replacement: str = ' ... ') -> str: + """Replaces the middle of ``text`` with ``replacement``. + + :param size: the length of the output, -1 to make no squeezing. + :return: potentially shorter text + :retval: ``text`` if ``size`` is -1 (for easy pass-through) + :retval: ``text`` if it is shorter than ``size`` + :retval: ``text`` with the middle of it replaced with ``replacement``, + if ``text`` is longer, than ``size`` + + Raises ValueError if ``replacement`` is longer than ``size``, even when + ``text`` is short enough to need no squeezing. 
+ """ + if size == -1: + return text + + if size < len(replacement): + raise ValueError(f'squeeze: {size = } < {len(replacement) = }') + + if len(text) <= size: + return text + + left_size = (size - len(replacement)) // 2 + right_size = size - len(replacement) - left_size + right_offset = len(text) - right_size + + return f'{text[:left_size]}{replacement}{text[right_offset:]}' + + class ValidationError: """Validation failed with this specific error. @@ -51,6 +80,23 @@ def path(self) -> str: # only subclasses know how to extract this info return str(getattr(self.data, 'path', '')) + def get_squeezed_message(self, *, context_limit: int = -1, max_size: int = -1, replacement: str = ' ... ') -> str: + """Extracts, and sanitizes the error message. + + Messages can be quite big from underlying libraries, as they sometimes + add context to the error message: both the input or the rule can be big. + + This can be amended both in a generic and library specific ways. + + :param max_size: squeeze message to this size. + :param context_limit: limit of tolerated context length. + :param replacement: to mark place of dropped text bit[s] + + With the defaults, no squeezing happens. + """ + # subclasses may know how to do it better + return squeeze(self.message, max_size, replacement) + def __init__(self, data: Any) -> None: self.data = data diff --git a/cyclonedx/validation/json.py b/cyclonedx/validation/json.py index ff3d7b57..7acb9632 100644 --- a/cyclonedx/validation/json.py +++ b/cyclonedx/validation/json.py @@ -29,7 +29,7 @@ from ..exception import MissingOptionalDependencyException from ..schema._res import BOM_JSON as _S_BOM, BOM_JSON_STRICT as _S_BOM_STRICT, JSF as _S_JSF, SPDX_JSON as _S_SPDX -from . import BaseSchemabasedValidator, SchemabasedValidator, ValidationError +from . 
import BaseSchemabasedValidator, SchemabasedValidator, ValidationError, squeeze _missing_deps_error: Optional[tuple[MissingOptionalDependencyException, ImportError]] = None try: @@ -47,7 +47,42 @@ ), err +def _get_message_with_squeezed_context(error: 'JsonSchemaValidationError', context_limit: int, replacement: str) -> str: + # The below code depends on jsonschema internals, that messages are created + # like `yield ValidationError(f"{instance!r} has non-unique elements")` + # and tries to replace `{instance!r}` with a shortened version, if needed + message: str = error.message + if context_limit <= 0 or len(message) <= context_limit: + return message + + repr_context = repr(error.instance) + if len(repr_context) <= context_limit: + return message + + return message.replace(repr_context, squeeze(repr_context, context_limit, replacement)) + + class _JsonValidationError(ValidationError): + def get_squeezed_message(self, *, context_limit: int = -1, max_size: int = -1, replacement: str = ' ... ') -> str: + """Extracts, and sanitizes the error message. + + Messages can be quite big from underlying libraries, as they sometimes + add context to the error message. + + This is amended both in a generic and library specific ways here. + + :param max_size: squeeze message to this size. + :param context_limit: jsonschema messages most of the time include the + instance repr as context, which can be very big + (in the megabytes range), so an attempt is made to + shorten context to this size. + :param replacement: to mark place of dropped text bit[s] + + With the defaults, no squeezing happens. + """ + message = _get_message_with_squeezed_context(self.data, context_limit, replacement) + return squeeze(message, max_size, replacement) + @property def path(self) -> str: """Path to the location of the problem in the document. 
diff --git a/tests/test_validation.py b/tests/test_validation.py index c4d4e085..a5367875 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -16,13 +16,13 @@ # Copyright (c) OWASP Foundation. All Rights Reserved. +import unittest from itertools import product -from unittest import TestCase from ddt import data, ddt, named_data, unpack from cyclonedx.schema import OutputFormat, SchemaVersion -from cyclonedx.validation import make_schemabased_validator +from cyclonedx.validation import make_schemabased_validator, squeeze UNDEFINED_FORMAT_VERSION = { (OutputFormat.JSON, SchemaVersion.V1_1), @@ -31,7 +31,7 @@ @ddt -class TestGetSchemabasedValidator(TestCase): +class TestGetSchemabasedValidator(unittest.TestCase): @named_data(*([f'{f.name} {v.name}', f, v] for f, v @@ -51,3 +51,121 @@ def test_as_expected(self, of: OutputFormat, sv: SchemaVersion) -> None: def test_fails_on_wrong_args(self, of: OutputFormat, sv: SchemaVersion, raises_regex: tuple) -> None: with self.assertRaisesRegex(*raises_regex): make_schemabased_validator(of, sv) + + +class TestSqueeze(unittest.TestCase): + + def test_squeeze_size_minus_one_returns_original_text(self) -> None: + """Test that size=-1 returns original text unchanged.""" + self.assertEqual(squeeze('hello world', -1), 'hello world') + self.assertEqual(squeeze('', -1), '') + self.assertEqual(squeeze('a', -1), 'a') + self.assertEqual(squeeze('very long text that would normally be squeezed', -1), + 'very long text that would normally be squeezed') + + def test_squeeze_size_zero_returns_empty_text(self) -> None: + """Test that size=0 with an empty replacement returns empty text.""" + self.assertEqual(squeeze('hello world', 0, ''), '') + self.assertEqual(squeeze('', 0, ''), '') + + def test_squeeze_text_shorter_than_or_equal_size_returns_original(self) -> None: + """Test that text shorter than or equal to size returns original text.""" + self.assertEqual(squeeze('hello', 10), 'hello') + self.assertEqual(squeeze('hello', 5), 'hello') 
+ self.assertEqual(squeeze('', 5), '') + self.assertEqual(squeeze('a', 5), 'a') + self.assertEqual(squeeze('ab', 10), 'ab') + + def test_squeeze_with_default_replacement(self) -> None: + """Test squeezing with default ' ... ' replacement.""" + self.assertEqual(squeeze('hello world', 8), 'h ... ld') + self.assertEqual(squeeze('hello world', 7), 'h ... d') + self.assertEqual(squeeze('hello world', 9), 'he ... ld') + self.assertEqual(squeeze('hello world', 10), 'he ... rld') + self.assertEqual(squeeze('hello world', 11), 'hello world') + + def test_squeeze_with_custom_replacement(self) -> None: + """Test squeezing with custom replacement strings.""" + self.assertEqual(squeeze('hello world', 8, '..'), 'hel..rld') + self.assertEqual(squeeze('hello world', 7, '..'), 'he..rld') + self.assertEqual(squeeze('hello world', 9, '---'), 'hel---rld') + self.assertEqual(squeeze('hello world', 10, 'XX'), 'hellXXorld') + + def test_squeeze_with_single_character_replacement(self) -> None: + """Test squeezing with single character replacement.""" + self.assertEqual(squeeze('hello world', 5, '*'), 'he*ld') + self.assertEqual(squeeze('hello world', 6, '*'), 'he*rld') + self.assertEqual(squeeze('hello world', 7, '*'), 'hel*rld') + + def test_squeeze_with_empty_replacement(self) -> None: + """Test squeezing with empty replacement string.""" + self.assertEqual(squeeze('hello world', 5, ''), 'herld') + self.assertEqual(squeeze('hello world', 6, ''), 'helrld') + self.assertEqual(squeeze('hello world', 7, ''), 'helorld') + + def test_squeeze_replacement_equals_target_size(self) -> None: + """Test when replacement string equals the target size.""" + self.assertEqual(squeeze('hello world', 4, '....'), '....') + self.assertEqual(squeeze('hello world', 3, '***'), '***') + + def test_squeeze_very_short_target_sizes(self) -> None: + """Test edge cases with very short target sizes.""" + self.assertEqual(squeeze('hello world', 5, '.'), 'he.ld') + self.assertEqual(squeeze('hello world', 6, '.'), 
'he.rld') + self.assertEqual(squeeze('hello world', 1, 'X'), 'X') + + def test_squeeze_with_long_text(self) -> None: + """Test squeezing with very long text.""" + long_text = 'a' * 100 + result = squeeze(long_text, 10, '...') + self.assertEqual(len(result), 10) + self.assertEqual(result, 'aaa...aaaa') + + # Test with different replacement + result2 = squeeze(long_text, 8, '--') + self.assertEqual(len(result2), 8) + self.assertEqual(result2, 'aaa--aaa') + + def test_squeeze_size_distribution_even(self) -> None: + """Test size distribution when remaining space is even.""" + # size=8, replacement="--" (len=2), remaining=6, left=3, right=3 + self.assertEqual(squeeze('abcdefghijk', 8, '--'), 'abc--ijk') + # size=10, replacement="...." (len=4), remaining=6, left=3, right=3 + self.assertEqual(squeeze('abcdefghijk', 10, '....'), 'abc....ijk') + + def test_squeeze_size_distribution_odd(self) -> None: + """Test size distribution when remaining space is odd.""" + # size=9, replacement="--" (len=2), remaining=7, left=3, right=4 + self.assertEqual(squeeze('abcdefghijk', 9, '--'), 'abc--hijk') + # size=11 equals len(text)=11, so the text is returned unsqueezed + self.assertEqual(squeeze('abcdefghijk', 11, '...'), 'abcdefghijk') + + def test_squeeze_raises_error_when_replacement_too_long(self) -> None: + """Test that ValueError is raised when replacement is longer than target size.""" + with self.assertRaises(ValueError) as context: + squeeze('hello world', 3, ' ... 
') + self.assertIn('size = 3 < len(replacement) = 5', str(context.exception)) + + with self.assertRaises(ValueError) as context: + squeeze('hello world', 2, 'abc') + self.assertIn('size = 2 < len(replacement) = 3', str(context.exception)) + + with self.assertRaises(ValueError) as context: + squeeze('hello world', 1, 'ab') + self.assertIn('size = 1 < len(replacement) = 2', str(context.exception)) + + def test_squeeze_error_when_replacement_long_but_no_squeeze_needed(self) -> None: + """Test that ValueError is raised for a too long replacement even if no squeezing is needed.""" + # Text is shorter than size, so no squeezing would occur, + # yet, the replacement is longer than the requested size, so error is raised + with self.assertRaises(ValueError) as context: + self.assertEqual(squeeze('abc', 10, 'very long replacement'), 'abc') + self.assertIn('size = 10 < len(replacement) = 21', str(context.exception)) + + with self.assertRaises(ValueError) as context: + self.assertEqual(squeeze('', 3, 'abcd'), '') + self.assertIn('size = 3 < len(replacement) = 4', str(context.exception)) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_validation_json.py b/tests/test_validation_json.py index c62fb85c..f05c4e2b 100644 --- a/tests/test_validation_json.py +++ b/tests/test_validation_json.py @@ -133,4 +133,8 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ self.assertIsNotNone(validation_error) self.assertIsNotNone(validation_error.data) self.assertTrue(bool(validation_error.message)) + self.assertTrue(bool(validation_error.get_squeezed_message(context_limit=22))) self.assertTrue(bool(validation_error.path)) + + squeezed_message = validation_error.get_squeezed_message(max_size=100) + self.assertLessEqual(len(squeezed_message), 100, squeezed_message) diff --git a/tests/test_validation_xml.py b/tests/test_validation_xml.py index f7a5e34c..8f774362 100644 --- a/tests/test_validation_xml.py +++ b/tests/test_validation_xml.py @@ 
-93,4 +93,8 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ self.assertIsNotNone(validation_error) self.assertIsNotNone(validation_error.data) self.assertTrue(bool(validation_error.message)) + self.assertTrue(bool(validation_error.get_squeezed_message())) self.assertTrue(bool(validation_error.path)) + + squeezed_message = validation_error.get_squeezed_message(max_size=100) + self.assertLessEqual(len(squeezed_message), 100, squeezed_message) From 989056128adedc61a0037423f5435cd544e58376 Mon Sep 17 00:00:00 2001 From: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> Date: Thu, 19 Jun 2025 15:15:06 +0200 Subject: [PATCH 4/6] feat: iterate over validation errors There was only one validation method, that returned a single error (and it was the first error (JSON) or the last one (XML)). The new function `SchemabasedValidator.iterate_errors()` allows to enumerate over all the validation errors. Signed-off-by: Krisztian Fekete <1246751+e3krisztian@users.noreply.github.com> --- cyclonedx/validation/__init__.py | 9 +++++++++ cyclonedx/validation/json.py | 8 ++++++++ cyclonedx/validation/xml.py | 15 ++++++++++++++- tests/test_validation_json.py | 4 ++++ tests/test_validation_xml.py | 4 ++++ 5 files changed, 39 insertions(+), 1 deletion(-) diff --git a/cyclonedx/validation/__init__.py b/cyclonedx/validation/__init__.py index ba95ccb7..8163e1ef 100644 --- a/cyclonedx/validation/__init__.py +++ b/cyclonedx/validation/__init__.py @@ -17,6 +17,7 @@ from abc import ABC, abstractmethod +from collections.abc import Iterable from typing import TYPE_CHECKING, Any, Literal, Optional, Protocol, Union, overload from ..schema import OutputFormat @@ -120,6 +121,14 @@ def validate_str(self, data: str) -> Optional[ValidationError]: """ ... # pragma: no cover + def iterate_errors(self, data: str) -> Iterable[ValidationError]: + """Validate a string, enumerating all the problems. 
+ + :param data: the data string to validate + :return: iterator over the errors + """ + ... # pragma: no cover + class BaseSchemabasedValidator(ABC, SchemabasedValidator): """Base Schema-based Validator""" diff --git a/cyclonedx/validation/json.py b/cyclonedx/validation/json.py index 7acb9632..c3ddfa27 100644 --- a/cyclonedx/validation/json.py +++ b/cyclonedx/validation/json.py @@ -19,6 +19,7 @@ __all__ = ['JsonValidator', 'JsonStrictValidator'] from abc import ABC +from collections.abc import Iterable from json import loads as json_loads from typing import TYPE_CHECKING, Any, Literal, Optional @@ -107,7 +108,14 @@ def __init__(self, schema_version: 'SchemaVersion') -> None: def validate_str(self, data: str) -> Optional[ValidationError]: raise self.__MDERROR[0] from self.__MDERROR[1] + def iterate_errors(self, data: str) -> Iterable[ValidationError]: + raise self.__MDERROR[0] from self.__MDERROR[1] else: + def iterate_errors(self, data: str) -> Iterable[ValidationError]: + json_data = json_loads(data) + validator = self._validator # may throw on error that MUST NOT be caught + yield from validator.iter_errors(json_data) + def validate_str(self, data: str) -> Optional[ValidationError]: return self._validate_data( json_loads(data)) diff --git a/cyclonedx/validation/xml.py b/cyclonedx/validation/xml.py index 2c0a6730..e643aac0 100644 --- a/cyclonedx/validation/xml.py +++ b/cyclonedx/validation/xml.py @@ -19,6 +19,7 @@ __all__ = ['XmlValidator'] from abc import ABC +from collections.abc import Iterable from typing import TYPE_CHECKING, Any, Literal, Optional from ..exception import MissingOptionalDependencyException @@ -53,12 +54,24 @@ def __init__(self, schema_version: 'SchemaVersion') -> None: # this is the def that is used for generating the documentation super().__init__(schema_version) - if _missing_deps_error: + if _missing_deps_error: # noqa:C901 __MDERROR = _missing_deps_error def validate_str(self, data: str) -> Optional[ValidationError]: raise 
self.__MDERROR[0] from self.__MDERROR[1] + + def iterate_errors(self, data: str) -> Iterable[ValidationError]: + raise self.__MDERROR[0] from self.__MDERROR[1] else: + def iterate_errors(self, data: str) -> Iterable[ValidationError]: + xml_data = xml_fromstring( # nosec B320 + bytes(data, encoding='utf8'), + parser=self.__xml_parser) + validator = self._validator # may throw on error that MUST NOT be caught + validator.validate(xml_data) + for error in validator.error_log: + yield ValidationError(error) + def validate_str(self, data: str) -> Optional[ValidationError]: return self._validate_data( xml_fromstring( # nosec B320 diff --git a/tests/test_validation_json.py b/tests/test_validation_json.py index f05c4e2b..1cebb5b7 100644 --- a/tests/test_validation_json.py +++ b/tests/test_validation_json.py @@ -117,6 +117,8 @@ def test_validate_no_none(self, schema_version: SchemaVersion, test_data_file: s self.skipTest('MissingOptionalDependencyException') self.assertIsNone(validation_error) + self.assertEqual(list(validator.iterate_errors(test_data)), []) + @idata(chain( _dp_sv_tf(False), _dp_sv_own(False) @@ -138,3 +140,5 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ squeezed_message = validation_error.get_squeezed_message(max_size=100) self.assertLessEqual(len(squeezed_message), 100, squeezed_message) + + self.assertNotEqual(list(validator.iterate_errors(test_data)), []) diff --git a/tests/test_validation_xml.py b/tests/test_validation_xml.py index 8f774362..4eae98d6 100644 --- a/tests/test_validation_xml.py +++ b/tests/test_validation_xml.py @@ -77,6 +77,8 @@ def test_validate_no_none(self, schema_version: SchemaVersion, test_data_file: s self.skipTest('MissingOptionalDependencyException') self.assertIsNone(validation_error) + self.assertEqual(list(validator.iterate_errors(test_data)), []) + @idata(chain( _dp_sv_tf(False), _dp_sv_own(False) @@ -98,3 +100,5 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, 
test_data_ squeezed_message = validation_error.get_squeezed_message(max_size=100) self.assertLessEqual(len(squeezed_message), 100, squeezed_message) + + self.assertNotEqual(list(validator.iterate_errors(test_data)), []) From 98bc301967d2ea58b79ad1d2511fd550b61f3b62 Mon Sep 17 00:00:00 2001 From: Jan Kowalleck Date: Thu, 26 Jun 2025 12:08:55 +0200 Subject: [PATCH 5/6] feat: SchemabasedValidator.validate_str can return an iterator over all errors Signed-off-by: Jan Kowalleck --- cyclonedx/validation/__init__.py | 29 ++++++++++++++--- cyclonedx/validation/json.py | 52 ++++++++++++++++++++----------- cyclonedx/validation/xml.py | 53 +++++++++++++++++++------------- 3 files changed, 91 insertions(+), 43 deletions(-) diff --git a/cyclonedx/validation/__init__.py b/cyclonedx/validation/__init__.py index 8163e1ef..a3bf9996 100644 --- a/cyclonedx/validation/__init__.py +++ b/cyclonedx/validation/__init__.py @@ -111,21 +111,42 @@ def __str__(self) -> str: class SchemabasedValidator(Protocol): """Schema-based Validator protocol""" - def validate_str(self, data: str) -> Optional[ValidationError]: + @overload + def validate_str(self, data: str, *, all_errors: Literal[False] = ...) -> Optional[ValidationError]: """Validate a string :param data: the data string to validate + :param all_errors: whether to return all errors or only the last error - if any :return: validation error :retval None: if ``data`` is valid :retval ValidationError: if ``data`` is invalid """ ... # pragma: no cover - def iterate_errors(self, data: str) -> Iterable[ValidationError]: - """Validate a string, enumerating all the problems. 
+ @overload + def validate_str(self, data: str, *, all_errors: Literal[True]) -> Optional[Iterable[ValidationError]]: + """Validate a string + + :param data: the data string to validate + :param all_errors: whether to return all errors or only the last error - if any + :return: validation error + :retval None: if ``data`` is valid + :retval Iterable[ValidationError]: if ``data`` is invalid + """ + ... # pragma: no cover + + def validate_str( + self, data: str, *, + all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: + """Validate a string :param data: the data string to validate - :return: iterator over the errors + :param all_errors: whether to return all errors or only the last error - if any + :return: validation error + :retval None: if ``data`` is valid + :retval ValidationError: if ``data`` is invalid and ``all_errors`` is ``False`` + :retval Iterable[ValidationError]: if ``data`` is invalid and ``all_errors`` is ``True`` """ ... # pragma: no cover diff --git a/cyclonedx/validation/json.py b/cyclonedx/validation/json.py index c3ddfa27..747c8627 100644 --- a/cyclonedx/validation/json.py +++ b/cyclonedx/validation/json.py @@ -20,8 +20,9 @@ from abc import ABC from collections.abc import Iterable +from itertools import chain from json import loads as json_loads -from typing import TYPE_CHECKING, Any, Literal, Optional +from typing import TYPE_CHECKING, Any, Literal, Optional, Union, overload from ..schema import OutputFormat @@ -102,31 +103,46 @@ def __init__(self, schema_version: 'SchemaVersion') -> None: # this is the def that is used for generating the documentation super().__init__(schema_version) + # region typing-relevant copy from parent class - needed for mypy and doc tools + + @overload + def validate_str(self, data: str, *, all_errors: Literal[False] = ...) -> Optional[ValidationError]: + ... 
# pragma: no cover + + @overload + def validate_str(self, data: str, *, all_errors: Literal[True]) -> Optional[Iterable[ValidationError]]: + ... # pragma: no cover + + def validate_str( + self, data: str, *, all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: + ... # pragma: no cover + + # endregion + if _missing_deps_error: # noqa:C901 __MDERROR = _missing_deps_error - def validate_str(self, data: str) -> Optional[ValidationError]: + def validate_str( # type:ignore[no-redef] # noqa:F811 # typing-relevant headers go first + self, data: str, *, all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: raise self.__MDERROR[0] from self.__MDERROR[1] - def iterate_errors(self, data: str) -> Iterable[ValidationError]: - raise self.__MDERROR[0] from self.__MDERROR[1] else: - def iterate_errors(self, data: str) -> Iterable[ValidationError]: - json_data = json_loads(data) - validator = self._validator # may throw on error that MUST NOT be caught - yield from validator.iter_errors(json_data) - - def validate_str(self, data: str) -> Optional[ValidationError]: - return self._validate_data( - json_loads(data)) - def _validate_data(self, data: Any) -> Optional[ValidationError]: + def validate_str( # type:ignore[no-redef] # noqa:F811 # typing-relevant headers go first + self, data: str, *, all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: validator = self._validator # may throw on error that MUST NOT be caught - try: - validator.validate(data) - except JsonSchemaValidationError as error: - return _JsonValidationError(error) - return None + structure = json_loads(data) + errors = validator.iter_errors(structure) + first_error = next(errors, None) + if first_error is None: + return None + first_error = _JsonValidationError(first_error) + return chain((first_error,), map(_JsonValidationError, errors)) \ + if all_errors \ + else first_error __validator: 
Optional['JsonSchemaValidator'] = None diff --git a/cyclonedx/validation/xml.py b/cyclonedx/validation/xml.py index e643aac0..7dca07c3 100644 --- a/cyclonedx/validation/xml.py +++ b/cyclonedx/validation/xml.py @@ -20,7 +20,7 @@ from abc import ABC from collections.abc import Iterable -from typing import TYPE_CHECKING, Any, Literal, Optional +from typing import TYPE_CHECKING, Literal, Optional, Union, overload from ..exception import MissingOptionalDependencyException from ..schema import OutputFormat @@ -54,35 +54,46 @@ def __init__(self, schema_version: 'SchemaVersion') -> None: # this is the def that is used for generating the documentation super().__init__(schema_version) + # region typing-relevant copy from parent class - needed for mypy and doc tools + + @overload + def validate_str(self, data: str, *, all_errors: Literal[False] = ...) -> Optional[ValidationError]: + ... # pragma: no cover + + @overload + def validate_str(self, data: str, *, all_errors: Literal[True]) -> Optional[Iterable[ValidationError]]: + ... # pragma: no cover + + def validate_str( + self, data: str, *, all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: + ... 
# pragma: no cover + + # endregion typing-relevant + if _missing_deps_error: # noqa:C901 __MDERROR = _missing_deps_error - def validate_str(self, data: str) -> Optional[ValidationError]: + def validate_str( # type:ignore[no-redef] # noqa:F811 # typing-relevant headers go first + self, data: str, *, all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: raise self.__MDERROR[0] from self.__MDERROR[1] - def iterate_errors(self, data: str) -> Iterable[ValidationError]: - raise self.__MDERROR[0] from self.__MDERROR[1] else: - def iterate_errors(self, data: str) -> Iterable[ValidationError]: - xml_data = xml_fromstring( # nosec B320 - bytes(data, encoding='utf8'), - parser=self.__xml_parser) + def validate_str( # type:ignore[no-redef] # noqa:F811 # typing-relevant headers go first + self, data: str, *, all_errors: bool = False + ) -> Union[None, ValidationError, Iterable[ValidationError]]: validator = self._validator # may throw on error that MUST NOT be caught - validator.validate(xml_data) - for error in validator.error_log: - yield ValidationError(error) - - def validate_str(self, data: str) -> Optional[ValidationError]: - return self._validate_data( - xml_fromstring( # nosec B320 + valid = validator.validate( + xml_fromstring( # nosec B320 -- we use a custom prepared safe parser bytes(data, encoding='utf8'), parser=self.__xml_parser)) - - def _validate_data(self, data: Any) -> Optional[ValidationError]: - validator = self._validator # may throw on error that MUST NOT be caught - if not validator.validate(data): - return ValidationError(validator.error_log.last_error) - return None + if valid: + return None + errors = validator.error_log + return map(ValidationError, errors) \ + if all_errors \ + else ValidationError(errors.last_error) __validator: Optional['XMLSchema'] = None From 30028888afaf766443f09b0b1c3f8b4c7cc9dd09 Mon Sep 17 00:00:00 2001 From: Jan Kowalleck Date: Thu, 26 Jun 2025 12:25:45 +0200 Subject: [PATCH 6/6] tests 
Signed-off-by: Jan Kowalleck --- cyclonedx/validation/__init__.py | 6 ++--- tests/test_validation_json.py | 44 ++++++++++++++++++++++++++++---- tests/test_validation_xml.py | 23 ++++++++++++++--- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/cyclonedx/validation/__init__.py b/cyclonedx/validation/__init__.py index a3bf9996..baabac03 100644 --- a/cyclonedx/validation/__init__.py +++ b/cyclonedx/validation/__init__.py @@ -116,7 +116,7 @@ def validate_str(self, data: str, *, all_errors: Literal[False] = ...) -> Option """Validate a string :param data: the data string to validate - :param all_errors: whether to return all errors or only the last error - if any + :param all_errors: whether to return all errors or just any one of them - if any :return: validation error :retval None: if ``data`` is valid :retval ValidationError: if ``data`` is invalid @@ -128,7 +128,7 @@ def validate_str(self, data: str, *, all_errors: Literal[True]) -> Optional[Iter """Validate a string :param data: the data string to validate - :param all_errors: whether to return all errors or only the last error - if any + :param all_errors: whether to return all errors or just any one of them - if any :return: validation error :retval None: if ``data`` is valid :retval Iterable[ValidationError]: if ``data`` is invalid @@ -142,7 +142,7 @@ def validate_str( """Validate a string :param data: the data string to validate - :param all_errors: whether to return all errors or only the last error - if any + :param all_errors: whether to return all errors or just any one of them - if any :return: validation error :retval None: if ``data`` is valid :retval ValidationError: if ``data`` is invalid and ``all_errors`` is ``False`` diff --git a/tests/test_validation_json.py b/tests/test_validation_json.py index 1cebb5b7..3313d24b 100644 --- a/tests/test_validation_json.py +++ b/tests/test_validation_json.py @@ -82,7 +82,7 @@ def test_validate_no_none(self, schema_version: SchemaVersion, test_data_file: s
_dp_sv_own(False) )) @unpack - def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_file: str) -> None: + def test_validate_expected_error_one(self, schema_version: SchemaVersion, test_data_file: str) -> None: validator = JsonValidator(schema_version) with open(join(test_data_file)) as tdfh: test_data = tdfh.read() @@ -93,6 +93,25 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ self.assertIsNotNone(validation_error) self.assertIsNotNone(validation_error.data) + @idata(chain( + _dp_sv_tf(False), + _dp_sv_own(False) + )) + @unpack + def test_validate_expected_error_iterator(self, schema_version: SchemaVersion, test_data_file: str) -> None: + validator = JsonValidator(schema_version) + with open(join(test_data_file)) as tdfh: + test_data = tdfh.read() + try: + validation_errors = validator.validate_str(test_data, all_errors=True) + except MissingOptionalDependencyException: + self.skipTest('MissingOptionalDependencyException') + self.assertIsNotNone(validation_errors) + validation_errors = tuple(validation_errors) + self.assertGreater(len(validation_errors), 0) + for validation_error in validation_errors: + self.assertIsNotNone(validation_error.data) + @ddt class TestJsonStrictValidator(TestCase): @@ -117,14 +136,12 @@ def test_validate_no_none(self, schema_version: SchemaVersion, test_data_file: s self.skipTest('MissingOptionalDependencyException') self.assertIsNone(validation_error) - self.assertEqual(list(validator.iterate_errors(test_data)), []) - @idata(chain( _dp_sv_tf(False), _dp_sv_own(False) )) @unpack - def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_file: str) -> None: + def test_validate_expected_error_one(self, schema_version: SchemaVersion, test_data_file: str) -> None: validator = JsonStrictValidator(schema_version) with open(join(test_data_file)) as tdfh: test_data = tdfh.read() @@ -141,4 +158,21 @@ def test_validate_expected_error(self, schema_version: 
SchemaVersion, test_data_ squeezed_message = validation_error.get_squeezed_message(max_size=100) self.assertLessEqual(len(squeezed_message), 100, squeezed_message) - self.assertNotEqual(list(validator.iterate_errors(test_data)), []) + @idata(chain( + _dp_sv_tf(False), + _dp_sv_own(False) + )) + @unpack + def test_validate_expected_error_iterator(self, schema_version: SchemaVersion, test_data_file: str) -> None: + validator = JsonStrictValidator(schema_version) + with open(join(test_data_file)) as tdfh: + test_data = tdfh.read() + try: + validation_errors = validator.validate_str(test_data, all_errors=True) + except MissingOptionalDependencyException: + self.skipTest('MissingOptionalDependencyException') + self.assertIsNotNone(validation_errors) + validation_errors = tuple(validation_errors) + self.assertGreater(len(validation_errors), 0) + for validation_error in validation_errors: + self.assertIsNotNone(validation_error.data) diff --git a/tests/test_validation_xml.py b/tests/test_validation_xml.py index 4eae98d6..4e83784e 100644 --- a/tests/test_validation_xml.py +++ b/tests/test_validation_xml.py @@ -77,14 +77,12 @@ def test_validate_no_none(self, schema_version: SchemaVersion, test_data_file: s self.skipTest('MissingOptionalDependencyException') self.assertIsNone(validation_error) - self.assertEqual(list(validator.iterate_errors(test_data)), []) - @idata(chain( _dp_sv_tf(False), _dp_sv_own(False) )) @unpack - def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_file: str) -> None: + def test_validate_expected_error_one(self, schema_version: SchemaVersion, test_data_file: str) -> None: validator = XmlValidator(schema_version) with open(join(test_data_file)) as tdfh: test_data = tdfh.read() @@ -101,4 +99,21 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_ squeezed_message = validation_error.get_squeezed_message(max_size=100) self.assertLessEqual(len(squeezed_message), 100, squeezed_message) -
self.assertNotEqual(list(validator.iterate_errors(test_data)), []) + @idata(chain( + _dp_sv_tf(False), + _dp_sv_own(False) + )) + @unpack + def test_validate_expected_error_iterator(self, schema_version: SchemaVersion, test_data_file: str) -> None: + validator = XmlValidator(schema_version) + with open(join(test_data_file)) as tdfh: + test_data = tdfh.read() + try: + validation_errors = validator.validate_str(test_data, all_errors=True) + except MissingOptionalDependencyException: + self.skipTest('MissingOptionalDependencyException') + self.assertIsNotNone(validation_errors) + validation_errors = tuple(validation_errors) + self.assertGreater(len(validation_errors), 0) + for validation_error in validation_errors: + self.assertIsNotNone(validation_error.data)