Skip to content

Commit ef0a47a

Browse files
authored
Adjust validation function (#206)
* Adjust validation function Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Adjust validation function Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Adjust test structure Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Add normalization for pub packages Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Update spec Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Use objects instead of dictionaries for testing Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> * Update generate validators Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com> --------- Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent de6b657 commit ef0a47a

File tree

5 files changed

+539
-216
lines changed

5 files changed

+539
-216
lines changed

etc/scripts/generate_validators.py

Lines changed: 136 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
]
4545
}
4646
"""
47-
from packageurl import PackageURL
47+
4848
from pathlib import Path
4949
import json
5050

@@ -76,32 +76,140 @@
7676
Validate each type according to the PURL spec type definitions
7777
"""
7878
79-
class TypeValidator:
79+
class BasePurlType:
80+
"""
81+
Base class for all PURL type classes
82+
"""
83+
84+
type: str
85+
"""The type string for this Package-URL type."""
86+
87+
type_name: str
88+
"""The name for this PURL type."""
89+
90+
description: str
91+
"""The description of this PURL type."""
92+
93+
use_repository: bool = False
94+
"""true if this PURL type uses a public package repository."""
95+
96+
default_repository_url: str
97+
"""The default public repository URL for this PURL type"""
98+
99+
namespace_requirement: str
100+
"""States if this namespace is required, optional, or prohibited."""
101+
102+
allowed_qualifiers: dict = {"repository_url", "arch"}
103+
"""Set of allowed qualifier keys for this PURL type."""
104+
105+
namespace_case_sensitive: bool = True
106+
"""true if namespace is case sensitive. If false, the canonical form must be lowercased."""
107+
108+
name_case_sensitive: bool = True
109+
"""true if name is case sensitive. If false, the canonical form must be lowercased."""
110+
111+
version_case_sensitive: bool = True
112+
"""true if version is case sensitive. If false, the canonical form must be lowercased."""
113+
114+
purl_pattern: str
115+
"""A regex pattern that matches valid purls of this type."""
116+
80117
@classmethod
81118
def validate(cls, purl, strict=False):
119+
"""
120+
Validate a PackageURL instance or string.
121+
Yield ValidationMessage objects. When strict=True, qualifiers are validated as well; otherwise the purl is normalized before validation.
122+
"""
123+
from packageurl import ValidationMessage
124+
from packageurl import ValidationSeverity
125+
126+
if not purl:
127+
yield ValidationMessage(
128+
severity=ValidationSeverity.ERROR,
129+
message="No purl provided",
130+
)
131+
return
132+
133+
from packageurl import PackageURL
134+
135+
if not isinstance(purl, PackageURL):
136+
try:
137+
purl = PackageURL.from_string(purl, normalize_purl=False)
138+
except Exception as e:
139+
yield ValidationMessage(
140+
severity=ValidationSeverity.ERROR,
141+
message=f"Invalid purl {purl!r} string: {e}",
142+
)
143+
return
144+
82145
if not strict:
83146
purl = cls.normalize(purl)
84147
148+
yield from cls._validate_namespace(purl)
149+
yield from cls._validate_name(purl)
150+
yield from cls._validate_version(purl)
151+
if strict:
152+
yield from cls._validate_qualifiers(purl)
153+
154+
messages = cls.validate_using_type_rules(purl, strict=strict)
155+
if messages:
156+
yield from messages
157+
158+
@classmethod
159+
def _validate_namespace(cls, purl):
160+
from packageurl import ValidationMessage
161+
from packageurl import ValidationSeverity
162+
85163
if cls.namespace_requirement == "prohibited" and purl.namespace:
86-
yield f"Namespace is prohibited for purl type: {cls.type!r}"
164+
yield ValidationMessage(
165+
severity=ValidationSeverity.ERROR,
166+
message=f"Namespace is prohibited for purl type: {cls.type!r}",
167+
)
87168
88169
elif cls.namespace_requirement == "required" and not purl.namespace:
89-
yield f"Namespace is required for purl type: {cls.type!r}"
170+
yield ValidationMessage(
171+
severity=ValidationSeverity.ERROR,
172+
message=f"Namespace is required for purl type: {cls.type!r}",
173+
)
90174
91-
if (
175+
# TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema
176+
if purl.type == "cpan":
177+
if purl.namespace and purl.namespace != purl.namespace.upper():
178+
yield ValidationMessage(
179+
severity=ValidationSeverity.WARNING,
180+
message=f"Namespace must be uppercase for purl type: {cls.type!r}",
181+
)
182+
elif (
92183
not cls.namespace_case_sensitive
93184
and purl.namespace
94185
and purl.namespace.lower() != purl.namespace
95186
):
96-
yield f"Namespace is not lowercased for purl type: {cls.type!r}"
187+
yield ValidationMessage(
188+
severity=ValidationSeverity.WARNING,
189+
message=f"Namespace is not lowercased for purl type: {cls.type!r}",
190+
)
97191
192+
@classmethod
193+
def _validate_name(cls, purl):
98194
if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name:
99-
yield f"Name is not lowercased for purl type: {cls.type!r}"
195+
from packageurl import ValidationMessage
196+
from packageurl import ValidationSeverity
197+
198+
yield ValidationMessage(
199+
severity=ValidationSeverity.WARNING,
200+
message=f"Name is not lowercased for purl type: {cls.type!r}",
201+
)
100202
203+
@classmethod
204+
def _validate_version(cls, purl):
101205
if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version:
102-
yield f"Version is not lowercased for purl type: {cls.type!r}"
206+
from packageurl import ValidationMessage
207+
from packageurl import ValidationSeverity
103208
104-
yield from cls.validate_type(purl, strict=strict)
209+
yield ValidationMessage(
210+
severity=ValidationSeverity.WARNING,
211+
message=f"Version is not lowercased for purl type: {cls.type!r}",
212+
)
105213
106214
@classmethod
107215
def normalize(cls, purl):
@@ -130,12 +238,16 @@ def normalize(cls, purl):
130238
)
131239
132240
@classmethod
133-
def validate_type(cls, purl, strict=False):
134-
if strict:
135-
yield from cls.validate_qualifiers(purl=purl)
241+
def validate_using_type_rules(cls, purl, strict=False):
242+
"""
243+
Validate using any additional type specific rules.
244+
Yield validation messages.
245+
Subclasses can override this method to add type specific validation rules.
246+
"""
247+
return iter([])
136248
137249
@classmethod
138-
def validate_qualifiers(cls, purl):
250+
def _validate_qualifiers(cls, purl):
139251
if not purl.qualifiers:
140252
return
141253
@@ -145,9 +257,15 @@ def validate_qualifiers(cls, purl):
145257
disallowed = purl_qualifiers_keys - allowed_qualifiers_set
146258
147259
if disallowed:
148-
yield (
149-
f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. "
150-
f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}"
260+
from packageurl import ValidationMessage
261+
from packageurl import ValidationSeverity
262+
263+
yield ValidationMessage(
264+
severity=ValidationSeverity.INFO,
265+
message=(
266+
f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. "
267+
f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}"
268+
),
151269
)
152270
'''
153271

@@ -185,10 +303,10 @@ def generate_validators():
185303
type_def = json.loads(type.read_text())
186304

187305
_type = type_def["type"]
188-
standard_validator_class = "TypeValidator"
306+
standard_validator_class = "BasePurlType"
189307

190308
class_prefix = _type.capitalize()
191-
class_name = f"{class_prefix}{standard_validator_class}"
309+
class_name = f"{class_prefix}TypeDefinition"
192310
validators_by_type[_type] = class_name
193311
name_normalization_rules=type_def["name_definition"].get("normalization_rules") or []
194312
allowed_qualifiers = [defintion.get("key") for defintion in type_def.get("qualifiers_definition") or []]

src/packageurl/__init__.py

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@
2424

2525
from __future__ import annotations
2626

27+
import dataclasses
2728
import re
2829
import string
2930
from collections import namedtuple
3031
from collections.abc import Mapping
32+
from dataclasses import dataclass
33+
from enum import Enum
3134
from typing import TYPE_CHECKING
3235
from typing import Any
3336
from typing import Optional
@@ -58,6 +61,19 @@
5861
"""
5962

6063

64+
class ValidationSeverity(str, Enum):
65+
ERROR = "error"
66+
WARNING = "warning"
67+
INFO = "info"
68+
69+
70+
@dataclass
71+
class ValidationMessage:
72+
severity: ValidationSeverity
73+
message: str
74+
to_dict = dataclasses.asdict
75+
76+
6177
def quote(s: AnyStr) -> str:
6278
"""
6379
Return a percent-encoded unicode string, except for colon :, given an `s`
@@ -188,12 +204,15 @@ def normalize_name(
188204
"apk",
189205
"bitnami",
190206
"hex",
207+
"pub",
191208
):
192209
name_str = name_str.lower()
193210
if ptype == "pypi":
194211
name_str = name_str.replace("_", "-").lower()
195212
if ptype == "hackage":
196213
name_str = name_str.replace("_", "-")
214+
if ptype == "pub":
215+
name_str = re.sub(r"[^a-z0-9]", "_", name_str.lower())
197216
return name_str or None
198217

199218

@@ -521,24 +540,41 @@ def to_string(self, encode: bool | None = True) -> str:
521540

522541
return "".join(purl)
523542

524-
def validate(self, strict: bool = False) -> list[str]:
543+
def validate(self, strict: bool = False) -> list["ValidationMessage"]:
525544
"""
526545
Validate this PackageURL object and return a list of ValidationMessage objects.
527546
"""
528-
from packageurl.validate import VALIDATORS_BY_TYPE
529-
530-
if self:
531-
try:
532-
validator_class = VALIDATORS_BY_TYPE.get(self.type)
533-
if not validator_class:
534-
return [f"Given type: {self.type} can not be validated"]
535-
messages = list(validator_class.validate(self, strict)) # type: ignore[no-untyped-call]
536-
return messages
537-
except NoRouteAvailable:
538-
return [f"Given type: {self.type} can not be validated"]
547+
from packageurl.validate import DEFINITIONS_BY_TYPE
548+
549+
validator_class = DEFINITIONS_BY_TYPE.get(self.type)
550+
if not validator_class:
551+
return [
552+
ValidationMessage(
553+
severity=ValidationSeverity.ERROR,
554+
message=f"Unexpected purl type: expected {self.type!r}",
555+
)
556+
]
557+
return list(validator_class.validate(purl=self, strict=strict)) # type: ignore[no-untyped-call]
558+
559+
@classmethod
560+
def validate_string(cls, purl: str, strict: bool = False) -> list["ValidationMessage"]:
561+
"""
562+
Validate a PURL string and return a list of ValidationMessage objects.
563+
"""
564+
try:
565+
purl_obj = cls.from_string(purl, normalize_purl=not strict)
566+
assert isinstance(purl_obj, PackageURL)
567+
return purl_obj.validate(strict=strict)
568+
except ValueError as e:
569+
return [
570+
ValidationMessage(
571+
severity=ValidationSeverity.ERROR,
572+
message=str(e),
573+
)
574+
]
539575

540576
@classmethod
541-
def from_string(cls, purl: str) -> Self:
577+
def from_string(cls, purl: str, normalize_purl: bool = True) -> Self:
542578
"""
543579
Return a PackageURL object parsed from a string.
544580
Raise ValueError on errors.
@@ -622,14 +658,18 @@ def from_string(cls, purl: str) -> Self:
622658
if not name:
623659
raise ValueError(f"purl is missing the required name component: {purl!r}")
624660

625-
type_, namespace, name, version, qualifiers, subpath = normalize(
626-
type_,
627-
namespace,
628-
name,
629-
version,
630-
qualifiers_str,
631-
subpath,
632-
encode=False,
661+
if normalize_purl:
662+
type_, namespace, name, version, qualifiers, subpath = normalize(
663+
type_,
664+
namespace,
665+
name,
666+
version,
667+
qualifiers_str,
668+
subpath,
669+
encode=False,
670+
)
671+
else:
672+
qualifiers = normalize_qualifiers(qualifiers_str, encode=False) or {}
673+
return cls(
674+
type_, namespace, name, version, qualifiers, subpath, normalize_purl=normalize_purl
633675
)
634-
635-
return cls(type_, namespace, name, version, qualifiers, subpath)

0 commit comments

Comments
 (0)