Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ python-benedict is a dict subclass with **keylist/keypath/keyattr** support, **I
- [Disable keypath functionality](#disable-keypath-functionality)
- [List index support](#list-index-support)
- [I/O](#io)
- [Data validation using Pydantic models](#data-validation-using-pydantic-models)
- [API](#api)
- [Utility methods](#utility-methods)
- [I/O methods](#io-methods)
Expand All @@ -64,6 +65,7 @@ Here the hierarchy of possible installation targets available when running `pip
- `[yaml]`
- `[parse]`
- `[s3]`
- `[validate]`

## Usage

Expand Down Expand Up @@ -126,7 +128,8 @@ or using the `getter/setter` property.
d.keyattr_dynamic = True
```

> **Warning** - even if this feature is very useful, it has some obvious limitations: it works only for string keys that are *unprotected* (not starting with an `_`) and that don't clash with the currently supported methods names.
> [!WARNING]
> Even if this feature is very useful, it has some obvious limitations: it works only for string keys that are *unprotected* (not starting with an `_`) and that don't clash with the currently supported method names.

### Keylist
Wherever a **key** is used, it is possible to use also a **list of keys**.
Expand Down Expand Up @@ -278,6 +281,31 @@ Here are the details of the supported formats, operations and extra options docs
| `xml` | :white_check_mark: | :white_check_mark: | [xmltodict](https://github.com/martinblech/xmltodict) |
| `yaml` | :white_check_mark: | :white_check_mark: | [PyYAML](https://pyyaml.org/wiki/PyYAMLDocumentation) |

### Data validation using Pydantic models

> [!IMPORTANT]
> This feature **requires** the `validate` extra to be installed: `pip install "python-benedict[validate]"`

You can validate data in different ways:

1. Using the `validate` method directly
```python
d = benedict(my_data)
d.validate(schema=MySchema)
```

2. Using the `schema` parameter during initialization
```python
d = benedict(my_data, schema=MySchema)
```

3. Using the `schema` parameter with any `from_{format}` method
```python
d = benedict.from_json(my_data, schema=MySchema)
```

If validation fails, a `ValidationError` will be raised with details about what went wrong.

### API

- **Utility methods**
Expand Down Expand Up @@ -332,6 +360,7 @@ Here are the details of the supported formats, operations and extra options docs
- [`to_toml`](#to_toml)
- [`to_xml`](#to_xml)
- [`to_yaml`](#to_yaml)
- [`validate`](#validate)

- **Parse methods**

Expand Down Expand Up @@ -814,6 +843,14 @@ s = d.to_xml(**kwargs)
s = d.to_yaml(**kwargs)
```

#### `validate`

```python
# Validate the dict and update it using a Pydantic schema.
# A ValidationError is raised in case of failure.
d.validate(schema=MySchema)
```

### Parse methods

These methods are wrappers of the `get` method, they parse data trying to return it in the expected type.
Expand Down
29 changes: 25 additions & 4 deletions benedict/dicts/io/io_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from benedict.dicts.base import BaseDict
from benedict.dicts.io import io_util
from benedict.exceptions import ExtrasRequireModuleNotFoundError
from benedict.utils import type_util
from benedict.utils import pydantic_util, type_util
from benedict.utils.pydantic_util import PydanticModel

_K = TypeVar("_K", default=str)
_V = TypeVar("_V", default=Any)
Expand All @@ -29,18 +30,24 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
d = IODict._decode_init(arg, **kwargs)
super().__init__(d)
return

schema = kwargs.pop("schema", None)
super().__init__(*args, **kwargs)
if schema:
self.validate(schema=schema)

@staticmethod
def _decode_init(s: str | Path, **kwargs: Any) -> dict[str, Any]:
    """
    Decode the constructor's string/path argument into dict data.

    An explicit ``format`` keyword argument takes precedence; otherwise the
    autodetected format is used, with "json" as the last resort. All the
    remaining keyword arguments are forwarded to ``IODict._decode``.
    """
    fallback_format = io_util.autodetect_format(s) or "json"
    fmt = kwargs.pop("format", fallback_format).lower()
    # decode the data-string so that the instance can be initialized with dict data.
    return IODict._decode(s, fmt, **kwargs)

@staticmethod
def _decode(s: str | Path, format: str, **kwargs: Any) -> dict[str, Any]:
schema = kwargs.pop("schema", None)
data = None
try:
data = io_util.decode(s, format, **kwargs)
Expand All @@ -54,12 +61,15 @@ def _decode(s: str | Path, format: str, **kwargs: Any) -> dict[str, Any]:
) from None
# if possible return data as dict, otherwise raise exception
if type_util.is_dict(data):
return data
pass
elif type_util.is_list(data):
# force list to dict
return {"values": data}
data = {"values": data}
else:
raise ValueError(f"Invalid data type: {type(data)}, expected dict or list.")
if schema:
data = pydantic_util.validate_data(data, schema=schema)
return data

@staticmethod
def _encode(d: Any, format: str, **kwargs: Any) -> Any:
Expand Down Expand Up @@ -353,3 +363,14 @@ def to_yaml(self, **kwargs: Any) -> str:
A ValueError is raised in case of failure.
"""
return cast("str", self._encode(self.dict(), "yaml", **kwargs))

def validate(self, *, schema: PydanticModel) -> None:
    """
    Validate the dict and update it using a Pydantic schema.

    The current content is replaced by the data returned from
    ``pydantic_util.validate_data``.

    Args:
        schema: Pydantic model class for validation

    Raises:
        ValueError: if ``schema`` is not a Pydantic model class.
        Any error raised by the schema while validating the data is
        propagated unchanged.
    """
    data = pydantic_util.validate_data(self, schema=schema)
    if data is self:
        # validate_data hands its input back untouched when there is no
        # schema; clearing first would wipe the dict before update() could
        # copy anything out of it.
        return
    self.clear()
    self.update(data)
5 changes: 5 additions & 0 deletions benedict/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"require_parse",
"require_s3",
"require_toml",
"require_validate",
"require_xls",
"require_xml",
"require_yaml",
Expand Down Expand Up @@ -32,6 +33,10 @@ def require_toml(*, installed: bool) -> None:
_require_optional_dependencies(target="toml", installed=installed)


def require_validate(*, installed: bool) -> None:
    """
    Check the optional dependencies of the "validate" extra.

    Delegates to ``_require_optional_dependencies`` with ``target="validate"``.
    NOTE(review): presumably this raises when ``installed`` is False - confirm
    against ``_require_optional_dependencies``.
    """
    _require_optional_dependencies(target="validate", installed=installed)


def require_xls(*, installed: bool) -> None:
_require_optional_dependencies(target="xls", installed=installed)

Expand Down
36 changes: 36 additions & 0 deletions benedict/utils/pydantic_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import Any

from benedict.extras import require_validate

try:
from pydantic import BaseModel

pydantic_installed = True
except ModuleNotFoundError:
pydantic_installed = False
BaseModel = None

PydanticModel = type["BaseModel"]


def _is_pydantic_model(obj: Any) -> bool:
    """
    Tell whether ``obj`` is a class derived from Pydantic's ``BaseModel``.

    The answer is always False when Pydantic is not installed.
    """
    if not pydantic_installed:
        return False
    if not isinstance(obj, type):
        # instances and other non-class objects are never accepted
        return False
    return issubclass(obj, BaseModel)


def validate_data(data: Any, *, schema: PydanticModel | None = None) -> Any:
    """
    Validate data against a Pydantic schema if provided.

    Args:
        data: the data to validate.
        schema: Pydantic model class, or None to skip validation.

    Returns:
        ``data`` itself when ``schema`` is None, otherwise the validated
        model dumped back to a dict.

    Raises:
        ValueError: if ``schema`` is not a Pydantic model class.
    """
    if schema is None:
        return data

    require_validate(installed=pydantic_installed)

    if not _is_pydantic_model(schema):
        raise ValueError("Invalid schema. Schema must be a Pydantic model class.")

    if hasattr(schema, "model_validate"):
        # Pydantic v2 API.
        return schema.model_validate(data).model_dump()
    # Pydantic v1 has no model_validate/model_dump; fall back to the
    # equivalent v1 calls so that 1.x installations keep working.
    return schema.parse_obj(data).dict()
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ Twitter = "https://twitter.com/fabiocaccamo"

[project.optional-dependencies]
all = [
"python-benedict[io,parse,s3]",
"python-benedict[io,parse,s3,validate]",
]
html = [
"beautifulsoup4 >= 4.12.0, < 5.0.0",
Expand All @@ -139,6 +139,9 @@ s3 = [
toml = [
"toml >= 0.10.2, < 1.0.0",
]
validate = [
"pydantic >= 1.10.0",
]
xls = [
"openpyxl >= 3.0.0, < 4.0.0",
"xlrd >= 2.0.0, < 3.0.0",
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ftfy == 6.3.1
mailchecker == 6.0.18
openpyxl == 3.1.5
phonenumbers == 9.0.15
pydantic == 2.11.3
python-dateutil == 2.9.0.post0
python-fsutil == 0.15.0
python-slugify == 8.0.4
Expand Down
77 changes: 77 additions & 0 deletions tests/dicts/io/test_io_dict_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json
import unittest

from pydantic import BaseModel, ValidationError

from benedict import benedict


class TestIODictSchema(unittest.TestCase):
    """
    Schema validation through the ``benedict`` constructor and ``from_json``.
    """

    def setUp(self):
        # The models are declared per test and exposed as instance attributes.
        class User(BaseModel):
            name: str
            age: int
            email: str

        class UserList(BaseModel):
            users: list[User]

        class UserOptional(BaseModel):
            name: str
            age: int | None = None
            email: str | None = None

        self.User, self.UserList, self.UserOptional = User, UserList, UserOptional
        self.valid_data = {
            "name": "John",
            "age": 30,
            "email": "john@example.com",
        }
        # same payload, but with an age that cannot be parsed as an int
        self.invalid_data = {
            "name": "John",
            "age": "not_an_int",
            "email": "john@example.com",
        }
        self.minimal_data = {"name": "John"}

    def test_constructor_with_schema(self):
        """Valid data passes through the constructor, invalid data raises."""
        validated = benedict(self.valid_data, schema=self.User)
        expected_items = (
            ("name", "John"),
            ("age", 30),
            ("email", "john@example.com"),
        )
        for key, expected in expected_items:
            self.assertEqual(validated[key], expected)

        with self.assertRaises(ValidationError):
            benedict(self.invalid_data, schema=self.User)

    def test_constructor_with_schema_and_optional_fields(self):
        """Fields missing from the input read as None for optional fields."""
        validated = benedict(self.minimal_data, schema=self.UserOptional)
        self.assertEqual(validated["name"], "John")
        for optional_key in ("age", "email"):
            self.assertIsNone(validated.get(optional_key))

    def test_constructor_with_invalid_schema(self):
        """Anything that is not a Pydantic model class raises ValueError."""

        class InvalidSchema:
            pass

        for bad_schema in (InvalidSchema, "not_a_schema"):
            with self.assertRaises(ValueError):
                benedict(self.valid_data, schema=bad_schema)

    def test_from_json_with_schema_and_valid_data(self):
        """from_json validates the decoded data against the schema."""
        encoded = json.dumps(self.valid_data)
        validated = benedict.from_json(encoded, schema=self.User)
        for key, expected in (("name", "John"), ("age", 30)):
            self.assertEqual(validated[key], expected)

    def test_from_json_with_schema_and_invalid_data(self):
        """from_json raises ValidationError when the decoded data is invalid."""
        encoded = json.dumps(self.invalid_data)
        with self.assertRaises(ValidationError):
            benedict.from_json(encoded, schema=self.User)


if __name__ == "__main__":
unittest.main()
82 changes: 82 additions & 0 deletions tests/dicts/io/test_io_dict_validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import unittest

from pydantic import BaseModel, ValidationError

from benedict.dicts.io import IODict


class TestIODictValidate(unittest.TestCase):
    """
    Behaviour of ``IODict.validate`` with Pydantic model classes.
    """

    def setUp(self):
        # The models are declared per test and exposed as instance attributes.
        class User(BaseModel):
            name: str
            age: int
            email: str

        class UserOptional(BaseModel):
            name: str
            age: int | None = None
            email: str | None = None

        self.User, self.UserOptional = User, UserOptional
        self.valid_data = {
            "name": "John",
            "age": 30,
            "email": "john@example.com",
        }
        # same payload, but with an age that cannot be parsed as an int
        self.invalid_data = {
            "name": "John",
            "age": "not_an_int",
            "email": "john@example.com",
        }
        self.minimal_data = {"name": "John"}
        # valid payload plus keys that the User model does not declare
        self.data_with_extra_fields = {
            "name": "John",
            "age": 30,
            "email": "john@example.com",
            "role": "admin",
            "active": True,
        }

    def _assert_user_fields(self, target):
        # shared check: the three User fields carry the expected values
        expected_items = (
            ("name", "John"),
            ("age", 30),
            ("email", "john@example.com"),
        )
        for key, expected in expected_items:
            self.assertEqual(target[key], expected)

    def test_validate_valid_data(self):
        """Valid data is kept as-is after validation."""
        target = IODict(self.valid_data)
        target.validate(schema=self.User)
        self._assert_user_fields(target)

    def test_validate_invalid_data(self):
        """Invalid data raises ValidationError."""
        target = IODict(self.invalid_data)
        with self.assertRaises(ValidationError):
            target.validate(schema=self.User)

    def test_validate_optional_fields(self):
        """Fields missing from the input read as None for optional fields."""
        target = IODict(self.minimal_data)
        target.validate(schema=self.UserOptional)
        self.assertEqual(target["name"], "John")
        for optional_key in ("age", "email"):
            self.assertIsNone(target.get(optional_key))

    def test_validate_removes_extra_fields(self):
        """Keys that are not declared by the schema are dropped."""
        target = IODict(self.data_with_extra_fields)
        target.validate(schema=self.User)
        # required fields are preserved
        self._assert_user_fields(target)
        # extra fields are removed
        for extra_key in ("role", "active"):
            self.assertNotIn(extra_key, target)
        # only the schema fields exist
        self.assertEqual(set(target.keys()), {"name", "age", "email"})

    def test_validate_invalid_schema(self):
        """Anything that is not a Pydantic model class raises ValueError."""

        class InvalidSchema:
            pass

        target = IODict(self.valid_data)
        for bad_schema in (InvalidSchema, "not_a_schema"):
            with self.assertRaises(ValueError):
                target.validate(schema=bad_schema)
Loading
Loading