fabiocaccamo · fabiocaccamo · Apr 23, 2025 · Apr 23, 2025 · Apr 23, 2025 · Apr 23, 2025
diff --git a/README.md b/README.md
@@ -39,6 +39,7 @@ python-benedict is a dict subclass with **keylist/keypath/keyattr** support, **I
         -   [Disable keypath functionality](#disable-keypath-functionality)
         -   [List index support](#list-index-support)
     -   [I/O](#io)
+        -   [Data validation using Pydantic models](#data-validation-using-pydantic-models)
     -   [API](#api)
         -   [Utility methods](#utility-methods)
         -   [I/O methods](#io-methods)
@@ -64,6 +65,7 @@ Here the hierarchy of possible installation targets available when running `pip
         - `[yaml]`
     - `[parse]`
     - `[s3]`
+    - `[validate]`
 
 ## Usage
 
@@ -126,7 +128,8 @@ or using the `getter/setter` property.
 d.keyattr_dynamic = True
 ```
 
-> **Warning** - even if this feature is very useful, it has some obvious limitations: it works only for string keys that are *unprotected* (not starting with an `_`) and that don't clash with the currently supported methods names.
+> [!WARNING]
+> Even if this feature is very useful, it has some obvious limitations: it works only for string keys that are *unprotected* (not starting with an `_`) and that don't clash with the currently supported method names.
 
 ### Keylist
 Wherever a **key** is used, it is possible to use also a **list of keys**.
@@ -278,6 +281,31 @@ Here are the details of the supported formats, operations and extra options docs
 | `xml`          | :white_check_mark: | :white_check_mark: | [xmltodict](https://github.com/martinblech/xmltodict)                                 |
 | `yaml`         | :white_check_mark: | :white_check_mark: | [PyYAML](https://pyyaml.org/wiki/PyYAMLDocumentation)                                 |
 
+### Data validation using Pydantic models
+
+> [!IMPORTANT]
+> This feature **requires** the `validate` extra to be installed: `pip install "python-benedict[validate]"`
+
+You can validate data in different ways:
+
+1. Using the `validate` method directly
+```python
+d = benedict(my_data)
+d.validate(schema=MySchema)
+```
+
+2. Using the `schema` parameter during initialization
+```python
+d = benedict(my_data, schema=MySchema)
+```
+
+3. Using the `schema` parameter with any `from_{format}` method
+```python
+d = benedict.from_json(my_data, schema=MySchema)
+```
+
+If validation fails, a `ValidationError` will be raised with details about what went wrong.
+
 ### API
 
 -   **Utility methods**
@@ -332,6 +360,7 @@ Here are the details of the supported formats, operations and extra options docs
     -   [`to_toml`](#to_toml)
     -   [`to_xml`](#to_xml)
     -   [`to_yaml`](#to_yaml)
+    -   [`validate`](#validate)
 
 -   **Parse methods**
 
@@ -814,6 +843,14 @@ s = d.to_xml(**kwargs)
 s = d.to_yaml(**kwargs)
 ```
 
+#### `validate`
+
+```python
+# Validate the dict and update it using a Pydantic schema.
+# A ValidationError is raised in case of failure.
+d.validate(schema=MySchema)
+```
+
 ### Parse methods
 
 These methods are wrappers of the `get` method, they parse data trying to return it in the expected type.

diff --git a/benedict/dicts/io/io_dict.py b/benedict/dicts/io/io_dict.py
@@ -9,7 +9,8 @@
 from benedict.dicts.base import BaseDict
 from benedict.dicts.io import io_util
 from benedict.exceptions import ExtrasRequireModuleNotFoundError
-from benedict.utils import type_util
+from benedict.utils import pydantic_util, type_util
+from benedict.utils.pydantic_util import PydanticModel
 
 _K = TypeVar("_K", default=str)
 _V = TypeVar("_V", default=Any)
@@ -29,18 +30,24 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
                 d = IODict._decode_init(arg, **kwargs)
                 super().__init__(d)
                 return
+
+        schema = kwargs.pop("schema", None)
         super().__init__(*args, **kwargs)
+        if schema:
+            self.validate(schema=schema)
 
     @staticmethod
     def _decode_init(s: str | Path, **kwargs: Any) -> dict[str, Any]:
         autodetected_format = io_util.autodetect_format(s)
         default_format = autodetected_format or "json"
         format = kwargs.pop("format", default_format).lower()
         # decode data-string and initialize with dict data.
-        return IODict._decode(s, format, **kwargs)
+        data = IODict._decode(s, format, **kwargs)
+        return data
 
     @staticmethod
     def _decode(s: str | Path, format: str, **kwargs: Any) -> dict[str, Any]:
+        schema = kwargs.pop("schema", None)
         data = None
         try:
             data = io_util.decode(s, format, **kwargs)
@@ -54,12 +61,15 @@ def _decode(s: str | Path, format: str, **kwargs: Any) -> dict[str, Any]:
             ) from None
         # if possible return data as dict, otherwise raise exception
         if type_util.is_dict(data):
-            return data
+            pass
         elif type_util.is_list(data):
             # force list to dict
-            return {"values": data}
+            data = {"values": data}
         else:
             raise ValueError(f"Invalid data type: {type(data)}, expected dict or list.")
+        if schema:
+            data = pydantic_util.validate_data(data, schema=schema)
+        return data
 
     @staticmethod
     def _encode(d: Any, format: str, **kwargs: Any) -> Any:
@@ -353,3 +363,14 @@ def to_yaml(self, **kwargs: Any) -> str:
         A ValueError is raised in case of failure.
         """
         return cast("str", self._encode(self.dict(), "yaml", **kwargs))
+
+    def validate(self, *, schema: PydanticModel) -> None:
+        """
+        Validate the dict and update it using a Pydantic schema.
+
+        Args:
+            schema: Pydantic model class for validation
+        """
+        data = pydantic_util.validate_data(self, schema=schema)
+        self.clear()
+        self.update(data)
diff --git a/benedict/extras.py b/benedict/extras.py
@@ -5,6 +5,7 @@
     "require_parse",
     "require_s3",
     "require_toml",
+    "require_validate",
     "require_xls",
     "require_xml",
     "require_yaml",
@@ -32,6 +33,10 @@ def require_toml(*, installed: bool) -> None:
     _require_optional_dependencies(target="toml", installed=installed)
 
 
+def require_validate(*, installed: bool) -> None:
+    _require_optional_dependencies(target="validate", installed=installed)
+
+
 def require_xls(*, installed: bool) -> None:
     _require_optional_dependencies(target="xls", installed=installed)
 

diff --git a/benedict/utils/pydantic_util.py b/benedict/utils/pydantic_util.py
@@ -0,0 +1,36 @@
+from typing import Any
+
+from benedict.extras import require_validate
+
+try:
+    from pydantic import BaseModel
+
+    pydantic_installed = True
+except ModuleNotFoundError:
+    pydantic_installed = False
+    BaseModel = None
+
+PydanticModel = type["BaseModel"]
+
+
+def _is_pydantic_model(obj: Any) -> bool:
+    """
+    Check if an object is a Pydantic model class.
+    """
+    return pydantic_installed and isinstance(obj, type) and issubclass(obj, BaseModel)
+
+
+def validate_data(data: Any, *, schema: PydanticModel | None = None) -> Any:
+    """
+    Validate data against a Pydantic schema if provided.
+    """
+    if schema is None:
+        return data
+
+    require_validate(installed=pydantic_installed)
+
+    if not _is_pydantic_model(schema):
+        raise ValueError("Invalid schema. Schema must be a Pydantic model class.")
+
+    validated = schema.model_validate(data)
+    return validated.model_dump()
diff --git a/pyproject.toml b/pyproject.toml
@@ -118,7 +118,7 @@ Twitter = "https://twitter.com/fabiocaccamo"
 
 [project.optional-dependencies]
 all = [
-    "python-benedict[io,parse,s3]",
+    "python-benedict[io,parse,s3,validate]",
 ]
 html = [
     "beautifulsoup4 >= 4.12.0, < 5.0.0",
@@ -139,6 +139,9 @@ s3 = [
 toml = [
     "toml >= 0.10.2, < 1.0.0",
 ]
+validate = [
+    "pydantic >= 2.0.0, < 3.0.0",  # pydantic_util relies on model_validate/model_dump, which are Pydantic v2 APIs
+]
 xls = [
     "openpyxl >= 3.0.0, < 4.0.0",
     "xlrd >= 2.0.0, < 3.0.0",

diff --git a/requirements.txt b/requirements.txt
@@ -4,6 +4,7 @@ ftfy == 6.3.1
 mailchecker == 6.0.18
 openpyxl == 3.1.5
 phonenumbers == 9.0.15
+pydantic == 2.11.3
 python-dateutil == 2.9.0.post0
 python-fsutil == 0.15.0
 python-slugify == 8.0.4

diff --git a/tests/dicts/io/test_io_dict_schema.py b/tests/dicts/io/test_io_dict_schema.py
@@ -0,0 +1,77 @@
+import json
+import unittest
+
+from pydantic import BaseModel, ValidationError
+
+from benedict import benedict
+
+
+class TestIODictSchema(unittest.TestCase):
+    def setUp(self):
+        class User(BaseModel):
+            name: str
+            age: int
+            email: str
+
+        class UserList(BaseModel):
+            users: list[User]
+
+        class UserOptional(BaseModel):
+            name: str
+            age: int | None = None
+            email: str | None = None
+
+        self.User = User
+        self.UserList = UserList
+        self.UserOptional = UserOptional
+        self.valid_data = {
+            "name": "John",
+            "age": 30,
+            "email": "john@example.com",
+        }
+        self.invalid_data = {
+            "name": "John",
+            "age": "not_an_int",
+            "email": "john@example.com",
+        }
+        self.minimal_data = {"name": "John"}
+
+    def test_constructor_with_schema(self):
+        d = benedict(self.valid_data, schema=self.User)
+        self.assertEqual(d["name"], "John")
+        self.assertEqual(d["age"], 30)
+        self.assertEqual(d["email"], "john@example.com")
+
+        with self.assertRaises(ValidationError):
+            benedict(self.invalid_data, schema=self.User)
+
+    def test_constructor_with_schema_and_optional_fields(self):
+        d = benedict(self.minimal_data, schema=self.UserOptional)
+        self.assertEqual(d["name"], "John")
+        self.assertIsNone(d.get("age"))
+        self.assertIsNone(d.get("email"))
+
+    def test_constructor_with_invalid_schema(self):
+        class InvalidSchema:
+            pass
+
+        with self.assertRaises(ValueError):
+            benedict(self.valid_data, schema=InvalidSchema)
+
+        with self.assertRaises(ValueError):
+            benedict(self.valid_data, schema="not_a_schema")
+
+    def test_from_json_with_schema_and_valid_data(self):
+        json_data = json.dumps(self.valid_data)
+        d = benedict.from_json(json_data, schema=self.User)
+        self.assertEqual(d["name"], "John")
+        self.assertEqual(d["age"], 30)
+
+    def test_from_json_with_schema_and_invalid_data(self):
+        json_data = json.dumps(self.invalid_data)
+        with self.assertRaises(ValidationError):
+            benedict.from_json(json_data, schema=self.User)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/dicts/io/test_io_dict_validate.py b/tests/dicts/io/test_io_dict_validate.py
@@ -0,0 +1,82 @@
+import unittest
+
+from pydantic import BaseModel, ValidationError
+
+from benedict.dicts.io import IODict
+
+
+class TestIODictValidate(unittest.TestCase):
+    def setUp(self):
+        class User(BaseModel):
+            name: str
+            age: int
+            email: str
+
+        class UserOptional(BaseModel):
+            name: str
+            age: int | None = None
+            email: str | None = None
+
+        self.User = User
+        self.UserOptional = UserOptional
+        self.valid_data = {
+            "name": "John",
+            "age": 30,
+            "email": "john@example.com",
+        }
+        self.invalid_data = {
+            "name": "John",
+            "age": "not_an_int",
+            "email": "john@example.com",
+        }
+        self.minimal_data = {"name": "John"}
+        self.data_with_extra_fields = {
+            "name": "John",
+            "age": 30,
+            "email": "john@example.com",
+            "role": "admin",
+            "active": True,
+        }
+
+    def test_validate_valid_data(self):
+        d = IODict(self.valid_data)
+        d.validate(schema=self.User)
+        self.assertEqual(d["name"], "John")
+        self.assertEqual(d["age"], 30)
+        self.assertEqual(d["email"], "john@example.com")
+
+    def test_validate_invalid_data(self):
+        d = IODict(self.invalid_data)
+        with self.assertRaises(ValidationError):
+            d.validate(schema=self.User)
+
+    def test_validate_optional_fields(self):
+        d = IODict(self.minimal_data)
+        d.validate(schema=self.UserOptional)
+        self.assertEqual(d["name"], "John")
+        self.assertIsNone(d.get("age"))
+        self.assertIsNone(d.get("email"))
+
+    def test_validate_removes_extra_fields(self):
+        d = IODict(self.data_with_extra_fields)
+        d.validate(schema=self.User)
+        # required fields are preserved
+        self.assertEqual(d["name"], "John")
+        self.assertEqual(d["age"], 30)
+        self.assertEqual(d["email"], "john@example.com")
+        # extra fields are removed
+        self.assertNotIn("role", d)
+        self.assertNotIn("active", d)
+        # only the schema fields exist
+        self.assertEqual(set(d.keys()), {"name", "age", "email"})
+
+    def test_validate_invalid_schema(self):
+        class InvalidSchema:
+            pass
+
+        d = IODict(self.valid_data)
+        with self.assertRaises(ValueError):
+            d.validate(schema=InvalidSchema)
+
+        with self.assertRaises(ValueError):
+            d.validate(schema="not_a_schema")