Skip to content

Add deterministic encryption #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ class AnotherModel(models.Model):

Supported field types include: `EncryptedCharField`, `EncryptedTextField`, `EncryptedDateField`, `EncryptedDateTimeField`, `EncryptedEmailField`, and `EncryptedIntegerField`.

## Deterministic Encryption

`DeterministicEncryptedCharField` provides support for [Deterministic AEAD](https://developers.google.com/tink/deterministic-aead), which means the value in the field can be queried with exact matches. However, unlike normal AEAD encryption, equal plaintexts always produce equal ciphertexts, so an attacker can verify that two messages are equal.

Deterministic encryption requires a key of type `AES-SIV` and supports Associated Data.

### Associated Data

The encrypted fields make use of `Authenticated Encryption With Associated Data (AEAD)` which offers confidentiality and integrity within the same mode of operation. This allows the caller to specify a cleartext fragment named `additional authenticated data (aad)` to the encryption and decryption operations and receive cryptographic guarantees that the ciphertext data has not been tampered with.
Expand Down
3 changes: 2 additions & 1 deletion tink_fields/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .fields import * # noqa
from tink import aead
from tink import aead, daead

aead.register()
daead.register()
181 changes: 146 additions & 35 deletions tink_fields/fields.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
from functools import lru_cache
from typing import Any, Callable, Dict, Optional
from django.utils.functional import cached_property
from typing import Any, Callable, Dict, Optional, TYPE_CHECKING
from django.db import models
from django.db.models.lookups import In
from django.core.exceptions import FieldError, ImproperlyConfigured
from tink import (
KeysetHandle,
cleartext_keyset_handle,
read_keyset_handle,
JsonKeysetReader,
aead,
daead,
)
from django.conf import settings
from dataclasses import dataclass
from os.path import exists
from django.utils.encoding import force_bytes, force_str
from django.db.backends.base.base import BaseDatabaseWrapper

if TYPE_CHECKING:
from django.db.backends.base.base import BaseDatabaseWrapper


__all__ = [
Expand All @@ -24,6 +28,11 @@
"EncryptedIntegerField",
"EncryptedDateField",
"EncryptedDateTimeField",
"EncryptedBinaryField",
"DeterministicEncryptedField",
"DeterministicEncryptedCharField",
"DeterministicEncryptedEmailField",
"DeterministicEncryptedIntegerField",
]


Expand All @@ -39,14 +48,14 @@ def validate(self):

if not exists(self.path):
raise ImproperlyConfigured(f"Keyset {self.path} does not exist")

if not self.cleartext and self.master_key_aead is None:
raise ImproperlyConfigured(f"Encrypted keysets must specify `master_key_aead`")

raise ImproperlyConfigured(
f"Encrypted keysets must specify `master_key_aead`"
)

class EncryptedField(models.Field):
"""A field that uses Tink primitives to protect the confidentiality and integrity of data"""

class BaseEncryptedField(models.Field):
_unsupported_properties = ["primary_key", "db_index", "unique"]
_internal_type = "BinaryField"

Expand All @@ -65,7 +74,7 @@ def __init__(self, *args, **kwargs):
self._keyset_handle = self._get_tink_keyset_handle()
self._aad_callback = kwargs.pop("aad_callback", lambda x: b"")

super(EncryptedField, self).__init__(*args, **kwargs)
super(BaseEncryptedField, self).__init__(*args, **kwargs)

def _get_config(self) -> Dict[str, Any]:
config = getattr(settings, "TINK_FIELDS_CONFIG", None)
Expand Down Expand Up @@ -93,45 +102,109 @@ def _get_tink_keyset_handle(self) -> KeysetHandle:
return cleartext_keyset_handle.read(reader)
return read_keyset_handle(reader, keyset_config.master_key_aead)

@lru_cache(maxsize=None)
def _get_aead_primitive(self) -> aead.Aead:
return self._keyset_handle.primitive(aead.Aead)

def get_internal_type(self) -> str:
return self._internal_type

def get_db_prep_save(self, value: Any, connection: BaseDatabaseWrapper) -> Any:
@cached_property
def validators(self):
    """Build the validator list the way the wrapped Django field type would.

    While the validators are being constructed, an instance-level
    ``_internal_type`` holding the parent class's internal type is
    installed (needed for ``IntegerField`` and subclasses). The override
    is removed again afterwards, even if construction raises.
    """
    parent = super(BaseEncryptedField, self)
    self.__dict__["_internal_type"] = parent.get_internal_type()
    try:
        return parent.validators
    finally:
        # Drop the instance-level override so the class attribute applies again.
        del self.__dict__["_internal_type"]

def to_python_prepare(self, value: bytes) -> Any:
    """Shape decrypted bytes for ``to_python``.

    Binary fields receive the raw bytes unchanged; every other field type
    receives the bytes converted with ``force_str``.
    """
    return value if isinstance(self, models.BinaryField) else force_str(value)


class EncryptedField(BaseEncryptedField):
"""A field that uses Tink primitives to protect the confidentiality and integrity of data"""

@cached_property
def _aead_primitive(self) -> aead.Aead:
    """Return the ``aead.Aead`` primitive built from this field's keyset handle."""
    handle = self._keyset_handle
    return handle.primitive(aead.Aead)

def get_db_prep_save(self, value: Any, connection: "BaseDatabaseWrapper") -> Any:
val = super(EncryptedField, self).get_db_prep_save(value, connection)
if val is not None:
return connection.Database.Binary(
self._get_aead_primitive().encrypt(
self._aead_primitive.encrypt(force_bytes(val), self._aad_callback(self))
)

def from_db_value(self, value, expression, connection, *args):
    """Decrypt a stored value and convert it to its Python representation.

    A ``None`` value is not decrypted and yields ``None``.
    """
    if value is None:
        return None
    plaintext = self._aead_primitive.decrypt(bytes(value), self._aad_callback(self))
    return self.to_python(self.to_python_prepare(plaintext))


class DeterministicEncryptedField(BaseEncryptedField):
"""Field that is similar to EncryptedField, but support exact match lookups"""

_unsupported_properties = []

@cached_property
def _daead_primitive(self) -> daead.DeterministicAead:
    """Return the ``daead.DeterministicAead`` primitive built from this field's keyset handle."""
    handle = self._keyset_handle
    return handle.primitive(daead.DeterministicAead)

def get_db_prep_value(
    self, value: Any, connection: "BaseDatabaseWrapper", prepared=False
) -> Any:
    """Prepare ``value`` for the database and encrypt it deterministically.

    The parent class prepares the value first. A prepared value of
    ``None`` is passed through as ``None``; anything else is converted to
    bytes, encrypted with the field's associated data, and wrapped in the
    backend's ``Binary`` type.
    """
    prepped = super(DeterministicEncryptedField, self).get_db_prep_value(
        value, connection, prepared
    )
    if prepped is None:
        return None

    ciphertext = self._daead_primitive.encrypt_deterministically(
        force_bytes(prepped), self._aad_callback(self)
    )
    return connection.Database.Binary(ciphertext)

def from_db_value(self, value, expression, connection, *args):
if value is not None:
return self.to_python(
force_str(
self._get_aead_primitive().decrypt(
self.to_python_prepare(
self._daead_primitive.decrypt_deterministically(
bytes(value), self._aad_callback(self)
)
)
)

@property
@lru_cache(maxsize=None)
def validators(self):
# Temporarily pretend to be whatever type of field we're masquerading
# as, for purposes of constructing validators (needed for
# IntegerField and subclasses).
self.__dict__["_internal_type"] = super(
EncryptedField, self
).get_internal_type()
try:
return super(EncryptedField, self).validators
finally:
del self.__dict__["_internal_type"]
def get_db_values_all_keys(
    self, value: Any, connection: "BaseDatabaseWrapper", prepared=False
) -> Any:
    """Like ``get_db_prep_value``, but return a list holding the value encrypted with every key in the keyset.

    Each element is the key's identifier followed by that key's
    deterministic ciphertext, wrapped in the backend's ``Binary`` type.
    An empty list is returned when the prepared value is ``None``.
    """
    prepped = super(DeterministicEncryptedField, self).get_db_prep_value(
        value, connection, prepared
    )
    if prepped is None:
        return []

    aad = self._aad_callback(self)
    encrypted = []
    # XXX: This would run another query. Is there any way to signal that we want a cached all using
    # the same primitive set interface?
    # NOTE(review): this reaches into the private ``_primitive_set`` attribute of the
    # Tink wrapper; confirm it is still available when upgrading Tink.
    for group in self._daead_primitive._primitive_set.all():
        encrypted.extend(
            connection.Database.Binary(
                entry.identifier
                + entry.primitive.encrypt_deterministically(force_bytes(prepped), aad)
            )
            for entry in group
        )

    return encrypted


def get_prep_lookup(self):
Expand All @@ -143,12 +216,32 @@ def get_prep_lookup(self):
)


for name, lookup in models.Field.class_lookups.items():
if name != "isnull":
lookup_class = type(
"EncryptedField" + name, (lookup,), {"get_prep_lookup": get_prep_lookup}
class DeterministicEncryptedFieldExactLookup(In):
    """``exact`` lookup for deterministic encrypted fields, built on ``In``.

    The single right-hand value is wrapped in a list for preparation and is
    then expanded, via the field's ``get_db_values_all_keys``, into one
    database parameter per key.
    """

    lookup_name = "exact"

    def get_prep_lookup(self):
        """Wrap the scalar right-hand side in a list, then defer to ``In``."""
        self.rhs = [self.rhs]
        return super().get_prep_lookup()

    def get_db_prep_lookup(self, value, connection):
        """Return the ``"%s"`` placeholder and the ciphertexts of the one lookup value."""
        assert len(value) == 1
        target = list(value)[0]
        field = self.lhs.output_field
        params = field.get_db_values_all_keys(target, connection, prepared=True)
        return "%s", params
EncryptedField.register_lookup(lookup_class)


# Register, on both encrypted field bases, a subclass of every built-in lookup
# except ``isnull`` whose ``get_prep_lookup`` is this module's ``get_prep_lookup``.
for name, lookup in models.Field.class_lookups.items():
    if name == "isnull":
        continue
    for cls in (EncryptedField, DeterministicEncryptedField):
        overrides = {"get_prep_lookup": get_prep_lookup}
        lookup_class = type(cls.__name__ + name, (lookup,), overrides)
        cls.register_lookup(lookup_class)

# Registered last so it replaces the generic ``exact`` entry added by the loop.
DeterministicEncryptedField.register_lookup(DeterministicEncryptedFieldExactLookup)


class EncryptedTextField(EncryptedField, models.TextField):
Expand All @@ -173,3 +266,21 @@ class EncryptedDateField(EncryptedField, models.DateField):

class EncryptedDateTimeField(EncryptedField, models.DateTimeField):
pass


class EncryptedBinaryField(EncryptedField, models.BinaryField):
    """Encrypted raw binary data; values must be under 2^32 bytes (about 4.295 GB)."""


class DeterministicEncryptedCharField(DeterministicEncryptedField, models.CharField):
    """``CharField`` combined with ``DeterministicEncryptedField``."""

    pass


class DeterministicEncryptedEmailField(DeterministicEncryptedField, models.EmailField):
    """``EmailField`` combined with ``DeterministicEncryptedField``."""

    pass


class DeterministicEncryptedIntegerField(
    DeterministicEncryptedField, models.IntegerField
):
    """``IntegerField`` combined with ``DeterministicEncryptedField``."""

    pass
20 changes: 20 additions & 0 deletions tink_fields/test/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class EncryptedDateTime(models.Model):
value = fields.EncryptedDateTimeField()


class EncryptedBinary(models.Model):
    """Test model with a single ``EncryptedBinaryField`` column."""

    value = fields.EncryptedBinaryField()


class EncryptedNullable(models.Model):
value = fields.EncryptedIntegerField(null=True)

Expand All @@ -41,3 +45,19 @@ class EncryptedCharWithFixedAad(models.Model):

class EncryptedCharWithAlternateKeyset(models.Model):
value = fields.EncryptedCharField(max_length=25, keyset="alternate")


class DeterministicEncryptedChar(models.Model):
    """Test model with a deterministic encrypted char column on the ``daead`` keyset."""

    value = fields.DeterministicEncryptedCharField(max_length=25, keyset="daead")


class DeterministicEncryptedEmail(models.Model):
    """Test model with a deterministic encrypted email column on the ``daead`` keyset."""

    value = fields.DeterministicEncryptedEmailField(keyset="daead")


class DeterministicEncryptedInt(models.Model):
    """Test model with a deterministic encrypted integer column on the ``daead`` keyset."""

    value = fields.DeterministicEncryptedIntegerField(keyset="daead")


class DeterministicEncryptedIntNullable(models.Model):
    """Test model with a nullable deterministic encrypted integer column on the ``daead`` keyset."""

    value = fields.DeterministicEncryptedIntegerField(keyset="daead", null=True)
4 changes: 4 additions & 0 deletions tink_fields/test/settings/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,8 @@
"cleartext": True,
"path": os.path.join(HERE, "../test_plaintext_keyset.json"),
},
"daead": {
"cleartext": True,
"path": os.path.join(HERE, "../test_plaintext_daead_keyset.json"),
},
}
52 changes: 48 additions & 4 deletions tink_fields/test/test_fields.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import date, datetime

from django.db import connection, models as dj_models
from django.utils.encoding import force_bytes, force_str
from django.db import connection
from django.utils.encoding import force_bytes
import pytest

from . import models
Expand All @@ -21,6 +21,7 @@
[datetime(2015, 2, 5, 15), datetime(2015, 2, 8, 16)],
),
(models.EncryptedCharWithAlternateKeyset, ["foo", "bar"]),
(models.EncryptedBinary, [b"1234", b"asdf"]),
],
)
class TestEncryptedFieldQueries(object):
Expand All @@ -32,12 +33,55 @@ def test_insert(self, db, model, vals):
with connection.cursor() as cur:
cur.execute("SELECT value FROM %s" % model._meta.db_table)
data = [
force_str(
field._get_aead_primitive().decrypt(
field.to_python_prepare(
field._aead_primitive.decrypt(
force_bytes(r[0]), aad_callback(field)
)
)
for r in cur.fetchall()
]

if model is models.EncryptedBinary:
assert list([bytes(field.to_python(item)) for item in data]) == [vals[0]]
else:
assert list(map(field.to_python, data)) == [vals[0]]


def test_encrypted_nullable(db):
    """A row saved with ``value=None`` can be fetched with an ``isnull`` filter."""
    row = models.EncryptedNullable(value=None)
    row.save()
    fetched = models.EncryptedNullable.objects.get(value__isnull=True)
    assert fetched


@pytest.mark.parametrize(
    "model,vals",
    [
        (models.DeterministicEncryptedChar, ["one", "two"]),
        (models.DeterministicEncryptedEmail, ["a@example.com", "b@example.com"]),
        (models.DeterministicEncryptedInt, [1, 2]),
    ],
)
class TestDeterministicEncryptedFieldQueries(object):
    """Insert and exact-match query checks for the deterministic encrypted fields."""

    def test_insert(self, db, model, vals):
        """Data stored in DB is actually encrypted."""
        field = model._meta.get_field("value")
        aad_callback = field._aad_callback
        model.objects.create(value=vals[0])

        # Read the raw column, bypassing the ORM, and decrypt it by hand.
        with connection.cursor() as cur:
            cur.execute("SELECT value FROM %s" % model._meta.db_table)
            rows = cur.fetchall()
            data = []
            for row in rows:
                plaintext = field._daead_primitive.decrypt_deterministically(
                    force_bytes(row[0]), aad_callback(field)
                )
                data.append(field.to_python_prepare(plaintext))

        assert [field.to_python(item) for item in data] == [vals[0]]

    def test_search(self, db, model, vals):
        """Filtering on the first value returns only the row holding it."""
        for val in (vals[0], vals[1]):
            model.objects.create(value=val)
        out = model.objects.filter(value=vals[0])
        assert len(out) == 1
        assert out[0].value == vals[0]
1 change: 1 addition & 0 deletions tink_fields/test/test_plaintext_daead_keyset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"primaryKeyId":508056547,"key":[{"keyData":{"typeUrl":"type.googleapis.com/google.crypto.tink.AesSivKey","value":"EkDc2ZTmEZO2wrwmfEBWTEwoRd2WrDqPikE8rseHs3Nx/exobkxiQEZtPwTM37iNdwVvSouyDLGWUjO3T3D8v0LC","keyMaterialType":"SYMMETRIC"},"status":"ENABLED","keyId":508056547,"outputPrefixType":"TINK"}]}