Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide ImmutableDict & ImmutableMultiDict factory methods which ban duplicate additions (Closes issue #53) #55

Merged
merged 8 commits into from
Jun 20, 2019
6 changes: 5 additions & 1 deletion immutablecollections/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@
from attr.exceptions import FrozenInstanceError

from immutablecollections.immutablecollection import ImmutableCollection
from immutablecollections._immutabledict import immutabledict, ImmutableDict
from immutablecollections._immutabledict import (
immutabledict,
ImmutableDict,
immutabledict_from_unique_keys,
)
from immutablecollections._immutableset import (
immutableset,
immutableset_from_unique_elements,
Expand Down
40 changes: 38 additions & 2 deletions immutablecollections/_immutabledict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
Mapping,
TypeVar,
Tuple,
Set,
List,
Iterator,
Optional,
Union,
Expand Down Expand Up @@ -32,7 +34,7 @@


def immutabledict(
iterable: Optional[AllowableSourceType] = None
iterable: Optional[AllowableSourceType] = None, *, forbid_duplicate_keys: bool = False
) -> "ImmutableDict[KT, VT]":
"""
Create an immutable dictionary with the given mappings.
Expand All @@ -51,7 +53,8 @@ def immutabledict(

if isinstance(iterable, ImmutableDict):
# if an ImmutableDict is input, we can safely just return it,
# since the object can safely be shared
# since the object can safely be shared.
# It is also guaranteed not to have repeat keys
return iterable

if isinstance(iterable, Dict) and not DICT_ITERATION_IS_DETERMINISTIC:
Expand All @@ -66,13 +69,46 @@ def immutabledict(
f"Cannot create an immutabledict from {type(iterable)}, only {InstantiationTypes}"
)

if forbid_duplicate_keys:
keys: List[KT]
# `for x in dict` grabs keys, but `for x in pairs` grabs pairs, so we must branch
if isinstance(iterable, Mapping):
# duplicate keys are possible if input is e.g. a multidict
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mapping guarantees/requires that .keys return a set (and our MultiDicts don't implement Mapping)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What types should we check for then? dict_keys has some set behavior but fails isinstance:

In [1]: d = {i:i*2 for i in range(10000)}

In [2]: type(d.keys())
Out[2]: dict_keys

In [3]: isinstance(d.keys(), set)
Out[3]: False

In [4]: from typing import Mapping

In [5]: isinstance(d, Mapping)
Out[5]: True

In [6]: from typing import Set

In [7]: isinstance(d.keys(), Set)
Out[7]: False

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could branch on whether iterable has .keys, or we could have a weaker debug message that just says some unknown duplicate keys were present

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code is actually fine; it's just the comment which is wrong :-)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@qpwo : We do want to include the duplicate elements. But we want to keep the "happy path" (which will be by far the most common) very fast; once we know there is an error, then we can do the work to figure out what the duplicate elements are (because no one cares if their crash takes an extra millisecond longer :-) )

keys = list(iterable.keys())
else:
# iterable must be a (key, value) pair iterable
iterable = list(iterable) # in case iterable is consumed by iteration
keys = [key for key, value in iterable]
seen: Set[KT] = set()
duplicated: Set[KT] = set()
for key in keys:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, this is really super-slow compared to the call to dict() hidden inside _RegularDictBackedImmutableDict (which is implemented in C). Instead, I'd suggest doing a two-part approach:

  • if forbid_duplicate_keys is True, then list-ify the input Iterable.
  • after we construct ret, then if forbid_duplicate_keys is True, compare then len of the ret to the len of the input iterable. If there is a mismatch, then do the work to construct the error message

if key in seen:
duplicated.add(key)
else:
seen.add(key)
if duplicated:
raise ValueError(
"forbid_duplicate_keys=True, but some keys "
"occur multiple times in input: {}".format(duplicated)
)

ret: ImmutableDict[KT, VT] = _RegularDictBackedImmutableDict(iterable)
if ret:
return ret
else:
return _EMPTY


def immutabledict_from_unique_keys(
iterable: Optional[AllowableSourceType] = None
) -> "ImmutableDict[KT, VT]":
"""
Create an immutable dictionary with the given mappings, but raise ValueError if
*iterable* contains the same item twice. More information in `immutabledict`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring here is not quite accurate - we throw on a duplicate key, not a duplicate item

"""
return immutabledict(iterable, forbid_duplicate_keys=True)


class ImmutableDict(ImmutableCollection[KT], Mapping[KT, VT], metaclass=ABCMeta):
"""
A ``Mapping`` implementation which is locally immutable.
Expand Down
11 changes: 8 additions & 3 deletions immutablecollections/_immutableset.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,21 @@ def immutableset(
"iteration order, specify disable_order_check=True"
)

duplicated = set() # only used if forbid_duplicate_elements=True
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason to spend time allocating this unless forbid_duplicate_elements is True

iteration_order = []
containment_set: MutableSet[T] = set()
for value in iterable:
if value not in containment_set:
containment_set.add(value)
iteration_order.append(value)
elif forbid_duplicate_elements:
raise ValueError(
"Input collection has duplicate items and forbid_duplicate_elements=False"
)
duplicated.add(value)

if forbid_duplicate_elements and duplicated:
raise ValueError(
"forbid_duplicate_elements=True, but some elements "
"occur multiple times in input: {}".format(duplicated)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefer f-strings to .format

)

if iteration_order:
if len(iteration_order) == 1:
Expand Down
23 changes: 22 additions & 1 deletion tests/test_immutabledict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
from collections.abc import Mapping
from unittest import TestCase

from immutablecollections import immutabledict, ImmutableDict
from immutablecollections import (
immutabledict,
ImmutableDict,
immutabledict_from_unique_keys,
)


class TestImmutableDict(TestCase):
Expand Down Expand Up @@ -138,3 +142,20 @@ def test_pickling(self):
immutabledict([(5, "apple"), (2, "banana")]).__reduce__(),
(immutabledict, (((5, "apple"), (2, "banana")),)),
)

def test_immutabledict_duplication_blocking(self):
bad = [(7, 8), (9, 10), (7, 11)]
with self.assertRaises(ValueError):
immutabledict(bad, forbid_duplicate_keys=True)
with self.assertRaises(ValueError):
immutabledict_from_unique_keys(bad)
with self.assertRaises(ValueError):
immutabledict((x for x in bad), forbid_duplicate_keys=True)
with self.assertRaises(ValueError):
immutabledict_from_unique_keys(x for x in bad)

good = [(7, 8), (9, 10), (12, 11)]
immutabledict(good, forbid_duplicate_keys=True)
immutabledict_from_unique_keys(good)
immutabledict((x for x in good), forbid_duplicate_keys=True)
immutabledict_from_unique_keys(x for x in good)
19 changes: 13 additions & 6 deletions tests/test_immutableset.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,11 +286,18 @@ def test_subtract_from_other_set_types(self):
self.assertEqual({"a"}, frozenset(["a", "b"]) - immutableset(["b", "c"]))

def test_forbid_duplicate_elements(self):
bad = [4, 6, 7, 9, 7]
with self.assertRaises(ValueError):
immutableset([4, 6, 7, 9, 7], forbid_duplicate_elements=True)
immutableset([3, 7, 5, 9], forbid_duplicate_elements=True)

def test_immutableset_from_unique_elements(self):
immutableset(bad, forbid_duplicate_elements=True)
with self.assertRaises(ValueError):
immutableset_from_unique_elements(bad)
with self.assertRaises(ValueError):
immutableset_from_unique_elements([4, 6, 7, 9, 7])
immutableset_from_unique_elements([3, 7, 5, 9])
immutableset((x for x in bad), forbid_duplicate_elements=True)
with self.assertRaises(ValueError):
immutableset_from_unique_elements(x for x in bad)

good = [3, 7, 5, 9]
immutableset(good, forbid_duplicate_elements=True)
immutableset_from_unique_elements(good)
immutableset((x for x in good), forbid_duplicate_elements=True)
immutableset_from_unique_elements(x for x in good)