Skip to content

Commit 89e44f8

Browse files
authored
Merge pull request splitio#47 from splitio/addMurmurHashSupport
Add murmur hash support
2 parents 702c7e3 + de0b056 commit 89e44f8

15 files changed

+200345
-162
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,6 @@ target/
6363

6464
# PyCharm
6565
.idea
66+
67+
# rope autocomplete
68+
.ropeproject/

setup.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,19 @@
55
from sys import version_info
66

77
tests_require = ['flake8', 'nose', 'coverage']
8-
install_requires = ['arrow>=0.7.0', 'requests>=2.9.1', 'future>=0.15.2', 'docopt>=0.6.2']
8+
install_requires = [
9+
'arrow>=0.7.0',
10+
'requests>=2.9.1',
11+
'future>=0.15.2',
12+
'docopt>=0.6.2',
13+
]
914

1015
if version_info < (3,):
1116
tests_require += ['mock']
1217
install_requires += ['six>=1.10.0', 'futures>=3.0.5', 'enum34>=1.1.5']
1318

14-
with open(path.join(path.abspath(path.dirname(__file__)), 'splitio', 'version.py')) as f:
19+
with open(path.join(path.abspath(path.dirname(__file__)),
20+
'splitio', 'version.py')) as f:
1521
exec(f.read())
1622

1723
setup(name='splitio_client',
@@ -20,14 +26,16 @@
2026
author='Patricio Echague, Sebastian Arrubia',
2127
author_email='pato@split.io, sebastian@split.io',
2228
url='https://github.com/splitio/python-client',
23-
download_url='https://github.com/splitio/python-client/tarball/' + __version__,
29+
download_url=('https://github.com/splitio/python-client/tarball/' +
30+
__version__),
2431
license='Apache License 2.0',
2532
install_requires=install_requires,
2633
tests_require=tests_require,
2734
extras_require={
2835
'test': tests_require,
2936
'redis': ['redis>=2.10.5', 'jsonpickle>=0.9.3'],
30-
'uwsgi': ['uwsgi>=2.0.0', 'jsonpickle>=0.9.3']
37+
'uwsgi': ['uwsgi>=2.0.0', 'jsonpickle>=0.9.3'],
38+
'cpphash': ['splitmmh3']
3139
},
3240
setup_requires=['nose'],
3341
classifiers=[
@@ -39,4 +47,4 @@
3947
'Programming Language :: Python :: 3',
4048
'Topic :: Software Development :: Libraries'
4149
],
42-
packages=['splitio','splitio.update_scripts','splitio.bin'])
50+
packages=['splitio', 'splitio.update_scripts', 'splitio.bin'])

splitio/clients.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,12 @@ def _get_treatment_for_split(self, split, matching_key, bucketing_key, attribute
177177

178178
for condition in split.conditions:
179179
if condition.matcher.match(matching_key, attributes=attributes):
180-
return self.get_splitter().get_treatment(bucketing_key, split.seed, condition.partitions), condition.label
180+
return self.get_splitter().get_treatment(
181+
bucketing_key,
182+
split.seed,
183+
condition.partitions,
184+
split.algo
185+
), condition.label
181186

182187
# No condition matches
183188
return None, None

splitio/hashfns/__init__.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""
2+
This module contains hash functions implemented in pure python
3+
as well as the optional import (if installed) of a C compiled murmur hash
4+
function with python bindings.
5+
"""
6+
from __future__ import absolute_import, division, print_function, \
7+
unicode_literals
8+
9+
from splitio.hashfns import legacy
10+
11+
try:
    # First attempt to import the module with a C++ core (faster).
    import mmh3
    from ctypes import c_uint

    def _murmur_hash(key, seed):
        """
        Hash a key with the compiled murmur implementation.

        :param key: The key for which to get the hash
        :type key: str
        :param seed: The feature seed
        :type seed: int
        :return: The hash for the key and seed, as an unsigned value
        :rtype: int
        """
        ukey = key.encode('utf8')
        # Pass the result through c_uint so that a negative (signed) hash
        # is reported as its unsigned equivalent.
        return c_uint(mmh3.hash(ukey, seed)).value
except ImportError:
    # Only a missing/unimportable extension triggers the fallback; any other
    # error (including KeyboardInterrupt/SystemExit) is allowed to propagate.
    # Fallback to the interpreted python hash algorithm (slower).
    from splitio.hashfns import murmur3py
    _murmur_hash = murmur3py.murmur32_py
23+
24+
25+
_HASH_ALGORITHMS = {
26+
'legacy': legacy.legacy_hash,
27+
'murmur': _murmur_hash
28+
}
29+
30+
31+
def get_hash_fn(algo):
    """
    Look up the hash function registered for the requested algorithm.

    :param algo: Algorithm to use; looked up as a key of _HASH_ALGORITHMS
    :return: The registered hash function, or the legacy hash function when
        the requested algorithm is not registered
    :rtype: function
    """
    if algo in _HASH_ALGORITHMS:
        return _HASH_ALGORITHMS[algo]
    # Unknown (or missing) algorithm: default to the legacy hash.
    return legacy.legacy_hash

splitio/hashfns/legacy.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from __future__ import absolute_import, division, print_function, \
2+
unicode_literals
3+
4+
5+
def as_int32(value):
    """
    Wrap an integer into the signed 32-bit range using two's complement.

    :param value: The integer to wrap
    :type value: int
    :return: The equivalent value in [-2147483648, 2147483647]
    :rtype: int
    """
    # Fast path: the value already fits in a signed 32-bit integer.
    if -2147483648 <= value <= 2147483647:
        return value
    # Shift into the unsigned range, wrap modulo 2**32, then shift back.
    return (value + 2147483648) % 4294967296 - 2147483648
9+
10+
11+
def legacy_hash(key, seed):
    """
    Compute the legacy hash of a key combined with a feature seed.

    :param key: The key for which to get the hash
    :type key: str
    :param seed: The feature seed
    :type seed: int
    :return: The hash for the key and seed
    :rtype: int
    """
    acc = 0

    # Fold every character in: multiply the running value by 31 and add the
    # character's code point, wrapping through as_int32 after each step.
    for char in key:
        acc = as_int32(as_int32(31 * as_int32(acc)) + ord(char))

    # Mix the seed in with xor and wrap the result one last time.
    return int(as_int32(acc ^ as_int32(seed)))

splitio/hashfns/murmur3py.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from __future__ import absolute_import, division, print_function, \
2+
unicode_literals
3+
4+
5+
import sys as _sys
if _sys.version_info >= (3,):
    # Python 3 has no xrange; alias it to range so that every call form
    # (one, two or three arguments) is accepted, as on Python 2.
    xrange = range
del _sys
10+
11+
12+
def murmur32_py(key, seed=0x0):
    """
    Pure python implementation of the 32-bit murmur3 hash.

    :param key: The text to hash; it is encoded as UTF-8 before hashing
    :type key: str
    :param seed: The hash seed; only its low 32 bits are used
    :type seed: int
    :return: The hash as an unsigned 32-bit integer
    :rtype: int
    """

    key = bytearray(key, 'utf-8')

    def fmix(h):
        # Finalization mix: xor-shifts interleaved with multiplications,
        # each multiplication truncated to 32 bits.
        h ^= h >> 16
        h = (h * 0x85ebca6b) & 0xFFFFFFFF
        h ^= h >> 13
        h = (h * 0xc2b2ae35) & 0xFFFFFFFF
        h ^= h >> 16
        return h

    length = len(key)
    # Floor division keeps this an exact integer even with true division
    # enabled for the module.
    nblocks = length // 4

    h1 = seed & 0xFFFFFFFF

    c1 = 0xcc9e2d51
    c2 = 0x1b873593

    # body: consume the key four bytes at a time
    for block_start in range(0, nblocks * 4, 4):
        # Assemble the block as a little-endian 32-bit word (the byte at
        # the lowest index is the least significant one).
        k1 = key[block_start + 3] << 24 | \
            key[block_start + 2] << 16 | \
            key[block_start + 1] << 8 | \
            key[block_start + 0]

        k1 = (c1 * k1) & 0xFFFFFFFF
        k1 = (k1 << 15 | k1 >> 17) & 0xFFFFFFFF  # inlined ROTL32
        k1 = (c2 * k1) & 0xFFFFFFFF

        h1 ^= k1
        h1 = (h1 << 13 | h1 >> 19) & 0xFFFFFFFF  # inlined ROTL32
        h1 = (h1 * 5 + 0xe6546b64) & 0xFFFFFFFF

    # tail: the 1-3 bytes left over after the last full block
    tail_index = nblocks * 4
    k1 = 0
    tail_size = length & 3

    if tail_size >= 3:
        k1 ^= key[tail_index + 2] << 16
    if tail_size >= 2:
        k1 ^= key[tail_index + 1] << 8
    if tail_size >= 1:
        k1 ^= key[tail_index + 0]

    if tail_size > 0:
        k1 = (k1 * c1) & 0xFFFFFFFF
        k1 = (k1 << 15 | k1 >> 17) & 0xFFFFFFFF  # inlined ROTL32
        k1 = (k1 * c2) & 0xFFFFFFFF
        h1 ^= k1

    # finalization: fold the length in and run the final mix
    return fmix(h1 ^ length)

splitio/redis_support.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def missing_redis_dependencies(*args, **kwargs):
2323
from splitio.matchers import UserDefinedSegmentMatcher
2424
from splitio.metrics import BUCKETS
2525
from splitio.segments import Segment
26-
from splitio.splits import Split, SplitParser
26+
from splitio.splits import Split, SplitParser, HashAlgorithm
2727
from splitio.impressions import Impression
2828
from splitio.utils import bytes_to_string
2929

@@ -647,8 +647,13 @@ def __init__(self, segment_cache):
647647
self._segment_cache = segment_cache
648648

649649
def _parse_split(self, split, block_until_ready=False):
650-
return RedisSplit(split['name'], split['seed'], split['killed'], split['defaultTreatment'],
651-
split['trafficTypeName'], split['status'], split['changeNumber'], segment_cache=self._segment_cache)
650+
return RedisSplit(
651+
split['name'], split['seed'], split['killed'],
652+
split['defaultTreatment'], split['trafficTypeName'],
653+
split['status'], split['changeNumber'],
654+
segment_cache=self._segment_cache,
655+
algo=split.get('algo')
656+
)
652657

653658
def _parse_matcher_in_segment(self, partial_split, matcher, block_until_ready=False, *args,
654659
**kwargs):
@@ -660,7 +665,7 @@ def _parse_matcher_in_segment(self, partial_split, matcher, block_until_ready=Fa
660665

661666

662667
class RedisSplit(Split):
663-
def __init__(self, name, seed, killed, default_treatment, traffic_type_name, status, change_number, conditions=None, segment_cache=None):
668+
def __init__(self, name, seed, killed, default_treatment, traffic_type_name, status, change_number, conditions=None, segment_cache=None, algo=HashAlgorithm.LEGACY):
664669
"""A split implementation that mantains a reference to the segment cache so segments can
665670
be easily pickled and unpickled.
666671
:param name: Name of the feature

0 commit comments

Comments
 (0)