Skip to content

Add murmur hash support #47

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,6 @@ target/

# PyCharm
.idea

# rope autocomplete
.ropeproject/
18 changes: 13 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
from sys import version_info

tests_require = ['flake8', 'nose', 'coverage']
install_requires = ['arrow>=0.7.0', 'requests>=2.9.1', 'future>=0.15.2', 'docopt>=0.6.2']
install_requires = [
'arrow>=0.7.0',
'requests>=2.9.1',
'future>=0.15.2',
'docopt>=0.6.2',
]

if version_info < (3,):
tests_require += ['mock']
install_requires += ['six>=1.10.0', 'futures>=3.0.5', 'enum34>=1.1.5']

with open(path.join(path.abspath(path.dirname(__file__)), 'splitio', 'version.py')) as f:
with open(path.join(path.abspath(path.dirname(__file__)),
'splitio', 'version.py')) as f:
exec(f.read())

setup(name='splitio_client',
Expand All @@ -20,14 +26,16 @@
author='Patricio Echague, Sebastian Arrubia',
author_email='pato@split.io, sebastian@split.io',
url='https://github.com/splitio/python-client',
download_url='https://github.com/splitio/python-client/tarball/' + __version__,
download_url=('https://github.com/splitio/python-client/tarball/' +
__version__),
license='Apache License 2.0',
install_requires=install_requires,
tests_require=tests_require,
extras_require={
'test': tests_require,
'redis': ['redis>=2.10.5', 'jsonpickle>=0.9.3'],
'uwsgi': ['uwsgi>=2.0.0', 'jsonpickle>=0.9.3']
'uwsgi': ['uwsgi>=2.0.0', 'jsonpickle>=0.9.3'],
'cpphash': ['splitmmh3']
},
setup_requires=['nose'],
classifiers=[
Expand All @@ -39,4 +47,4 @@
'Programming Language :: Python :: 3',
'Topic :: Software Development :: Libraries'
],
packages=['splitio','splitio.update_scripts','splitio.bin'])
packages=['splitio', 'splitio.update_scripts', 'splitio.bin'])
7 changes: 6 additions & 1 deletion splitio/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,12 @@ def _get_treatment_for_split(self, split, matching_key, bucketing_key, attribute

for condition in split.conditions:
if condition.matcher.match(matching_key, attributes=attributes):
return self.get_splitter().get_treatment(bucketing_key, split.seed, condition.partitions), condition.label
return self.get_splitter().get_treatment(
bucketing_key,
split.seed,
condition.partitions,
split.algo
), condition.label

# No condition matches
return None, None
Expand Down
38 changes: 38 additions & 0 deletions splitio/hashfns/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
This module contains hash functions implemented in pure python
as well as the optional import (if installed) of a C compiled murmur hash
function with python bindings.
"""
from __future__ import absolute_import, division, print_function, \
unicode_literals

from splitio.hashfns import legacy

try:
    # First attempt to import the module with a C++ core (faster).
    import mmh3
    from ctypes import c_uint
except ImportError:
    # Only a missing optional dependency should trigger the fallback; any
    # other error is a real problem and must not be silently hidden.
    # Fall back to the interpreted python hash algorithm (slower).
    from splitio.hashfns import murmur3py
    _murmur_hash = murmur3py.murmur32_py
else:
    def _murmur_hash(key, seed):
        """
        Hash a key with the compiled murmur implementation.
        :param key: The key for which to get the hash
        :type key: str
        :param seed: The feature seed
        :type seed: int
        :return: The hash reinterpreted as an unsigned 32 bit integer
        :rtype: int
        """
        ukey = key.encode('utf8')
        # c_uint converts the value returned by mmh3 into its unsigned
        # representation, like the value returned by the python fallback.
        return c_uint(mmh3.hash(ukey, seed)).value


# Lookup table: algorithm name -> hash function called as fn(key, seed).
_HASH_ALGORITHMS = {}
_HASH_ALGORITHMS['legacy'] = legacy.legacy_hash
_HASH_ALGORITHMS['murmur'] = _murmur_hash


def get_hash_fn(algo):
    """
    Look up the hash function registered for an algorithm name.
    Names that are not registered resolve to the legacy hash.
    :param algo: Algorithm to use
    :return: Hash function
    :rtype: function
    """
    try:
        return _HASH_ALGORITHMS[algo]
    except KeyError:
        # Unknown (or missing) algorithm: default to the legacy hash.
        return legacy.legacy_hash
26 changes: 26 additions & 0 deletions splitio/hashfns/legacy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import absolute_import, division, print_function, \
unicode_literals


def as_int32(value):
    """
    Wrap an integer into the signed 32 bit range using two's complement
    overflow semantics.
    :param value: The integer to wrap
    :type value: int
    :return: The equivalent value within [-2147483648, 2147483647]
    :rtype: int
    """
    # Values already inside the signed 32 bit range are returned untouched.
    if -2147483648 <= value <= 2147483647:
        return value
    # Shift into [0, 2**32), reduce modulo 2**32 and shift back.
    return (value + 2147483648) % 4294967296 - 2147483648


def legacy_hash(key, seed):
    """
    Compute the legacy hash of a key for a given feature seed.
    :param key: The key for which to get the hash
    :type key: str
    :param seed: The feature seed
    :type seed: int
    :return: The hash for the key and seed
    :rtype: int
    """
    acc = 0

    # Multiply-by-31 rolling hash over the code points of the key, wrapped
    # with as_int32 after every arithmetic step.
    for char in key:
        scaled = as_int32(31 * as_int32(acc))
        acc = as_int32(scaled + ord(char))

    # Mix the seed in with XOR and wrap the final result.
    mixed = acc ^ as_int32(seed)
    return int(as_int32(mixed))
70 changes: 70 additions & 0 deletions splitio/hashfns/murmur3py.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import absolute_import, division, print_function, \
unicode_literals


import sys as _sys
# On python 3 provide an xrange() that simply delegates to range().
if _sys.version_info[0] >= 3:
    def xrange(a, b, c):
        """Return range(a, b, c)."""
        return range(a, b, c)
# Do not leave the temporary module alias in the module namespace.
del _sys


def murmur32_py(key, seed=0x0):
    """
    Pure python implementation of the 32 bit murmur3 hash.
    :param key: The text to hash; it is encoded as UTF-8 before hashing
    :type key: str
    :param seed: The hash seed; only its lowest 32 bits are used
    :type seed: int
    :return: The hash as an unsigned 32 bit integer
    :rtype: int
    """
    mask = 0xFFFFFFFF
    c1 = 0xcc9e2d51
    c2 = 0x1b873593

    def scramble(k):
        # Mix one 32 bit word: multiply, rotate left by 15, multiply.
        k = (k * c1) & mask
        k = (k << 15 | k >> 17) & mask
        return (k * c2) & mask

    data = bytearray(key, 'utf-8')
    length = len(data)
    # Integer floor division: no round trip through float.
    nblocks = length // 4

    h1 = seed & mask

    # body: consume the input four bytes at a time
    for start in range(0, nblocks * 4, 4):
        # Assemble the block as a little-endian 32 bit word
        # (the first byte is the least significant one).
        k1 = (data[start + 3] << 24 |
              data[start + 2] << 16 |
              data[start + 1] << 8 |
              data[start])

        h1 ^= scramble(k1)
        h1 = (h1 << 13 | h1 >> 19) & mask  # rotate left by 13
        h1 = (h1 * 5 + 0xe6546b64) & mask

    # tail: the remaining 0 to 3 bytes, also little-endian
    tail = data[nblocks * 4:]
    if tail:
        k1 = 0
        for position, byte in enumerate(tail):
            k1 |= byte << (8 * position)
        h1 ^= scramble(k1)

    # finalization: fold in the length and mix the bits of the result
    h1 ^= length
    h1 ^= h1 >> 16
    h1 = (h1 * 0x85ebca6b) & mask
    h1 ^= h1 >> 13
    h1 = (h1 * 0xc2b2ae35) & mask
    h1 ^= h1 >> 16
    return h1
13 changes: 9 additions & 4 deletions splitio/redis_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def missing_redis_dependencies(*args, **kwargs):
from splitio.matchers import UserDefinedSegmentMatcher
from splitio.metrics import BUCKETS
from splitio.segments import Segment
from splitio.splits import Split, SplitParser
from splitio.splits import Split, SplitParser, HashAlgorithm
from splitio.impressions import Impression
from splitio.utils import bytes_to_string

Expand Down Expand Up @@ -647,8 +647,13 @@ def __init__(self, segment_cache):
self._segment_cache = segment_cache

def _parse_split(self, split, block_until_ready=False):
return RedisSplit(split['name'], split['seed'], split['killed'], split['defaultTreatment'],
split['trafficTypeName'], split['status'], split['changeNumber'], segment_cache=self._segment_cache)
return RedisSplit(
split['name'], split['seed'], split['killed'],
split['defaultTreatment'], split['trafficTypeName'],
split['status'], split['changeNumber'],
segment_cache=self._segment_cache,
algo=split.get('algo')
)

def _parse_matcher_in_segment(self, partial_split, matcher, block_until_ready=False, *args,
**kwargs):
Expand All @@ -660,7 +665,7 @@ def _parse_matcher_in_segment(self, partial_split, matcher, block_until_ready=Fa


class RedisSplit(Split):
def __init__(self, name, seed, killed, default_treatment, traffic_type_name, status, change_number, conditions=None, segment_cache=None):
def __init__(self, name, seed, killed, default_treatment, traffic_type_name, status, change_number, conditions=None, segment_cache=None, algo=HashAlgorithm.LEGACY):
"""A split implementation that mantains a reference to the segment cache so segments can
be easily pickled and unpickled.
:param name: Name of the feature
Expand Down
Loading