Skip to content

Commit 0eb7991

Browse files
committed
Some refactoring to tidy up hash methods
1 parent e87fbb8 commit 0eb7991

File tree

8 files changed

+177
-63
lines changed

8 files changed

+177
-63
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# The default ``config.py``
2+
# flake8: noqa
3+
4+
5+
def set_prefs(prefs):
6+
"""This function is called before opening the project"""
7+
8+
# Specify which files and folders to ignore in the project.
9+
# Changes to ignored resources are not added to the history and
10+
# VCSs. Also they are not returned in `Project.get_files()`.
11+
# Note that ``?`` and ``*`` match all characters but slashes.
12+
# '*.pyc': matches 'test.pyc' and 'pkg/test.pyc'
13+
# 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc'
14+
# '.svn': matches 'pkg/.svn' and all of its children
15+
# 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o'
16+
# 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o'
17+
prefs['ignored_resources'] = [
18+
'*.pyc', '*~', '.ropeproject', '.hg', '.svn', '_svn',
19+
'.git', '.tox', '.env', 'env', 'venv', 'node_modules',
20+
'bower_components'
21+
]
22+
23+
# Specifies which files should be considered python files. It is
24+
# useful when you have scripts inside your project. Only files
25+
# ending with ``.py`` are considered to be python files by
26+
# default.
27+
#prefs['python_files'] = ['*.py']
28+
29+
# Custom source folders: By default rope searches the project
30+
# for finding source folders (folders that should be searched
31+
# for finding modules). You can add paths to that list. Note
32+
# that rope guesses project source folders correctly most of the
33+
# time; use this if you have any problems.
34+
# The folders should be relative to project root and use '/' for
35+
# separating folders regardless of the platform rope is running on.
36+
# 'src/my_source_folder' for instance.
37+
#prefs.add('source_folders', 'src')
38+
39+
# You can extend python path for looking up modules
40+
#prefs.add('python_path', '~/python/')
41+
42+
# Should rope save object information or not.
43+
prefs['save_objectdb'] = True
44+
prefs['compress_objectdb'] = False
45+
46+
# If `True`, rope analyzes each module when it is being saved.
47+
prefs['automatic_soa'] = True
48+
# The depth of calls to follow in static object analysis
49+
prefs['soa_followed_calls'] = 0
50+
51+
# If `False` when running modules or unit tests "dynamic object
52+
# analysis" is turned off. This makes them much faster.
53+
prefs['perform_doa'] = True
54+
55+
# Rope can check the validity of its object DB when running.
56+
prefs['validate_objectdb'] = True
57+
58+
# How many undos to hold?
59+
prefs['max_history_items'] = 32
60+
61+
# Shows whether to save history across sessions.
62+
prefs['save_history'] = True
63+
prefs['compress_history'] = False
64+
65+
# Set the number of spaces used for indenting. According to
66+
# :PEP:`8`, it is best to use 4 spaces. Since most of rope's
67+
# unit-tests use 4 spaces it is more reliable, too.
68+
prefs['indent_size'] = 4
69+
70+
# Builtin and c-extension modules that are allowed to be imported
71+
# and inspected by rope.
72+
prefs['extension_modules'] = []
73+
74+
# Add all standard c-extensions to extension_modules list.
75+
prefs['import_dynload_stdmods'] = True
76+
77+
# If `True` modules with syntax errors are considered to be empty.
78+
# The default value is `False`; when `False`, syntax errors raise
79+
# `rope.base.exceptions.ModuleSyntaxError` exception.
80+
prefs['ignore_syntax_errors'] = False
81+
82+
# If `True`, rope ignores unresolvable imports. Otherwise, they
83+
# appear in the importing namespace.
84+
prefs['ignore_bad_imports'] = False
85+
86+
# If `True`, rope will insert new module imports as
87+
# `from <package> import <module>` by default.
88+
prefs['prefer_module_from_imports'] = False
89+
90+
# If `True`, rope will transform a comma-separated list of imports into
91+
# multiple separate import statements when organizing
92+
# imports.
93+
prefs['split_imports'] = False
94+
95+
# If `True`, rope will sort imports alphabetically by module name
96+
# instead of alphabetically by import statement, with from imports
97+
# after normal imports.
98+
prefs['sort_imports_alphabetically'] = False
99+
100+
101+
def project_opened(project):
102+
"""This function is called after opening the project"""
103+
# Do whatever you like here!
130 Bytes
Binary file not shown.

splitio/hashfns/.ropeproject/history

14 Bytes
Binary file not shown.

splitio/hashfns/.ropeproject/objectdb

6 Bytes
Binary file not shown.

splitio/hashfns/__init__.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""
2+
This module contains hash functions implemented in pure python
3+
as well as the optional import (if installed) of a C compiled murmur hash
4+
function with python bindings.
5+
"""
6+
from __future__ import absolute_import, division, print_function, \
7+
unicode_literals
8+
9+
from splitio.hashfns import legacy
10+
11+
try:
    # Prefer the mmh3 package, whose C++ core is faster than pure Python.
    import mmh3
    from ctypes import c_uint

    def _murmur_hash(key, seed):
        """
        Compute the murmur3 hash of a key using the mmh3 extension.
        :param key: The key for which to get the hash
        :type key: str
        :param seed: The hash seed
        :type seed: int
        :return: The hash for the key and seed, as an unsigned integer
        :rtype: int
        """
        ukey = key.encode('utf8')
        # Passing the result through c_uint yields its unsigned
        # representation.
        return c_uint(mmh3.hash(ukey, seed)).value
except ImportError:
    # Fall back to the interpreted Python hash algorithm (slower). Only a
    # failed import selects the fallback; unrelated exceptions such as
    # KeyboardInterrupt or SystemExit are allowed to propagate.
    from splitio.hashfns import murmur3py
    _murmur_hash = murmur3py.murmur32_py
23+
24+
25+
_HASH_ALGORITHMS = {
26+
'legacy': legacy.legacy_hash,
27+
'murmur': _murmur_hash
28+
}
29+
30+
31+
def get_hash_fn(algo):
    """
    Look up the hash function registered for the requested algorithm.
    Unknown algorithm names resolve to the legacy hash function.
    :param algo: Algorithm to use
    :return: Hash function
    :rtype: function
    """
    try:
        return _HASH_ALGORITHMS[algo]
    except KeyError:
        # Unrecognised algorithm: default to the legacy hash.
        return legacy.legacy_hash

splitio/hashfns/legacy.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from __future__ import absolute_import, division, print_function, \
2+
unicode_literals
3+
4+
5+
def as_int32(value):
    """
    Wrap an integer into the signed 32-bit range, emulating two's
    complement overflow.
    :param value: The integer to wrap
    :type value: int
    :return: The equivalent value within [-2147483648, 2147483647]
    :rtype: int
    """
    # Both bounds are the inclusive limits of the signed 32-bit range, so
    # the values just outside it (2147483648 and -2147483649) wrap as well.
    if not -2147483648 <= value <= 2147483647:
        return (value + 2147483648) % 4294967296 - 2147483648
    return value
9+
10+
11+
def legacy_hash(key, seed):
    """
    Compute the legacy hash of a key combined with a feature seed.
    :param key: The key for which to get the hash
    :type key: str
    :param seed: The feature seed
    :type seed: int
    :return: The hash for the key and seed
    :rtype: int
    """
    acc = 0
    for char in key:
        # Multiply-by-31 rolling hash; as_int32 is applied at every step.
        scaled = as_int32(31 * as_int32(acc))
        acc = as_int32(scaled + ord(char))

    # Mix in the seed and wrap the final result once more.
    return int(as_int32(acc ^ as_int32(seed)))

splitio/hashfns.py renamed to splitio/hashfns/murmur3py.py

Lines changed: 1 addition & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
"""
2-
This module contains hash functions implemented in pure python
3-
as well as the optional import (if installed) of a C compiled murmur hash
4-
function with python bindings.
5-
"""
61
from __future__ import absolute_import, division, print_function, \
72
unicode_literals
83

4+
95
import sys as _sys
106
if (_sys.version_info > (3, 0)):
117
def xrange(a, b, c):
@@ -72,56 +68,3 @@ def fmix(h):
7268

7369
unsigned_val = fmix(h1 ^ length)
7470
return unsigned_val
75-
76-
77-
try:
78-
# First attempt to import module with C++ core (faster)
79-
import mmh3
80-
from ctypes import c_uint
81-
82-
def _murmur_hash(key, seed):
83-
ukey = key.encode('utf8')
84-
return c_uint(mmh3.hash(ukey, seed)).value
85-
except:
86-
# Fallback to interpreted python hash algoritm (slower)
87-
_murmur_hash = murmur32_py
88-
89-
90-
def as_int32(value):
91-
if not -2147483649 <= value <= 2147483648:
92-
return (value + 2147483648) % 4294967296 - 2147483648
93-
return value
94-
95-
96-
def _basic_hash(key, seed):
97-
"""
98-
Generates a hash for a key and a feature seed.
99-
:param key: The key for which to get the hash
100-
:type key: str
101-
:param seed: The feature seed
102-
:type seed: int
103-
:return: The hash for the key and seed
104-
:rtype: int
105-
"""
106-
h = 0
107-
108-
for c in map(ord, key):
109-
h = as_int32(as_int32(31 * as_int32(h)) + c)
110-
111-
return int(as_int32(h ^ as_int32(seed)))
112-
113-
114-
_HASH_ALGORITHMS = {
115-
'legacy': _basic_hash,
116-
'murmur': _murmur_hash
117-
}
118-
119-
120-
def get_hash_fn(algo):
121-
"""
122-
Return appropriate hash function for requested algorithm
123-
:param algo: Algoritm to use
124-
:return: Hash function
125-
:rtype: function
126-
"""
127-
return _HASH_ALGORITHMS.get(algo, _basic_hash)

splitio/tests/test_splitters.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from splitio.splits import Partition, HashAlgorithm
1818
from splitio.splitters import Splitter
1919
from splitio.treatments import CONTROL
20-
from splitio.hashfns import _basic_hash, _murmur_hash
20+
from splitio.hashfns import _HASH_ALGORITHMS
2121
from splitio.tests.utils import MockUtilsMixin, random_alphanumeric_string
2222
import io
2323

@@ -118,37 +118,41 @@ def test_with_sample_data(self):
118118
"""
119119
Tests basic hash against expected values using alphanumeric values
120120
"""
121+
hashfn = _HASH_ALGORITHMS['legacy']
121122
with open(join(dirname(__file__), 'sample-data.jsonl')) as f:
122123
for line in map(loads, f):
123124
seed, key, hash_, bucket = line
124-
self.assertEqual(int(hash_), _basic_hash(key, int(seed)))
125+
self.assertEqual(int(hash_), hashfn(key, int(seed)))
125126
@skip
126127
def test_with_non_alpha_numeric_sample_data(self):
127128
"""
128129
Tests basic hash against expected values using non alphanumeric values
129130
"""
131+
hashfn = _HASH_ALGORITHMS['legacy']
130132
with io.open(join(dirname(__file__), 'sample-data-non-alpha-numeric.jsonl'), 'r', encoding='utf-8') as f:
131133
for line in map(loads, f):
132134
seed, key, hash_, bucket = line
133-
self.assertEqual(int(hash_), _basic_hash(key, int(seed)))
135+
self.assertEqual(int(hash_), hashfn(key, int(seed)))
134136

135137
def test_murmur_with_sample_data(self):
136138
"""
137139
Tests murmur32 hash against expected values using alphanumeric values
138140
"""
141+
hashfn = _HASH_ALGORITHMS['murmur']
139142
with open(join(dirname(__file__), 'murmur3-sample-data-v2.csv')) as f:
140143
for line in f:
141144
seed, key, hash_, bucket = line.split(',')
142-
self.assertEqual(int(hash_), _murmur_hash(key, int(seed)))
145+
self.assertEqual(int(hash_), hashfn(key, int(seed)))
143146

144147
def test_murmur_with_non_alpha_numeric_sample_data(self):
145148
"""
146149
Tests murmur32 hash against expected values using non alphanumeric values
147150
"""
151+
hashfn = _HASH_ALGORITHMS['murmur']
148152
with io.open(join(dirname(__file__), 'murmur3-sample-data-non-alpha-numeric-v2.csv'), 'r', encoding='utf-8') as f:
149153
for line in f:
150154
seed, key, hash_, bucket = line.split(',')
151-
self.assertEqual(int(hash_), _murmur_hash(key, int(seed)))
155+
self.assertEqual(int(hash_), hashfn(key, int(seed)))
152156

153157

154158
class SplitterGetBucketUnitTests(TestCase):

0 commit comments

Comments
 (0)