
Commit a995eb5

changed interface for getting default tokenizer
1 parent 77fbc99 commit a995eb5

7 files changed: +63 −36 lines
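
The diffs below replace the import-time default_tokenizer constant with helpers that choose a tokenizer per call. A minimal sketch of the new call pattern, using only names introduced in select_tokenizer.py below:

    # Old: a module-level constant, fixed at import time
    # from json_stream.select_tokenizer import default_tokenizer

    # New: resolved on demand; falls back to the pure-Python tokenizer
    # (with an ImportWarning) when the Rust extension is unavailable
    from json_stream.select_tokenizer import get_tokenizer
    tokenizer = get_tokenizer()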

src/json_stream/httpx/__init__.py

Lines changed: 18 additions & 7 deletions
@@ -1,5 +1,4 @@
 import json_stream
-from json_stream.select_tokenizer import default_tokenizer
 
 
 CONTENT_CHUNK_SIZE = 10 * 1024
@@ -9,9 +8,21 @@ def _to_iterable(response, chunk_size):
     return response.iter_bytes(chunk_size=chunk_size)
 
 
-def load(response, persistent=False, tokenizer=default_tokenizer, chunk_size=CONTENT_CHUNK_SIZE):
-    return json_stream.load(_to_iterable(response, chunk_size), persistent=persistent, tokenizer=tokenizer)
-
-
-def visit(response, visitor, tokenizer=default_tokenizer, chunk_size=CONTENT_CHUNK_SIZE):
-    return json_stream.visit(_to_iterable(response, chunk_size), visitor, tokenizer=tokenizer)
+def load(response, persistent=False, tokenizer=None, chunk_size=CONTENT_CHUNK_SIZE, buffering=0, **kwargs):
+    return json_stream.load(
+        _to_iterable(response, chunk_size),
+        persistent=persistent,
+        tokenizer=tokenizer,
+        buffering=buffering,
+        **kwargs
+    )
+
+
+def visit(response, visitor, tokenizer=None, chunk_size=CONTENT_CHUNK_SIZE, buffering=0, **kwargs):
+    return json_stream.visit(
+        _to_iterable(response, chunk_size),
+        visitor,
+        tokenizer=tokenizer,
+        buffering=buffering,
+        **kwargs
+    )
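
A minimal usage sketch for the reworked json_stream.httpx.load(); the URL is illustrative, and any extra keyword arguments are simply forwarded to the tokenizer via **kwargs:

    import httpx
    import json_stream.httpx

    # Sketch: stream a JSON document over HTTP; the URL is an assumption for illustration.
    with httpx.stream("GET", "https://example.com/data.json") as response:
        # tokenizer=None now means "pick the best available tokenizer at call time"
        data = json_stream.httpx.load(response)
        for key, value in data.items():
            print(key, value)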

src/json_stream/loader.py

Lines changed: 3 additions & 5 deletions
@@ -1,11 +1,9 @@
 from json_stream.base import StreamingJSONBase, TokenType
-from json_stream.iterators import ensure_file
-from json_stream.select_tokenizer import default_tokenizer
+from json_stream.select_tokenizer import get_token_stream
 
 
-def load(fp_or_iterable, persistent=False, tokenizer=default_tokenizer, buffering=-1):
-    fp = ensure_file(fp_or_iterable)
-    token_stream = tokenizer(fp, buffering=buffering)
+def load(fp_or_iterable, persistent=False, tokenizer=None, buffering=-1, **kwargs):
+    token_stream = get_token_stream(fp_or_iterable, tokenizer=tokenizer, buffering=buffering, **kwargs)
     token_type, token = next(token_stream)
     if token_type == TokenType.OPERATOR:
         return StreamingJSONBase.factory(token, token_stream, persistent)
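
For reference, a minimal sketch of calling the reworked load(); the file name is illustrative, and leaving tokenizer=None defers tokenizer selection to get_token_stream():

    import json_stream

    # Sketch: the file name is an assumption for illustration.
    with open("data.json") as f:
        data = json_stream.load(f)  # tokenizer resolved per call, not at import
        for key, value in data.items():
            print(key, value)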

src/json_stream/requests/__init__.py

Lines changed: 20 additions & 7 deletions
@@ -1,5 +1,4 @@
 import json_stream
-from json_stream.select_tokenizer import default_tokenizer
 
 
 CONTENT_CHUNK_SIZE = 10 * 1024
@@ -9,9 +8,23 @@ def _to_iterable(response, chunk_size):
     return response.iter_content(chunk_size=chunk_size)
 
 
-def load(response, persistent=False, tokenizer=default_tokenizer, chunk_size=CONTENT_CHUNK_SIZE):
-    return json_stream.load(_to_iterable(response, chunk_size), persistent=persistent, tokenizer=tokenizer)
-
-
-def visit(response, visitor, tokenizer=default_tokenizer, chunk_size=CONTENT_CHUNK_SIZE):
-    return json_stream.visit(_to_iterable(response, chunk_size), visitor, tokenizer=tokenizer)
+def load(response, persistent=False, tokenizer=None, chunk_size=CONTENT_CHUNK_SIZE,
+         buffering=0, **kwargs):
+    return json_stream.load(
+        _to_iterable(response, chunk_size),
+        persistent=persistent,
+        tokenizer=tokenizer,
+        buffering=buffering,
+        **kwargs
+    )
+
+
+def visit(response, visitor, tokenizer=None, chunk_size=CONTENT_CHUNK_SIZE,
+          buffering=0, **kwargs):
+    return json_stream.visit(
+        _to_iterable(response, chunk_size),
+        visitor,
+        tokenizer=tokenizer,
+        buffering=buffering,
+        **kwargs
+    )
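
The requests wrapper mirrors the httpx one; a minimal sketch, with an illustrative URL:

    import requests
    import json_stream.requests

    # Sketch: stream the response body instead of reading it all into memory.
    with requests.get("https://example.com/data.json", stream=True) as response:
        data = json_stream.requests.load(response)
        for key, value in data.items():
            print(key, value)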

src/json_stream/select_tokenizer.py

Lines changed: 14 additions & 6 deletions
@@ -1,12 +1,20 @@
 from warnings import warn
 
+from json_stream.iterators import ensure_file
 from json_stream.tokenizer import tokenize
 from json_stream_rs_tokenizer import rust_tokenizer_or_raise, ExtensionException
 
-try:
-    default_tokenizer = rust_tokenizer_or_raise()
-except ExtensionException as e:
-    warn(str(e), category=ImportWarning)  # ImportWarnings are ignored by default
-    default_tokenizer = tokenize
 
-__all__ = ['default_tokenizer']
+def get_tokenizer(**kwargs):
+    try:
+        return rust_tokenizer_or_raise(**kwargs)
+    except ExtensionException as e:
+        warn(str(e), category=ImportWarning)  # ImportWarnings are ignored by default
+        return tokenize
+
+
+def get_token_stream(fp_or_iterable, tokenizer, **tokenizer_kwargs):
+    fp = ensure_file(fp_or_iterable)
+    if tokenizer is None:
+        tokenizer = get_tokenizer(**tokenizer_kwargs)
+    return tokenizer(fp, **tokenizer_kwargs)
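
A minimal sketch of the two new helpers in isolation; the in-memory document is illustrative:

    import io
    from json_stream.select_tokenizer import get_tokenizer, get_token_stream

    # get_tokenizer() returns the Rust tokenizer when the extension is importable,
    # otherwise it warns and falls back to the pure-Python tokenize.
    tokenizer = get_tokenizer()

    # get_token_stream() wraps ensure_file() plus the tokenizer choice;
    # tokenizer=None defers the choice to get_tokenizer().
    for token_type, token in get_token_stream(io.StringIO('{"a": 1}'), tokenizer=None):
        print(token_type, token)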

src/json_stream/tests/__init__.py

Lines changed: 2 additions & 3 deletions
@@ -3,14 +3,13 @@
 from itertools import zip_longest
 from unittest import TestCase
 
-from json_stream.select_tokenizer import default_tokenizer
 
 from json_stream import load
 from json_stream.base import TransientAccessException
 
 
 class JSONLoadTestCase(TestCase):
-    def _test_object(self, obj, persistent, binary=False, tokenizer=default_tokenizer):
+    def _test_object(self, obj, persistent, binary=False, tokenizer=None):
         self.assertListEqual(list(self._to_data(obj, persistent, binary, tokenizer)), list(obj))
         self.assertListEqual(list(self._to_data(obj, persistent, binary, tokenizer).keys()), list(obj.keys()))
         self.assertListEqual(list(self._to_data(obj, persistent, binary, tokenizer).values()), list(obj.values()))
@@ -40,7 +39,7 @@ def _test_object(self, obj, persistent, binary=False, tokenizer=default_tokenize
         with self.assertRaises(TransientAccessException):
             data.items()  # can't get keys
 
-    def _test_list(self, obj, persistent, binary=False, tokenizer=default_tokenizer):
+    def _test_list(self, obj, persistent, binary=False, tokenizer=None):
         self.assertListEqual(list(self._to_data(obj, persistent, binary, tokenizer)), list(obj))
         if persistent:
             self.assertEqual(len(self._to_data(obj, persistent, binary, tokenizer)), len(obj))

src/json_stream/tests/test_tokenizer_integration.py

Lines changed: 3 additions & 3 deletions
@@ -5,20 +5,20 @@
 
 from json_stream import load
 
-from json_stream.select_tokenizer import default_tokenizer
+from json_stream.select_tokenizer import get_tokenizer
 
 from json_stream.tests import JSONLoadTestCase
 
 
 @skipUnless(hasattr(json_stream_rs_tokenizer, 'RustTokenizer'), 'rust tokenizer not available')
 class TestRSTokenizer(JSONLoadTestCase):
     def test_load_object(self):
-        self.assertIs(default_tokenizer, json_stream_rs_tokenizer.RustTokenizer)
+        self.assertIs(get_tokenizer(), json_stream_rs_tokenizer.RustTokenizer)
         obj = {"a": 1, "b": None, "c": True}
         self._test_object(obj, persistent=False)
 
     def test_load_object_binary(self):
-        self.assertIs(default_tokenizer, json_stream_rs_tokenizer.RustTokenizer)
+        self.assertIs(get_tokenizer(), json_stream_rs_tokenizer.RustTokenizer)
         obj = {"a": 1, "b": None, "c": True}
         self._test_object(obj, persistent=False, binary=True)

src/json_stream/visitor.py

Lines changed: 3 additions & 5 deletions
@@ -1,6 +1,5 @@
 from json_stream.base import StreamingJSONObject, StreamingJSONList, StreamingJSONBase
-from json_stream.iterators import ensure_file
-from json_stream.select_tokenizer import default_tokenizer
+from json_stream.select_tokenizer import get_token_stream
 
 
 def _visit(obj, visitor, path):
@@ -19,9 +18,8 @@ def _visit(obj, visitor, path):
         visitor(obj, path)
 
 
-def visit(fp_or_iterator, visitor, tokenizer=default_tokenizer, buffering=-1):
-    fp = ensure_file(fp_or_iterator)
-    token_stream = tokenizer(fp, buffering=buffering)
+def visit(fp_or_iterable, visitor, tokenizer=None, buffering=-1, **kwargs):
+    token_stream = get_token_stream(fp_or_iterable, tokenizer=tokenizer, buffering=buffering, **kwargs)
     _, token = next(token_stream)
     obj = StreamingJSONBase.factory(token, token_stream, persistent=False)
     _visit(obj, visitor, ())
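
A minimal sketch of the reworked visit() entry point; the file name and visitor body are illustrative:

    import json_stream

    def visitor(item, path):
        # path is the tuple of keys/indices leading to each terminal value
        print(path, item)

    # Sketch: tokenizer=None defers tokenizer selection to get_token_stream().
    with open("data.json") as f:
        json_stream.visit(f, visitor)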
