Skip to content

Commit 7bb00ec

Browse files
committed
added readline support to json string streams
1 parent cb6ecaf commit 7bb00ec

File tree

2 files changed

+206
-3
lines changed

2 files changed

+206
-3
lines changed

src/json_stream/tokenizer/strings.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,19 @@ class JsonStringReader(io.TextIOBase):
2828
def __init__(self, stream: io.TextIOBase, initial_buffer=''):
    """Create a reader over ``stream``.

    ``initial_buffer`` seeds ``self.buffer``, the text that ``_read_chunk``
    consumes before it falls back to reading from ``stream``.
    """
    # Where raw input comes from.
    self.stream = stream
    self.buffer = initial_buffer

    # Decoded text that has been read but not yet returned to the caller;
    # _read_chunk serves it before touching any new input.
    self.readline_buffer = ''

    # Parser state; _read_chunk stores state and unicode_buffer back on
    # the instance when it returns.
    self.state = CHAR
    self.unicode_buffer = ''
    self.index = 0

    # Flipped to True by _read_chunk when it reaches the closing quote.
    self.end_of_string = False
3536

37+
@property
def complete(self):
    """Whether the string has been fully handed to the caller.

    Truthy only when the closing quote has been seen AND no text is
    still waiting in ``readline_buffer``.
    """
    if self.end_of_string:
        return not self.readline_buffer
    # Falsy here; returned as-is, exactly as a short-circuiting `and` would.
    return self.end_of_string
40+
41+
def readable(self) -> bool:
    """Report that this object can be read from; always ``True``."""
    return True
43+
3644
def read(self, size: Union[int, None] = None) -> str:
3745
result = ''
3846
length = DEFAULT_BUFFER_SIZE
@@ -43,6 +51,9 @@ def read(self, size: Union[int, None] = None) -> str:
4351
return result
4452

4553
def _read_chunk(self, size: Union[int, None] = ...) -> str:
54+
if self.readline_buffer:
55+
result, self.readline_buffer = self.readline_buffer[:size], self.readline_buffer[size:]
56+
return result
4657
chunk = self.buffer or self.stream.read(size)
4758
if not chunk:
4859
raise ValueError("Unterminated string at end of file")
@@ -60,7 +71,7 @@ def _read_chunk(self, size: Union[int, None] = ...) -> str:
6071
if state == CHAR:
6172
if c == '"':
6273
result += chunk[start:i]
63-
self.complete = True
74+
self.end_of_string = True
6475
self.buffer = chunk[i + 1:]
6576
break
6677
elif c == "\\":
@@ -136,3 +147,23 @@ def _read_chunk(self, size: Union[int, None] = ...) -> str:
136147
self.state = state
137148
self.unicode_buffer = unicode_buffer
138149
return result
150+
151+
def readline(self, size: Union[int, None] = None) -> str:
    """Read and return one line of the decoded string.

    The returned text ends with ``'\\n'`` unless the end of the string or
    the ``size`` limit is reached first. Returns ``''`` once the string is
    complete.

    Args:
        size: Maximum number of characters to return. ``None``, ``0`` and
            negative values all mean "no limit" (``0`` keeps its historic
            meaning here; negative values follow the ``io`` convention).

    Raises:
        ValueError: propagated from ``_read_chunk`` when the input ends
            before the closing quote.
    """
    # Normalise "no limit" once so the loop has a single test to make.
    limit = size if size is not None and size > 0 else None

    result = ''
    read_size = DEFAULT_BUFFER_SIZE
    while not self.complete:
        if limit is not None:
            remaining = limit - len(result)
            if remaining <= 0:
                break
            read_size = remaining

        chunk = self._read_chunk(read_size)
        newline_at = chunk.find('\n')
        if newline_at < 0:
            result += chunk
            continue

        line_end = newline_at + 1
        result += chunk[:line_end]
        # _read_chunk may have served `chunk` out of readline_buffer and
        # left a tail there; prepend instead of overwriting so that tail
        # is not lost.
        self.readline_buffer = chunk[line_end:] + self.readline_buffer
        break

    # _read_chunk can return more than was asked for (it consumes all of
    # self.buffer), so enforce the limit here and push the overflow back.
    if limit is not None and len(result) > limit:
        result, overflow = result[:limit], result[limit:]
        self.readline_buffer = overflow + self.readline_buffer
    return result

src/json_stream/tokenizer/tests/test_strings.py

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
from io import StringIO
33
from unittest import TestCase
4+
from unittest.mock import patch
45

56
from json_stream.tokenizer.strings import JsonStringReader
67

@@ -76,7 +77,7 @@ def test_unterminated_strings_while_in_escape(self):
7677
self.assertStringRaises(r'"\u!!!', "Unterminated string at end of file")
7778

7879
def test_with_initial_buffer(self):
    """A string split between the initial buffer and the stream is joined."""
    self.assertStringEquals(
        "there will be more string",
        buffer='"there will be ',
        stream='more string"',
    )
8081

8182
def test_remainder(self):
8283
reader, f = self.assertStringEquals(
@@ -132,6 +133,166 @@ def test_read_over_split_escape(self):
132133
buffer, stream = json[:i], json[i:]
133134
self.assertStringEquals("abcdeÄedcba", buffer=buffer, stream=stream)
134135

136+
def test_readable(self):
    """A reader reports itself as readable, even over an empty stream."""
    self.assertTrue(JsonStringReader(StringIO()).readable())
139+
140+
def test_readline(self):
    """Successive readline() calls return each line of a four-line string."""
    stream = StringIO(r'some\nlines\nof\ntext"')
    reader = JsonStringReader(stream)

    # (line returned, text expected to remain in readline_buffer) for
    # every call before the last one.
    partial_lines = [
        ('some\n', 'lines\nof\ntext'),
        ('lines\n', 'of\ntext'),
        ('of\n', 'text'),
    ]
    for line, leftover in partial_lines:
        self.assertReadline(
            reader, stream,
            result=line,
            remaining_readline_buffer=leftover,
            complete=False,
        )

    # The final line has no trailing newline and finishes the string.
    self.assertReadline(reader, stream, result='text')
165+
166+
@patch('json_stream.tokenizer.strings.DEFAULT_BUFFER_SIZE', 10)
def test_readline_needs_multiple_reads(self):
    """A line longer than the (patched) buffer size is still returned whole."""
    stream = StringIO(r'aaaaaaaaaabbbbb\ncccdddddddd"')
    reader = JsonStringReader(stream)

    first_call = dict(
        result='aaaaaaaaaabbbbb\n',
        remaining_readline_buffer='ccc',
        remaining_stream='dddddddd"',
        complete=False,
    )
    self.assertReadline(reader, stream, **first_call)
    self.assertReadline(reader, stream, result='cccdddddddd')
178+
179+
def test_readline_eof_without_newline(self):
    """Without any newline the whole string is one line; then '' is returned."""
    stream = StringIO(r'aaaaaaaaaabbbbbcccdddddddd"')
    reader = JsonStringReader(stream)

    for expected in ('aaaaaaaaaabbbbbcccdddddddd', ''):
        self.assertReadline(reader, stream, result=expected)
187+
188+
@patch('json_stream.tokenizer.strings.DEFAULT_BUFFER_SIZE', 10)
def test_readline_then_read(self):
    """read() after readline() returns the rest of the string."""
    stream = StringIO(r'aaaaaaaaaabbbbbbbb\ndddddddd"')
    reader = JsonStringReader(stream)

    first_line = 'aaaaaaaaaabbbbbbbb\n'
    self.assertReadline(
        reader, stream, first_line,
        remaining_stream='dddddddd"',
        complete=False,
    )
    self.assertRead(reader, stream, 'dddddddd')
199+
200+
@patch('json_stream.tokenizer.strings.DEFAULT_BUFFER_SIZE', 10)
def test_readline_then_read_with_data_in_buffer(self):
    """read() first drains text readline() left behind, then the stream."""
    stream = StringIO(r'aaaaaaaaaabbbbb\ncccdddddddd"')
    reader = JsonStringReader(stream)

    after_first_line = dict(
        remaining_readline_buffer='ccc',
        remaining_stream='dddddddd"',
        complete=False,
    )
    self.assertReadline(reader, stream, 'aaaaaaaaaabbbbb\n', **after_first_line)
    self.assertRead(reader, stream, 'cccdddddddd')
212+
213+
def test_read_then_readline(self):
    """readline() continues from where a sized read() stopped."""
    stream = StringIO(r'aaaaaaaaaabbbbb\ncccdddddddd"')
    reader = JsonStringReader(stream)

    # A sized read takes the first ten characters only.
    self.assertRead(
        reader, stream, 'aaaaaaaaaa',
        remaining_stream=r'bbbbb\ncccdddddddd"',
        amount=10,
        complete=False,
    )

    # The two remaining lines then come back one at a time.
    last_line = 'cccdddddddd'
    self.assertReadline(
        reader, stream, 'bbbbb\n',
        remaining_readline_buffer=last_line,
        complete=False,
    )
    self.assertReadline(reader, stream, last_line)
233+
234+
def test_readline_with_size_shorter_than_line(self):
    """readline(size) stops at ``size`` characters when the line is longer."""
    stream = StringIO(r'aaaaaaaaaabbbbb\ncccdddddddd"')
    reader = JsonStringReader(stream)

    # The size limit cuts the first line short after ten characters.
    self.assertReadline(
        reader, stream, 'aaaaaaaaaa',
        remaining_stream=r'bbbbb\ncccdddddddd"',
        amount=10,
        complete=False,
    )

    # Unsized calls then return the rest of that line and the last line.
    last_line = 'cccdddddddd'
    self.assertReadline(
        reader, stream, 'bbbbb\n',
        remaining_readline_buffer=last_line,
        complete=False,
    )
    self.assertReadline(reader, stream, last_line)
254+
255+
def test_readline_with_size_longer_than_line(self):
    """readline(size) stops at the newline when the line is shorter than size."""
    stream = StringIO(r'aaaaaaaaaabbbbb\ncccdddddddd"')
    reader = JsonStringReader(stream)

    expectations = dict(
        result='aaaaaaaaaabbbbb\n',
        remaining_readline_buffer='ccc',
        remaining_stream='dddddddd"',
        complete=False,
    )
    self.assertReadline(reader, stream, amount=20, **expectations)
    self.assertReadline(reader, stream, result='cccdddddddd')
267+
268+
def test_readline_trailing_newline(self):
    """A string ending in a newline is complete after a single readline()."""
    stream = StringIO(r'a\n"')
    reader = JsonStringReader(stream)
    self.assertReadline(reader, stream, 'a\n')
275+
276+
def test_readline_no_trailing_newline(self):
    """Text after the last newline is returned by a further readline()."""
    stream = StringIO(r'a\nb"')
    reader = JsonStringReader(stream)

    tail = 'b'
    self.assertReadline(
        reader, stream, 'a\n',
        remaining_readline_buffer=tail,
        complete=False,
    )
    self.assertReadline(reader, stream, tail)
286+
287+
def test_readlines(self):
    """readlines() returns every line and leaves nothing unread."""
    stream = StringIO(r'some\nlines\nof\ntext"')
    reader = JsonStringReader(stream)

    lines = reader.readlines()
    self.assertListEqual(["some\n", "lines\n", "of\n", "text"], lines)

    # Nothing may be left in either buffer or in the underlying stream.
    self.assertEqual('', reader.readline_buffer)
    self.assertEqual('', reader.buffer)
    unread = stream.read()
    self.assertEqual('', unread)
    self.assertTrue(reader.complete)
295+
135296
def assertStringEquals(self, result, stream, buffer='', remaining_buffer='', remaining_stream='', amount=None,
136297
complete=True):
137298
if buffer:
@@ -145,6 +306,17 @@ def assertStringEquals(self, result, stream, buffer='', remaining_buffer='', rem
145306

146307
def assertRead(self, reader, stream, result, remaining_buffer='', remaining_stream='', amount=None, complete=True):
    """Call ``reader.read(amount)`` and check the reader's state afterwards.

    Checks, in order: the returned text, that ``readline_buffer`` is empty,
    the contents of ``reader.buffer``, the unread remainder of ``stream``
    (the stream position is restored afterwards), and ``reader.complete``.
    """
    actual = reader.read(amount)
    self.assertEqual(result, actual)
    self.assertEqual(reader.readline_buffer, '')
    self.assertEqual(remaining_buffer, reader.buffer)

    # Peek at what is left in the stream, then rewind so later reads by
    # the caller still see it.
    bookmark = stream.tell()
    unread = stream.read()
    self.assertEqual(remaining_stream, unread)
    stream.seek(bookmark)

    self.assertEqual(complete, reader.complete)
315+
316+
def assertReadline(self, reader, stream, result, remaining_readline_buffer='', remaining_buffer='',
317+
remaining_stream='', amount=None, complete=True):
318+
self.assertEqual(result, reader.readline(amount))
319+
self.assertEqual(remaining_readline_buffer, reader.readline_buffer)
148320
self.assertEqual(remaining_buffer, reader.buffer)
149321
pos = stream.tell()
150322
self.assertEqual(remaining_stream, stream.read())

0 commit comments

Comments
 (0)