
Use native bytearray truncation #120

Closed
1 change: 0 additions & 1 deletion h11/_connection.py
@@ -425,7 +425,6 @@ def next_event(self):
             event = self._extract_next_receive_event()
             if event not in [NEED_DATA, PAUSED]:
                 self._process_event(self.their_role, event)
-            self._receive_buffer.compress()
             if event is NEED_DATA:
                 if len(self._receive_buffer) > self._max_incomplete_event_size:
                     # 431 is "Request header fields too large" which is pretty
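Why the explicit compress() call can go away here: after this PR, every maybe_extract_* method in ReceiveBuffer (see the h11/_receivebuffer.py diff below) deletes the consumed prefix from the underlying bytearray itself, so next_event never leaves stale bytes behind between events. A minimal sketch of the resulting behaviour, assuming the post-PR ReceiveBuffer API; the buffer contents are illustrative only:

from h11._receivebuffer import ReceiveBuffer

buf = ReceiveBuffer()
buf += b"0123456789"

# Extraction deletes the consumed prefix in place, so there is no
# leftover data for a separate compress() pass to clean up.
assert buf.maybe_extract_at_most(4) == b"0123"
assert bytes(buf) == b"456789"
assert len(buf) == 6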
55 changes: 14 additions & 41 deletions h11/_receivebuffer.py
@@ -16,33 +16,10 @@
 # of constantly copying
 # WARNING:
 # - I haven't benchmarked or profiled any of this yet.
-#
-# Note that starting in Python 3.4, deleting the initial n bytes from a
-# bytearray is amortized O(n), thanks to some excellent work by Antoine
-# Martin:
-#
-# https://bugs.python.org/issue19087
Contributor: I think it'd be helpful to keep the paragraph above.

Contributor Author: Fair point, yes. I've addressed it as an inline comment.

-#
-# This means that if we only supported 3.4+, we could get rid of the code here
-# involving self._start and self.compress, because it's doing exactly the same
-# thing that bytearray now does internally.
-#
-# BUT unfortunately, we still support 2.7, and reading short segments out of a
-# long buffer MUST be O(bytes read) to avoid DoS issues, so we can't actually
-# delete this code. Yet:
-#
-# https://pythonclock.org/
-#
-# (Two things to double-check first though: make sure PyPy also has the
-# optimization, and benchmark to make sure it's a win, since we do have a
-# slightly clever thing where we delay calling compress() until we've
-# processed a whole event, which could in theory be slightly more efficient
-# than the internal bytearray support.)
 class ReceiveBuffer(object):
     def __init__(self):
         self._data = bytearray()
-        # These are both absolute offsets into self._data:
-        self._start = 0
         self._looked_at = 0
         self._looked_for = b""

@@ -51,56 +28,52 @@ def __bool__(self):

     # for @property unprocessed_data
     def __bytes__(self):
-        return bytes(self._data[self._start :])
+        return bytes(self._data)

     if sys.version_info[0] < 3: # version specific: Python 2
         __str__ = __bytes__
         __nonzero__ = __bool__

     def __len__(self):
-        return len(self._data) - self._start
-
-    def compress(self):
-        # Heuristic: only compress if it lets us reduce size by a factor
-        # of 2
-        if self._start > len(self._data) // 2:
-            del self._data[: self._start]
-            self._looked_at -= self._start
-            self._start -= self._start
+        return len(self._data)

     def __iadd__(self, byteslike):
         self._data += byteslike
         return self

     def maybe_extract_at_most(self, count):
-        out = self._data[self._start : self._start + count]
+        out = self._data[:count]
         if not out:
             return None
-        self._start += len(out)
+        # Note that front-truncation of bytesarray is amortized O(1), from
+        # Python 3.4 onwards, thanks to some excellent work by Antoine Pitrou:
+        #
+        # https://bugs.python.org/issue19087
+        del self._data[:count]
         return out

     def maybe_extract_until_next(self, needle):
         # Returns extracted bytes on success (advancing offset), or None on
         # failure
         if self._looked_for == needle:
-            search_start = max(self._start, self._looked_at - len(needle) + 1)
+            search_start = max(0, self._looked_at - len(needle) + 1)
         else:
-            search_start = self._start
+            search_start = 0
         offset = self._data.find(needle, search_start)
         if offset == -1:
             self._looked_at = len(self._data)
             self._looked_for = needle
             return None
         new_start = offset + len(needle)
-        out = self._data[self._start : new_start]
-        self._start = new_start
+        out = self._data[:new_start]
+        del self._data[:new_start]
         return out

     # HTTP/1.1 has a number of constructs where you keep reading lines until
     # you see a blank one. This does that, and then returns the lines.
     def maybe_extract_lines(self):
-        if self._data[self._start : self._start + 2] == b"\r\n":
-            self._start += 2
+        if self._data[:2] == b"\r\n":
+            del self._data[:2]
             return []
         else:
             data = self.maybe_extract_until_next(b"\r\n\r\n")
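The inline comment added in maybe_extract_at_most is doing the real work of this PR: from Python 3.4 onwards (https://bugs.python.org/issue19087), del some_bytearray[:n] is amortized O(n) in the number of bytes removed, independent of how much data remains in the buffer, which is exactly what the old self._start/compress() bookkeeping simulated by hand. A rough, hypothetical timing sketch of that property (the sizes, chunk value, and drain helper are illustrative and not part of the PR; the removed comment also notes that PyPy should be double-checked separately):

import timeit

def drain(total_size, chunk=4096):
    # Repeatedly consume a fixed-size chunk from the front of a bytearray.
    # On CPython 3.4+ the delete mostly just advances an internal offset,
    # so the whole loop is roughly O(total_size) rather than quadratic.
    buf = bytearray(total_size)
    while buf:
        del buf[:chunk]

for size in (1000000, 2000000, 4000000):
    t = timeit.timeit(lambda: drain(size), number=3)
    print("%9d bytes: %.4fs" % (size, t))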
2 changes: 0 additions & 2 deletions h11/tests/test_receivebuffer.py
@@ -12,15 +12,13 @@ def test_receivebuffer():
     assert len(b) == 3
     assert bytes(b) == b"123"

-    b.compress()
     assert bytes(b) == b"123"

     assert b.maybe_extract_at_most(2) == b"12"
     assert b
     assert len(b) == 1
     assert bytes(b) == b"3"

-    b.compress()
     assert bytes(b) == b"3"

     assert b.maybe_extract_at_most(10) == b"3"
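For completeness, a hedged sketch in the style of the existing tests (not part of the PR) showing how maybe_extract_until_next behaves across partial feeds after the change: a failed search remembers how far it looked via _looked_at, and the next call resumes just before that point, with no _start offset left to adjust because the buffer truncates itself on extraction:

from h11._receivebuffer import ReceiveBuffer

b = ReceiveBuffer()
b += b"HTTP/1.1 200 OK\r\nContent-Length: 0"

# Terminator not present yet: the buffer records how far it scanned
# and returns None.
assert b.maybe_extract_until_next(b"\r\n\r\n") is None

# More data arrives; the search resumes near the remembered position
# instead of rescanning from the beginning.
b += b"\r\n\r\ntrailing"
assert (
    b.maybe_extract_until_next(b"\r\n\r\n")
    == b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"
)

# The extracted prefix has already been deleted from the bytearray.
assert bytes(b) == b"trailing"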