
Use native bytearray truncation #120

Closed
1 change: 0 additions & 1 deletion h11/_connection.py
@@ -425,7 +425,6 @@ def next_event(self):
             event = self._extract_next_receive_event()
             if event not in [NEED_DATA, PAUSED]:
                 self._process_event(self.their_role, event)
-            self._receive_buffer.compress()
             if event is NEED_DATA:
                 if len(self._receive_buffer) > self._max_incomplete_event_size:
                     # 431 is "Request header fields too large" which is pretty
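Why the explicit compress() call can go away here: after this PR, every maybe_extract_* method in ReceiveBuffer (see the h11/_receivebuffer.py diff below) deletes the consumed prefix from the underlying bytearray itself, so next_event never leaves stale bytes behind between events. A minimal sketch of the resulting behaviour, assuming the post-PR ReceiveBuffer API; the buffer contents are illustrative only:

from h11._receivebuffer import ReceiveBuffer

buf = ReceiveBuffer()
buf += b"0123456789"

# Extraction deletes the consumed prefix in place, so there is no
# leftover data for a separate compress() pass to clean up.
assert buf.maybe_extract_at_most(4) == b"0123"
assert bytes(buf) == b"456789"
assert len(buf) == 6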
55 changes: 14 additions & 41 deletions h11/_receivebuffer.py
@@ -16,33 +16,10 @@
 # of constantly copying
 # WARNING:
 # - I haven't benchmarked or profiled any of this yet.
-#
-# Note that starting in Python 3.4, deleting the initial n bytes from a
-# bytearray is amortized O(n), thanks to some excellent work by Antoine
-# Martin:
-#
-# https://bugs.python.org/issue19087
Contributor: I think it'd be helpful to keep the paragraph above.

Contributor Author: Fair point, yes. I've addressed it as an inline comment.

-#
-# This means that if we only supported 3.4+, we could get rid of the code here
-# involving self._start and self.compress, because it's doing exactly the same
-# thing that bytearray now does internally.
-#
-# BUT unfortunately, we still support 2.7, and reading short segments out of a
-# long buffer MUST be O(bytes read) to avoid DoS issues, so we can't actually
-# delete this code. Yet:
-#
-# https://pythonclock.org/
-#
-# (Two things to double-check first though: make sure PyPy also has the
-# optimization, and benchmark to make sure it's a win, since we do have a
-# slightly clever thing where we delay calling compress() until we've
-# processed a whole event, which could in theory be slightly more efficient
-# than the internal bytearray support.)
 class ReceiveBuffer(object):
     def __init__(self):
         self._data = bytearray()
-        # These are both absolute offsets into self._data:
-        self._start = 0
         self._looked_at = 0
         self._looked_for = b""

@@ -51,56 +28,52 @@ def __bool__(self):

     # for @property unprocessed_data
     def __bytes__(self):
-        return bytes(self._data[self._start :])
+        return bytes(self._data)

     if sys.version_info[0] < 3: # version specific: Python 2
         __str__ = __bytes__
         __nonzero__ = __bool__

     def __len__(self):
-        return len(self._data) - self._start
-
-    def compress(self):
-        # Heuristic: only compress if it lets us reduce size by a factor
-        # of 2
-        if self._start > len(self._data) // 2:
-            del self._data[: self._start]
-            self._looked_at -= self._start
-            self._start -= self._start
+        return len(self._data)

     def __iadd__(self, byteslike):
         self._data += byteslike
         return self

     def maybe_extract_at_most(self, count):
-        out = self._data[self._start : self._start + count]
+        out = self._data[:count]
         if not out:
             return None
-        self._start += len(out)
+        # Note that front-truncation of bytesarray is amortized O(1), from
+        # Python 3.4 onwards, thanks to some excellent work by Antoine Pitrou:
+        #
+        # https://bugs.python.org/issue19087
+        del self._data[:count]
         return out

     def maybe_extract_until_next(self, needle):
         # Returns extracted bytes on success (advancing offset), or None on
         # failure
         if self._looked_for == needle:
-            search_start = max(self._start, self._looked_at - len(needle) + 1)
+            search_start = max(0, self._looked_at - len(needle) + 1)
         else:
-            search_start = self._start
+            search_start = 0
         offset = self._data.find(needle, search_start)
         if offset == -1:
             self._looked_at = len(self._data)
             self._looked_for = needle
             return None
         new_start = offset + len(needle)
-        out = self._data[self._start : new_start]
-        self._start = new_start
+        out = self._data[:new_start]
+        del self._data[:new_start]
         return out

     # HTTP/1.1 has a number of constructs where you keep reading lines until
     # you see a blank one. This does that, and then returns the lines.
     def maybe_extract_lines(self):
-        if self._data[self._start : self._start + 2] == b"\r\n":
-            self._start += 2
+        if self._data[:2] == b"\r\n":
+            del self._data[:2]
             return []
         else:
             data = self.maybe_extract_until_next(b"\r\n\r\n")
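The inline comment added in maybe_extract_at_most is doing the real work of this PR: from Python 3.4 onwards (https://bugs.python.org/issue19087), del some_bytearray[:n] is amortized O(n) in the number of bytes removed, independent of how much data remains in the buffer, which is exactly what the old self._start/compress() bookkeeping simulated by hand. A rough, hypothetical timing sketch of that property (the sizes, chunk value, and drain helper are illustrative and not part of the PR; the removed comment also notes that PyPy should be double-checked separately):

import timeit

def drain(total_size, chunk=4096):
    # Repeatedly consume a fixed-size chunk from the front of a bytearray.
    # On CPython 3.4+ the delete mostly just advances an internal offset,
    # so the whole loop is roughly O(total_size) rather than quadratic.
    buf = bytearray(total_size)
    while buf:
        del buf[:chunk]

for size in (1000000, 2000000, 4000000):
    t = timeit.timeit(lambda: drain(size), number=3)
    print("%9d bytes: %.4fs" % (size, t))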
2 changes: 0 additions & 2 deletions h11/tests/test_receivebuffer.py
@@ -12,15 +12,13 @@ def test_receivebuffer():
     assert len(b) == 3
     assert bytes(b) == b"123"

-    b.compress()
     assert bytes(b) == b"123"

     assert b.maybe_extract_at_most(2) == b"12"
     assert b
     assert len(b) == 1
     assert bytes(b) == b"3"

-    b.compress()
     assert bytes(b) == b"3"

     assert b.maybe_extract_at_most(10) == b"3"
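For completeness, a hedged sketch in the style of the existing tests (not part of the PR) showing how maybe_extract_until_next behaves across partial feeds after the change: a failed search remembers how far it looked via _looked_at, and the next call resumes just before that point, with no _start offset left to adjust because the buffer truncates itself on extraction:

from h11._receivebuffer import ReceiveBuffer

b = ReceiveBuffer()
b += b"HTTP/1.1 200 OK\r\nContent-Length: 0"

# Terminator not present yet: the buffer records how far it scanned
# and returns None.
assert b.maybe_extract_until_next(b"\r\n\r\n") is None

# More data arrives; the search resumes near the remembered position
# instead of rescanning from the beginning.
b += b"\r\n\r\ntrailing"
assert (
    b.maybe_extract_until_next(b"\r\n\r\n")
    == b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n"
)

# The extracted prefix has already been deleted from the bytearray.
assert bytes(b) == b"trailing"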