From d379c6b81fa32546c75a766e7a02b23851be29c6 Mon Sep 17 00:00:00 2001 From: Dmitry Erlikh Date: Wed, 27 Oct 2021 15:07:44 +0200 Subject: [PATCH] http-parser -> llhttp (#5364) --- .github/workflows/ci.yml | 40 +++++- .gitignore | 1 + .gitmodules | 8 +- CHANGES/3561.feature | 1 + Makefile | 14 +- aiohttp/_cparser.pxd | 293 ++++++++++++++++++++++---------------- aiohttp/_http_parser.pyx | 207 ++++++++------------------- setup.py | 9 +- tests/test_http_parser.py | 9 ++ vendor/http-parser | 1 - vendor/llhttp | 1 + 11 files changed, 302 insertions(+), 282 deletions(-) create mode 100644 CHANGES/3561.feature delete mode 160000 vendor/http-parser create mode 160000 vendor/llhttp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b92a258785..81f96ffbb93 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,9 +71,41 @@ jobs: run: | LC_ALL=C sort -c CONTRIBUTORS.txt + gen_llhttp: + name: Generate llhttp sources + needs: lint + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: true + - name: Cache llhttp generated files + uses: actions/cache@v2 + id: cache + with: + key: llhttp-${{ hashFiles('vendor/llhttp/package.json', 'vendor/llhttp/src/**/*') }} + path: vendor/llhttp/build + - name: Setup NodeJS + if: steps.cache.outputs.cache-hit != 'true' + uses: actions/setup-node@v2 + with: + node-version: '14' + - name: Generate llhttp sources + if: steps.cache.outputs.cache-hit != 'true' + run: | + make generate-llhttp + - name: Upload llhttp generated files + uses: actions/upload-artifact@v2 + with: + name: llhttp + path: vendor/llhttp/build + if-no-files-found: error + test: name: Test - needs: lint + needs: gen_llhttp strategy: matrix: pyver: [3.7, 3.8, 3.9, '3.10'] @@ -122,6 +154,12 @@ jobs: if: ${{ matrix.no-extensions == '' }} run: | make cythonize + - name: Restore llhttp generated files + if: ${{ matrix.no-extensions == '' }} + uses: actions/download-artifact@v2 + with: + name: llhttp + path: vendor/llhttp/build/ - name: Run unittests env: COLOR: 'yes' diff --git a/.gitignore b/.gitignore index 8556509c6f7..7d38dd91998 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ .idea .install-cython .install-deps +.llhttp-gen .installed.cfg .mypy_cache .noseids diff --git a/.gitmodules b/.gitmodules index c58945aa9fc..5f9f397b8e2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ -[submodule "vendor/http-parser"] - path = vendor/http-parser - url = git://github.com/nodejs/http-parser.git - branch = 54f55a2 +[submodule "vendor/llhttp"] + path = vendor/llhttp + url = https://github.com/nodejs/llhttp.git + branch = v3.0.0 diff --git a/CHANGES/3561.feature b/CHANGES/3561.feature new file mode 100644 index 00000000000..db42f95f08e --- /dev/null +++ b/CHANGES/3561.feature @@ -0,0 +1 @@ +Switch from http-parser to llhttp diff --git a/Makefile b/Makefile index 81b8a0325c3..c64173402b0 100644 --- a/Makefile +++ b/Makefile @@ -62,6 +62,15 @@ aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c cython -3 -o $@ $< -I aiohttp +vendor/llhttp/node_modules: vendor/llhttp/package.json + cd vendor/llhttp; npm install + +.llhttp-gen: vendor/llhttp/node_modules + $(MAKE) -C vendor/llhttp generate + @touch .llhttp-gen + +.PHONY: generate-llhttp +generate-llhttp: .llhttp-gen .PHONY: cythonize cythonize: .install-cython $(PYXS:.pyx=.c) @@ -81,7 +90,7 @@ fmt format: mypy: mypy -.develop: .install-deps $(call to-hash,$(PYS) $(CYS) $(CS)) +.develop: .install-deps generate-llhttp $(call to-hash,$(PYS) $(CYS) $(CS)) pip install -e . @touch .develop @@ -137,6 +146,9 @@ clean: @rm -rf aiohttp.egg-info @rm -f .install-deps @rm -f .install-cython + @rm -rf vendor/llhttp/node_modules + @rm -f .llhttp-gen + @$(MAKE) -C vendor/llhttp clean .PHONY: doc doc: diff --git a/aiohttp/_cparser.pxd b/aiohttp/_cparser.pxd index 0f9fc009236..41f2f56e493 100644 --- a/aiohttp/_cparser.pxd +++ b/aiohttp/_cparser.pxd @@ -1,140 +1,189 @@ -from libc.stdint cimport uint16_t, uint32_t, uint64_t - - -cdef extern from "../vendor/http-parser/http_parser.h": - ctypedef int (*http_data_cb) (http_parser*, - const char *at, - size_t length) except -1 - - ctypedef int (*http_cb) (http_parser*) except -1 - - struct http_parser: - unsigned int type - unsigned int flags - unsigned int state - unsigned int header_state - unsigned int index - - uint32_t nread +from libc.stdint cimport ( + int8_t, + int16_t, + int32_t, + int64_t, + uint8_t, + uint16_t, + uint32_t, + uint64_t, +) + +cdef extern from "../vendor/llhttp/build/llhttp.h": + + struct llhttp__internal_s: + int32_t _index + void* _span_pos0 + void* _span_cb0 + int32_t error + const char* reason + const char* error_pos + void* data + void* _current uint64_t content_length - - unsigned short http_major - unsigned short http_minor - unsigned int status_code - unsigned int method - unsigned int http_errno - - unsigned int upgrade - - void *data - - struct http_parser_settings: - http_cb on_message_begin - http_data_cb on_url - http_data_cb on_status - http_data_cb on_header_field - http_data_cb on_header_value - http_cb on_headers_complete - http_data_cb on_body - http_cb on_message_complete - http_cb on_chunk_header - http_cb on_chunk_complete - - enum http_parser_type: - HTTP_REQUEST, - HTTP_RESPONSE, - HTTP_BOTH - - enum http_errno: + uint8_t type + uint8_t method + uint8_t http_major + uint8_t http_minor + uint8_t header_state + uint8_t lenient_flags + uint8_t upgrade + uint8_t finish + uint16_t flags + uint16_t status_code + void* settings + + ctypedef llhttp__internal_s llhttp__internal_t + ctypedef llhttp__internal_t llhttp_t + + ctypedef int (*llhttp_data_cb)(llhttp_t*, const char *at, size_t length) except -1 + ctypedef int (*llhttp_cb)(llhttp_t*) except -1 + + struct llhttp_settings_s: + llhttp_cb on_message_begin + llhttp_data_cb on_url + llhttp_data_cb on_status + llhttp_data_cb on_header_field + llhttp_data_cb on_header_value + llhttp_cb on_headers_complete + llhttp_data_cb on_body + llhttp_cb on_message_complete + llhttp_cb on_chunk_header + llhttp_cb on_chunk_complete + + llhttp_cb on_url_complete + llhttp_cb on_status_complete + llhttp_cb on_header_field_complete + llhttp_cb on_header_value_complete + + ctypedef llhttp_settings_s llhttp_settings_t + + enum llhttp_errno: HPE_OK, - HPE_CB_message_begin, - HPE_CB_url, - HPE_CB_header_field, - HPE_CB_header_value, - HPE_CB_headers_complete, - HPE_CB_body, - HPE_CB_message_complete, - HPE_CB_status, - HPE_CB_chunk_header, - HPE_CB_chunk_complete, - HPE_INVALID_EOF_STATE, - HPE_HEADER_OVERFLOW, + HPE_INTERNAL, + HPE_STRICT, + HPE_LF_EXPECTED, + HPE_UNEXPECTED_CONTENT_LENGTH, HPE_CLOSED_CONNECTION, - HPE_INVALID_VERSION, - HPE_INVALID_STATUS, HPE_INVALID_METHOD, HPE_INVALID_URL, - HPE_INVALID_HOST, - HPE_INVALID_PORT, - HPE_INVALID_PATH, - HPE_INVALID_QUERY_STRING, - HPE_INVALID_FRAGMENT, - HPE_LF_EXPECTED, + HPE_INVALID_CONSTANT, + HPE_INVALID_VERSION, HPE_INVALID_HEADER_TOKEN, HPE_INVALID_CONTENT_LENGTH, HPE_INVALID_CHUNK_SIZE, - HPE_INVALID_CONSTANT, - HPE_INVALID_INTERNAL_STATE, - HPE_STRICT, + HPE_INVALID_STATUS, + HPE_INVALID_EOF_STATE, + HPE_INVALID_TRANSFER_ENCODING, + HPE_CB_MESSAGE_BEGIN, + HPE_CB_HEADERS_COMPLETE, + HPE_CB_MESSAGE_COMPLETE, + HPE_CB_CHUNK_HEADER, + HPE_CB_CHUNK_COMPLETE, HPE_PAUSED, - HPE_UNKNOWN + HPE_PAUSED_UPGRADE, + HPE_USER - enum flags: - F_CHUNKED, + ctypedef llhttp_errno llhttp_errno_t + + enum llhttp_flags: F_CONNECTION_KEEP_ALIVE, F_CONNECTION_CLOSE, F_CONNECTION_UPGRADE, - F_TRAILING, + F_CHUNKED, F_UPGRADE, + F_CONTENT_LENGTH, F_SKIPBODY, - F_CONTENTLENGTH - - enum http_method: - DELETE, GET, HEAD, POST, PUT, CONNECT, OPTIONS, TRACE, COPY, - LOCK, MKCOL, MOVE, PROPFIND, PROPPATCH, SEARCH, UNLOCK, BIND, - REBIND, UNBIND, ACL, REPORT, MKACTIVITY, CHECKOUT, MERGE, - MSEARCH, NOTIFY, SUBSCRIBE, UNSUBSCRIBE, PATCH, PURGE, MKCALENDAR, - LINK, UNLINK - - void http_parser_init(http_parser *parser, http_parser_type type) - - size_t http_parser_execute(http_parser *parser, - const http_parser_settings *settings, - const char *data, - size_t len) - - int http_should_keep_alive(const http_parser *parser) - - void http_parser_settings_init(http_parser_settings *settings) - - const char *http_errno_name(http_errno err) - const char *http_errno_description(http_errno err) - const char *http_method_str(http_method m) - - # URL Parser - - enum http_parser_url_fields: - UF_SCHEMA = 0, - UF_HOST = 1, - UF_PORT = 2, - UF_PATH = 3, - UF_QUERY = 4, - UF_FRAGMENT = 5, - UF_USERINFO = 6, - UF_MAX = 7 - - struct http_parser_url_field_data: - uint16_t off - uint16_t len + F_TRAILING, + F_TRANSFER_ENCODING - struct http_parser_url: - uint16_t field_set - uint16_t port - http_parser_url_field_data[UF_MAX] field_data + enum llhttp_lenient_flags: + LENIENT_HEADERS, + LENIENT_CHUNKED_LENGTH - void http_parser_url_init(http_parser_url *u) + enum llhttp_type: + HTTP_REQUEST, + HTTP_RESPONSE, + HTTP_BOTH - int http_parser_parse_url(const char *buf, - size_t buflen, - int is_connect, - http_parser_url *u) + enum llhttp_finish_t: + HTTP_FINISH_SAFE, + HTTP_FINISH_SAFE_WITH_CB, + HTTP_FINISH_UNSAFE + + enum llhttp_method: + HTTP_DELETE, + HTTP_GET, + HTTP_HEAD, + HTTP_POST, + HTTP_PUT, + HTTP_CONNECT, + HTTP_OPTIONS, + HTTP_TRACE, + HTTP_COPY, + HTTP_LOCK, + HTTP_MKCOL, + HTTP_MOVE, + HTTP_PROPFIND, + HTTP_PROPPATCH, + HTTP_SEARCH, + HTTP_UNLOCK, + HTTP_BIND, + HTTP_REBIND, + HTTP_UNBIND, + HTTP_ACL, + HTTP_REPORT, + HTTP_MKACTIVITY, + HTTP_CHECKOUT, + HTTP_MERGE, + HTTP_MSEARCH, + HTTP_NOTIFY, + HTTP_SUBSCRIBE, + HTTP_UNSUBSCRIBE, + HTTP_PATCH, + HTTP_PURGE, + HTTP_MKCALENDAR, + HTTP_LINK, + HTTP_UNLINK, + HTTP_SOURCE, + HTTP_PRI, + HTTP_DESCRIBE, + HTTP_ANNOUNCE, + HTTP_SETUP, + HTTP_PLAY, + HTTP_PAUSE, + HTTP_TEARDOWN, + HTTP_GET_PARAMETER, + HTTP_SET_PARAMETER, + HTTP_REDIRECT, + HTTP_RECORD, + HTTP_FLUSH + + ctypedef llhttp_method llhttp_method_t; + + void llhttp_settings_init(llhttp_settings_t* settings) + void llhttp_init(llhttp_t* parser, llhttp_type type, + const llhttp_settings_t* settings) + + llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len) + llhttp_errno_t llhttp_finish(llhttp_t* parser) + + int llhttp_message_needs_eof(const llhttp_t* parser) + + int llhttp_should_keep_alive(const llhttp_t* parser) + + void llhttp_pause(llhttp_t* parser) + void llhttp_resume(llhttp_t* parser) + + void llhttp_resume_after_upgrade(llhttp_t* parser) + + llhttp_errno_t llhttp_get_errno(const llhttp_t* parser) + const char* llhttp_get_error_reason(const llhttp_t* parser) + void llhttp_set_error_reason(llhttp_t* parser, const char* reason) + const char* llhttp_get_error_pos(const llhttp_t* parser) + const char* llhttp_errno_name(llhttp_errno_t err) + + const char* llhttp_method_name(llhttp_method_t method) + + void llhttp_set_lenient_headers(llhttp_t* parser, int enabled) + void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled) diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index 2a6d1ffa6f5..a63548854a0 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -80,13 +80,13 @@ cdef inline object extend(object buf, const char* at, size_t length): memcpy(ptr + s, at, length) -DEF METHODS_COUNT = 34; +DEF METHODS_COUNT = 46; cdef list _http_method = [] for i in range(METHODS_COUNT): _http_method.append( - cparser.http_method_str( i).decode('ascii')) + cparser.llhttp_method_name( i).decode('ascii')) cdef inline str http_method_str(int i): @@ -272,8 +272,8 @@ cdef _new_response_message(object version, cdef class HttpParser: cdef: - cparser.http_parser* _cparser - cparser.http_parser_settings* _csettings + cparser.llhttp_t* _cparser + cparser.llhttp_settings_t* _csettings bytearray _raw_name bytearray _raw_value @@ -310,13 +310,13 @@ cdef class HttpParser: Py_buffer py_buf def __cinit__(self): - self._cparser = \ - PyMem_Malloc(sizeof(cparser.http_parser)) + self._cparser = \ + PyMem_Malloc(sizeof(cparser.llhttp_t)) if self._cparser is NULL: raise MemoryError() - self._csettings = \ - PyMem_Malloc(sizeof(cparser.http_parser_settings)) + self._csettings = \ + PyMem_Malloc(sizeof(cparser.llhttp_settings_t)) if self._csettings is NULL: raise MemoryError() @@ -324,19 +324,18 @@ cdef class HttpParser: PyMem_Free(self._cparser) PyMem_Free(self._csettings) - cdef _init(self, cparser.http_parser_type mode, + cdef _init(self, cparser.llhttp_type mode, object protocol, object loop, int limit, object timer=None, size_t max_line_size=8190, size_t max_headers=32768, size_t max_field_size=8190, payload_exception=None, bint response_with_body=True, bint read_until_eof=False, bint auto_decompress=True): - cparser.http_parser_init(self._cparser, mode) + cparser.llhttp_settings_init(self._csettings) + cparser.llhttp_init(self._cparser, mode, self._csettings) self._cparser.data = self self._cparser.content_length = 0 - cparser.http_parser_settings_init(self._csettings) - self._protocol = protocol self._loop = loop self._timer = timer @@ -417,7 +416,7 @@ cdef class HttpParser: self._process_header() method = http_method_str(self._cparser.method) - should_close = not cparser.http_should_keep_alive(self._cparser) + should_close = not cparser.llhttp_should_keep_alive(self._cparser) upgrade = self._cparser.upgrade chunked = self._cparser.flags & cparser.F_CHUNKED @@ -453,7 +452,7 @@ cdef class HttpParser: if (ULLONG_MAX > self._cparser.content_length > 0 or chunked or self._cparser.method == 5 or # CONNECT: 5 (self._cparser.status_code >= 199 and - self._cparser.content_length == ULLONG_MAX and + self._cparser.content_length == 0 and self._read_until_eof) ): payload = StreamReader( @@ -485,7 +484,7 @@ cdef class HttpParser: pass cdef inline http_version(self): - cdef cparser.http_parser* parser = self._cparser + cdef cparser.llhttp_t* parser = self._cparser if parser.http_major == 1: if parser.http_minor == 0: @@ -504,12 +503,11 @@ cdef class HttpParser: if self._cparser.flags & cparser.F_CHUNKED: raise TransferEncodingError( "Not enough data for satisfy transfer length header.") - elif self._cparser.flags & cparser.F_CONTENTLENGTH: + elif self._cparser.flags & cparser.F_CONTENT_LENGTH: raise ContentLengthError( "Not enough data for satisfy content length header.") - elif self._cparser.http_errno != cparser.HPE_OK: - desc = cparser.http_errno_description( - self._cparser.http_errno) + elif cparser.llhttp_get_errno(self._cparser) != cparser.HPE_OK: + desc = cparser.llhttp_get_error_reason(self._cparser) raise PayloadEncodingError(desc.decode('latin-1')) else: self._payload.feed_eof() @@ -522,26 +520,30 @@ cdef class HttpParser: cdef: size_t data_len size_t nb + cdef cparser.llhttp_errno_t errno PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE) data_len = self.py_buf.len - nb = cparser.http_parser_execute( + errno = cparser.llhttp_execute( self._cparser, - self._csettings, self.py_buf.buf, data_len) + if errno is cparser.HPE_PAUSED_UPGRADE: + cparser.llhttp_resume_after_upgrade(self._cparser) + + nb = cparser.llhttp_get_error_pos(self._cparser) - self.py_buf.buf + PyBuffer_Release(&self.py_buf) - if (self._cparser.http_errno != cparser.HPE_OK): + if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED_UPGRADE): if self._payload_error == 0: if self._last_error is not None: ex = self._last_error self._last_error = None else: - ex = parser_error_from_errno( - self._cparser.http_errno) + ex = parser_error_from_errno(self._cparser) self._payload = None raise ex @@ -574,20 +576,13 @@ cdef class HttpRequestParser(HttpParser): auto_decompress) cdef object _on_status_complete(self): - cdef Py_buffer py_buf if not self._buf: return self._path = self._buf.decode('utf-8', 'surrogateescape') - if self._cparser.method == 5: # CONNECT + try: self._url = URL(self._path) - else: - PyObject_GetBuffer(self._buf, &py_buf, PyBUF_SIMPLE) - try: - self._url = _parse_url(py_buf.buf, - py_buf.len) - finally: - PyBuffer_Release(&py_buf) - PyByteArray_Resize(self._buf, 0) + finally: + PyByteArray_Resize(self._buf, 0) cdef class HttpResponseParser(HttpParser): @@ -610,7 +605,7 @@ cdef class HttpResponseParser(HttpParser): else: self._reason = self._reason or '' -cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: +cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data pyparser._started = True @@ -622,7 +617,7 @@ cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_url(cparser.http_parser* parser, +cdef int cb_on_url(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data try: @@ -637,7 +632,7 @@ cdef int cb_on_url(cparser.http_parser* parser, return 0 -cdef int cb_on_status(cparser.http_parser* parser, +cdef int cb_on_status(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef str reason @@ -653,7 +648,7 @@ cdef int cb_on_status(cparser.http_parser* parser, return 0 -cdef int cb_on_header_field(cparser.http_parser* parser, +cdef int cb_on_header_field(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef Py_ssize_t size @@ -671,7 +666,7 @@ cdef int cb_on_header_field(cparser.http_parser* parser, return 0 -cdef int cb_on_header_value(cparser.http_parser* parser, +cdef int cb_on_header_value(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef Py_ssize_t size @@ -688,7 +683,7 @@ cdef int cb_on_header_value(cparser.http_parser* parser, return 0 -cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1: +cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_status_complete() @@ -703,7 +698,7 @@ cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_body(cparser.http_parser* parser, +cdef int cb_on_body(cparser.llhttp_t* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef bytes body = at[:length] @@ -720,7 +715,7 @@ cdef int cb_on_body(cparser.http_parser* parser, return 0 -cdef int cb_on_message_complete(cparser.http_parser* parser) except -1: +cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._started = False @@ -732,7 +727,7 @@ cdef int cb_on_message_complete(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1: +cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_chunk_header() @@ -743,7 +738,7 @@ cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1: return 0 -cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1: +cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_chunk_complete() @@ -754,19 +749,21 @@ cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1: return 0 -cdef parser_error_from_errno(cparser.http_errno errno): - cdef bytes desc = cparser.http_errno_description(errno) - - if errno in (cparser.HPE_CB_message_begin, - cparser.HPE_CB_url, - cparser.HPE_CB_header_field, - cparser.HPE_CB_header_value, - cparser.HPE_CB_headers_complete, - cparser.HPE_CB_body, - cparser.HPE_CB_message_complete, - cparser.HPE_CB_status, - cparser.HPE_CB_chunk_header, - cparser.HPE_CB_chunk_complete): +cdef parser_error_from_errno(cparser.llhttp_t* parser): + cdef cparser.llhttp_errno_t errno = cparser.llhttp_get_errno(parser) + cdef bytes desc = cparser.llhttp_get_error_reason(parser) + + if errno in (cparser.HPE_CB_MESSAGE_BEGIN, + cparser.HPE_CB_HEADERS_COMPLETE, + cparser.HPE_CB_MESSAGE_COMPLETE, + cparser.HPE_CB_CHUNK_HEADER, + cparser.HPE_CB_CHUNK_COMPLETE, + cparser.HPE_INVALID_CONSTANT, + cparser.HPE_INVALID_HEADER_TOKEN, + cparser.HPE_INVALID_CONTENT_LENGTH, + cparser.HPE_INVALID_CHUNK_SIZE, + cparser.HPE_INVALID_EOF_STATE, + cparser.HPE_INVALID_TRANSFER_ENCODING): cls = BadHttpMessage elif errno == cparser.HPE_INVALID_STATUS: @@ -775,6 +772,9 @@ cdef parser_error_from_errno(cparser.http_errno errno): elif errno == cparser.HPE_INVALID_METHOD: cls = BadStatusLine + elif errno == cparser.HPE_INVALID_VERSION: + cls = BadStatusLine + elif errno == cparser.HPE_INVALID_URL: cls = InvalidURLError @@ -782,96 +782,3 @@ cdef parser_error_from_errno(cparser.http_errno errno): cls = BadHttpMessage return cls(desc.decode('latin-1')) - - -def parse_url(url): - cdef: - Py_buffer py_buf - char* buf_data - - PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE) - try: - buf_data = py_buf.buf - return _parse_url(buf_data, py_buf.len) - finally: - PyBuffer_Release(&py_buf) - - -cdef _parse_url(char* buf_data, size_t length): - cdef: - cparser.http_parser_url* parsed - int res - str schema = None - str host = None - object port = None - str path = None - str query = None - str fragment = None - str user = None - str password = None - str userinfo = None - object result = None - int off - int ln - - parsed = \ - PyMem_Malloc(sizeof(cparser.http_parser_url)) - if parsed is NULL: - raise MemoryError() - cparser.http_parser_url_init(parsed) - try: - res = cparser.http_parser_parse_url(buf_data, length, 0, parsed) - - if res == 0: - if parsed.field_set & (1 << cparser.UF_SCHEMA): - off = parsed.field_data[cparser.UF_SCHEMA].off - ln = parsed.field_data[cparser.UF_SCHEMA].len - schema = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - schema = '' - - if parsed.field_set & (1 << cparser.UF_HOST): - off = parsed.field_data[cparser.UF_HOST].off - ln = parsed.field_data[cparser.UF_HOST].len - host = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - host = '' - - if parsed.field_set & (1 << cparser.UF_PORT): - port = parsed.port - - if parsed.field_set & (1 << cparser.UF_PATH): - off = parsed.field_data[cparser.UF_PATH].off - ln = parsed.field_data[cparser.UF_PATH].len - path = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - path = '' - - if parsed.field_set & (1 << cparser.UF_QUERY): - off = parsed.field_data[cparser.UF_QUERY].off - ln = parsed.field_data[cparser.UF_QUERY].len - query = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - query = '' - - if parsed.field_set & (1 << cparser.UF_FRAGMENT): - off = parsed.field_data[cparser.UF_FRAGMENT].off - ln = parsed.field_data[cparser.UF_FRAGMENT].len - fragment = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - else: - fragment = '' - - if parsed.field_set & (1 << cparser.UF_USERINFO): - off = parsed.field_data[cparser.UF_USERINFO].off - ln = parsed.field_data[cparser.UF_USERINFO].len - userinfo = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') - - user, sep, password = userinfo.partition(':') - - return URL_build(scheme=schema, - user=user, password=password, host=host, port=port, - path=path, query_string=query, fragment=fragment, encoded=True) - else: - raise InvalidURLError("invalid url {!r}".format(buf_data)) - finally: - PyMem_Free(parsed) diff --git a/setup.py b/setup.py index a73d331ea07..ef236cb275a 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ NO_EXTENSIONS = True -if IS_GIT_REPO and not (HERE / "vendor/http-parser/README.md").exists(): +if IS_GIT_REPO and not (HERE / "vendor/llhttp/README.md").exists(): print("Install submodules when building from git clone", file=sys.stderr) print("Hint:", file=sys.stderr) print(" git submodule update --init", file=sys.stderr) @@ -33,10 +33,13 @@ "aiohttp._http_parser", [ "aiohttp/_http_parser.c", - "vendor/http-parser/http_parser.c", "aiohttp/_find_header.c", + "vendor/llhttp/build/c/llhttp.c", + "vendor/llhttp/src/native/api.c", + "vendor/llhttp/src/native/http.c", ], - define_macros=[("HTTP_PARSER_STRICT", 0)], + define_macros=[("LLHTTP_STRICT_MODE", 0)], + include_dirs=["vendor/llhttp/build"], ), Extension("aiohttp._helpers", ["aiohttp/_helpers.c"]), Extension("aiohttp._http_writer", ["aiohttp/_http_writer.c"]), diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 80913ae4360..670261f8b11 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -13,6 +13,7 @@ import aiohttp from aiohttp import http_exceptions, streams from aiohttp.http_parser import ( + NO_EXTENSIONS, DeflateBuffer, HttpPayloadParser, HttpRequestParserPy, @@ -85,6 +86,14 @@ def stream(): return mock.Mock() +@pytest.mark.skipif(NO_EXTENSIONS, reason="Extentions available but not imported") +def test_c_parser_loaded(): + assert "HttpRequestParserC" in dir(aiohttp.http_parser) + assert "HttpResponseParserC" in dir(aiohttp.http_parser) + assert "RawRequestMessageC" in dir(aiohttp.http_parser) + assert "RawResponseMessageC" in dir(aiohttp.http_parser) + + def test_parse_headers(parser: Any) -> None: text = b"""GET /test HTTP/1.1\r test: line\r diff --git a/vendor/http-parser b/vendor/http-parser deleted file mode 160000 index 2343fd6b521..00000000000 --- a/vendor/http-parser +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2343fd6b5214b2ded2cdcf76de2bf60903bb90cd diff --git a/vendor/llhttp b/vendor/llhttp new file mode 160000 index 00000000000..c6a35cccf5c --- /dev/null +++ b/vendor/llhttp @@ -0,0 +1 @@ +Subproject commit c6a35cccf5c8b36f82036c23cf9c50a7dc2dbd0a