From 7cea67a00a724d5ac923a3210fb64ca7255f1fc2 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Wed, 27 Oct 2021 17:24:35 +0300 Subject: [PATCH] Revert "http-parser -> llhttp (#5364)" (#6143) This reverts commit d379c6b81fa32546c75a766e7a02b23851be29c6. --- .github/workflows/ci.yml | 40 +----- .gitignore | 1 - .gitmodules | 8 +- CHANGES/3561.feature | 1 - Makefile | 14 +- aiohttp/_cparser.pxd | 293 ++++++++++++++++---------------------- aiohttp/_http_parser.pyx | 207 +++++++++++++++++++-------- setup.py | 9 +- tests/test_http_parser.py | 9 -- vendor/http-parser | 1 + vendor/llhttp | 1 - 11 files changed, 282 insertions(+), 302 deletions(-) delete mode 100644 CHANGES/3561.feature create mode 160000 vendor/http-parser delete mode 160000 vendor/llhttp diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81f96ffbb93..0b92a258785 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,41 +71,9 @@ jobs: run: | LC_ALL=C sort -c CONTRIBUTORS.txt - gen_llhttp: - name: Generate llhttp sources - needs: lint - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: true - - name: Cache llhttp generated files - uses: actions/cache@v2 - id: cache - with: - key: llhttp-${{ hashFiles('vendor/llhttp/package.json', 'vendor/llhttp/src/**/*') }} - path: vendor/llhttp/build - - name: Setup NodeJS - if: steps.cache.outputs.cache-hit != 'true' - uses: actions/setup-node@v2 - with: - node-version: '14' - - name: Generate llhttp sources - if: steps.cache.outputs.cache-hit != 'true' - run: | - make generate-llhttp - - name: Upload llhttp generated files - uses: actions/upload-artifact@v2 - with: - name: llhttp - path: vendor/llhttp/build - if-no-files-found: error - test: name: Test - needs: gen_llhttp + needs: lint strategy: matrix: pyver: [3.7, 3.8, 3.9, '3.10'] @@ -154,12 +122,6 @@ jobs: if: ${{ matrix.no-extensions == '' }} run: | make cythonize - - name: Restore llhttp generated files - if: ${{ matrix.no-extensions == '' }} - uses: actions/download-artifact@v2 - with: - name: llhttp - path: vendor/llhttp/build/ - name: Run unittests env: COLOR: 'yes' diff --git a/.gitignore b/.gitignore index 7d38dd91998..8556509c6f7 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,6 @@ .idea .install-cython .install-deps -.llhttp-gen .installed.cfg .mypy_cache .noseids diff --git a/.gitmodules b/.gitmodules index 5f9f397b8e2..c58945aa9fc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ -[submodule "vendor/llhttp"] - path = vendor/llhttp - url = https://github.com/nodejs/llhttp.git - branch = v3.0.0 +[submodule "vendor/http-parser"] + path = vendor/http-parser + url = git://github.com/nodejs/http-parser.git + branch = 54f55a2 diff --git a/CHANGES/3561.feature b/CHANGES/3561.feature deleted file mode 100644 index db42f95f08e..00000000000 --- a/CHANGES/3561.feature +++ /dev/null @@ -1 +0,0 @@ -Switch from http-parser to llhttp diff --git a/Makefile b/Makefile index c64173402b0..81b8a0325c3 100644 --- a/Makefile +++ b/Makefile @@ -62,15 +62,6 @@ aiohttp/_find_header.c: $(call to-hash,aiohttp/hdrs.py ./tools/gen.py) aiohttp/%.c: aiohttp/%.pyx $(call to-hash,$(CYS)) aiohttp/_find_header.c cython -3 -o $@ $< -I aiohttp -vendor/llhttp/node_modules: vendor/llhttp/package.json - cd vendor/llhttp; npm install - -.llhttp-gen: vendor/llhttp/node_modules - $(MAKE) -C vendor/llhttp generate - @touch .llhttp-gen - -.PHONY: generate-llhttp -generate-llhttp: .llhttp-gen .PHONY: cythonize cythonize: .install-cython $(PYXS:.pyx=.c) @@ -90,7 +81,7 @@ fmt format: mypy: mypy -.develop: .install-deps generate-llhttp $(call to-hash,$(PYS) $(CYS) $(CS)) +.develop: .install-deps $(call to-hash,$(PYS) $(CYS) $(CS)) pip install -e . @touch .develop @@ -146,9 +137,6 @@ clean: @rm -rf aiohttp.egg-info @rm -f .install-deps @rm -f .install-cython - @rm -rf vendor/llhttp/node_modules - @rm -f .llhttp-gen - @$(MAKE) -C vendor/llhttp clean .PHONY: doc doc: diff --git a/aiohttp/_cparser.pxd b/aiohttp/_cparser.pxd index 41f2f56e493..0f9fc009236 100644 --- a/aiohttp/_cparser.pxd +++ b/aiohttp/_cparser.pxd @@ -1,189 +1,140 @@ -from libc.stdint cimport ( - int8_t, - int16_t, - int32_t, - int64_t, - uint8_t, - uint16_t, - uint32_t, - uint64_t, -) - -cdef extern from "../vendor/llhttp/build/llhttp.h": - - struct llhttp__internal_s: - int32_t _index - void* _span_pos0 - void* _span_cb0 - int32_t error - const char* reason - const char* error_pos - void* data - void* _current +from libc.stdint cimport uint16_t, uint32_t, uint64_t + + +cdef extern from "../vendor/http-parser/http_parser.h": + ctypedef int (*http_data_cb) (http_parser*, + const char *at, + size_t length) except -1 + + ctypedef int (*http_cb) (http_parser*) except -1 + + struct http_parser: + unsigned int type + unsigned int flags + unsigned int state + unsigned int header_state + unsigned int index + + uint32_t nread uint64_t content_length - uint8_t type - uint8_t method - uint8_t http_major - uint8_t http_minor - uint8_t header_state - uint8_t lenient_flags - uint8_t upgrade - uint8_t finish - uint16_t flags - uint16_t status_code - void* settings - - ctypedef llhttp__internal_s llhttp__internal_t - ctypedef llhttp__internal_t llhttp_t - - ctypedef int (*llhttp_data_cb)(llhttp_t*, const char *at, size_t length) except -1 - ctypedef int (*llhttp_cb)(llhttp_t*) except -1 - - struct llhttp_settings_s: - llhttp_cb on_message_begin - llhttp_data_cb on_url - llhttp_data_cb on_status - llhttp_data_cb on_header_field - llhttp_data_cb on_header_value - llhttp_cb on_headers_complete - llhttp_data_cb on_body - llhttp_cb on_message_complete - llhttp_cb on_chunk_header - llhttp_cb on_chunk_complete - - llhttp_cb on_url_complete - llhttp_cb on_status_complete - llhttp_cb on_header_field_complete - llhttp_cb on_header_value_complete - - ctypedef llhttp_settings_s llhttp_settings_t - - enum llhttp_errno: + + unsigned short http_major + unsigned short http_minor + unsigned int status_code + unsigned int method + unsigned int http_errno + + unsigned int upgrade + + void *data + + struct http_parser_settings: + http_cb on_message_begin + http_data_cb on_url + http_data_cb on_status + http_data_cb on_header_field + http_data_cb on_header_value + http_cb on_headers_complete + http_data_cb on_body + http_cb on_message_complete + http_cb on_chunk_header + http_cb on_chunk_complete + + enum http_parser_type: + HTTP_REQUEST, + HTTP_RESPONSE, + HTTP_BOTH + + enum http_errno: HPE_OK, - HPE_INTERNAL, - HPE_STRICT, - HPE_LF_EXPECTED, - HPE_UNEXPECTED_CONTENT_LENGTH, + HPE_CB_message_begin, + HPE_CB_url, + HPE_CB_header_field, + HPE_CB_header_value, + HPE_CB_headers_complete, + HPE_CB_body, + HPE_CB_message_complete, + HPE_CB_status, + HPE_CB_chunk_header, + HPE_CB_chunk_complete, + HPE_INVALID_EOF_STATE, + HPE_HEADER_OVERFLOW, HPE_CLOSED_CONNECTION, + HPE_INVALID_VERSION, + HPE_INVALID_STATUS, HPE_INVALID_METHOD, HPE_INVALID_URL, - HPE_INVALID_CONSTANT, - HPE_INVALID_VERSION, + HPE_INVALID_HOST, + HPE_INVALID_PORT, + HPE_INVALID_PATH, + HPE_INVALID_QUERY_STRING, + HPE_INVALID_FRAGMENT, + HPE_LF_EXPECTED, HPE_INVALID_HEADER_TOKEN, HPE_INVALID_CONTENT_LENGTH, HPE_INVALID_CHUNK_SIZE, - HPE_INVALID_STATUS, - HPE_INVALID_EOF_STATE, - HPE_INVALID_TRANSFER_ENCODING, - HPE_CB_MESSAGE_BEGIN, - HPE_CB_HEADERS_COMPLETE, - HPE_CB_MESSAGE_COMPLETE, - HPE_CB_CHUNK_HEADER, - HPE_CB_CHUNK_COMPLETE, + HPE_INVALID_CONSTANT, + HPE_INVALID_INTERNAL_STATE, + HPE_STRICT, HPE_PAUSED, - HPE_PAUSED_UPGRADE, - HPE_USER - - ctypedef llhttp_errno llhttp_errno_t + HPE_UNKNOWN - enum llhttp_flags: + enum flags: + F_CHUNKED, F_CONNECTION_KEEP_ALIVE, F_CONNECTION_CLOSE, F_CONNECTION_UPGRADE, - F_CHUNKED, + F_TRAILING, F_UPGRADE, - F_CONTENT_LENGTH, F_SKIPBODY, - F_TRAILING, - F_TRANSFER_ENCODING + F_CONTENTLENGTH - enum llhttp_lenient_flags: - LENIENT_HEADERS, - LENIENT_CHUNKED_LENGTH + enum http_method: + DELETE, GET, HEAD, POST, PUT, CONNECT, OPTIONS, TRACE, COPY, + LOCK, MKCOL, MOVE, PROPFIND, PROPPATCH, SEARCH, UNLOCK, BIND, + REBIND, UNBIND, ACL, REPORT, MKACTIVITY, CHECKOUT, MERGE, + MSEARCH, NOTIFY, SUBSCRIBE, UNSUBSCRIBE, PATCH, PURGE, MKCALENDAR, + LINK, UNLINK - enum llhttp_type: - HTTP_REQUEST, - HTTP_RESPONSE, - HTTP_BOTH + void http_parser_init(http_parser *parser, http_parser_type type) + + size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) + + int http_should_keep_alive(const http_parser *parser) + + void http_parser_settings_init(http_parser_settings *settings) + + const char *http_errno_name(http_errno err) + const char *http_errno_description(http_errno err) + const char *http_method_str(http_method m) + + # URL Parser + + enum http_parser_url_fields: + UF_SCHEMA = 0, + UF_HOST = 1, + UF_PORT = 2, + UF_PATH = 3, + UF_QUERY = 4, + UF_FRAGMENT = 5, + UF_USERINFO = 6, + UF_MAX = 7 + + struct http_parser_url_field_data: + uint16_t off + uint16_t len + + struct http_parser_url: + uint16_t field_set + uint16_t port + http_parser_url_field_data[UF_MAX] field_data + + void http_parser_url_init(http_parser_url *u) - enum llhttp_finish_t: - HTTP_FINISH_SAFE, - HTTP_FINISH_SAFE_WITH_CB, - HTTP_FINISH_UNSAFE - - enum llhttp_method: - HTTP_DELETE, - HTTP_GET, - HTTP_HEAD, - HTTP_POST, - HTTP_PUT, - HTTP_CONNECT, - HTTP_OPTIONS, - HTTP_TRACE, - HTTP_COPY, - HTTP_LOCK, - HTTP_MKCOL, - HTTP_MOVE, - HTTP_PROPFIND, - HTTP_PROPPATCH, - HTTP_SEARCH, - HTTP_UNLOCK, - HTTP_BIND, - HTTP_REBIND, - HTTP_UNBIND, - HTTP_ACL, - HTTP_REPORT, - HTTP_MKACTIVITY, - HTTP_CHECKOUT, - HTTP_MERGE, - HTTP_MSEARCH, - HTTP_NOTIFY, - HTTP_SUBSCRIBE, - HTTP_UNSUBSCRIBE, - HTTP_PATCH, - HTTP_PURGE, - HTTP_MKCALENDAR, - HTTP_LINK, - HTTP_UNLINK, - HTTP_SOURCE, - HTTP_PRI, - HTTP_DESCRIBE, - HTTP_ANNOUNCE, - HTTP_SETUP, - HTTP_PLAY, - HTTP_PAUSE, - HTTP_TEARDOWN, - HTTP_GET_PARAMETER, - HTTP_SET_PARAMETER, - HTTP_REDIRECT, - HTTP_RECORD, - HTTP_FLUSH - - ctypedef llhttp_method llhttp_method_t; - - void llhttp_settings_init(llhttp_settings_t* settings) - void llhttp_init(llhttp_t* parser, llhttp_type type, - const llhttp_settings_t* settings) - - llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len) - llhttp_errno_t llhttp_finish(llhttp_t* parser) - - int llhttp_message_needs_eof(const llhttp_t* parser) - - int llhttp_should_keep_alive(const llhttp_t* parser) - - void llhttp_pause(llhttp_t* parser) - void llhttp_resume(llhttp_t* parser) - - void llhttp_resume_after_upgrade(llhttp_t* parser) - - llhttp_errno_t llhttp_get_errno(const llhttp_t* parser) - const char* llhttp_get_error_reason(const llhttp_t* parser) - void llhttp_set_error_reason(llhttp_t* parser, const char* reason) - const char* llhttp_get_error_pos(const llhttp_t* parser) - const char* llhttp_errno_name(llhttp_errno_t err) - - const char* llhttp_method_name(llhttp_method_t method) - - void llhttp_set_lenient_headers(llhttp_t* parser, int enabled) - void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled) + int http_parser_parse_url(const char *buf, + size_t buflen, + int is_connect, + http_parser_url *u) diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index a63548854a0..2a6d1ffa6f5 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -80,13 +80,13 @@ cdef inline object extend(object buf, const char* at, size_t length): memcpy(ptr + s, at, length) -DEF METHODS_COUNT = 46; +DEF METHODS_COUNT = 34; cdef list _http_method = [] for i in range(METHODS_COUNT): _http_method.append( - cparser.llhttp_method_name( i).decode('ascii')) + cparser.http_method_str( i).decode('ascii')) cdef inline str http_method_str(int i): @@ -272,8 +272,8 @@ cdef _new_response_message(object version, cdef class HttpParser: cdef: - cparser.llhttp_t* _cparser - cparser.llhttp_settings_t* _csettings + cparser.http_parser* _cparser + cparser.http_parser_settings* _csettings bytearray _raw_name bytearray _raw_value @@ -310,13 +310,13 @@ cdef class HttpParser: Py_buffer py_buf def __cinit__(self): - self._cparser = \ - PyMem_Malloc(sizeof(cparser.llhttp_t)) + self._cparser = \ + PyMem_Malloc(sizeof(cparser.http_parser)) if self._cparser is NULL: raise MemoryError() - self._csettings = \ - PyMem_Malloc(sizeof(cparser.llhttp_settings_t)) + self._csettings = \ + PyMem_Malloc(sizeof(cparser.http_parser_settings)) if self._csettings is NULL: raise MemoryError() @@ -324,18 +324,19 @@ cdef class HttpParser: PyMem_Free(self._cparser) PyMem_Free(self._csettings) - cdef _init(self, cparser.llhttp_type mode, + cdef _init(self, cparser.http_parser_type mode, object protocol, object loop, int limit, object timer=None, size_t max_line_size=8190, size_t max_headers=32768, size_t max_field_size=8190, payload_exception=None, bint response_with_body=True, bint read_until_eof=False, bint auto_decompress=True): - cparser.llhttp_settings_init(self._csettings) - cparser.llhttp_init(self._cparser, mode, self._csettings) + cparser.http_parser_init(self._cparser, mode) self._cparser.data = self self._cparser.content_length = 0 + cparser.http_parser_settings_init(self._csettings) + self._protocol = protocol self._loop = loop self._timer = timer @@ -416,7 +417,7 @@ cdef class HttpParser: self._process_header() method = http_method_str(self._cparser.method) - should_close = not cparser.llhttp_should_keep_alive(self._cparser) + should_close = not cparser.http_should_keep_alive(self._cparser) upgrade = self._cparser.upgrade chunked = self._cparser.flags & cparser.F_CHUNKED @@ -452,7 +453,7 @@ cdef class HttpParser: if (ULLONG_MAX > self._cparser.content_length > 0 or chunked or self._cparser.method == 5 or # CONNECT: 5 (self._cparser.status_code >= 199 and - self._cparser.content_length == 0 and + self._cparser.content_length == ULLONG_MAX and self._read_until_eof) ): payload = StreamReader( @@ -484,7 +485,7 @@ cdef class HttpParser: pass cdef inline http_version(self): - cdef cparser.llhttp_t* parser = self._cparser + cdef cparser.http_parser* parser = self._cparser if parser.http_major == 1: if parser.http_minor == 0: @@ -503,11 +504,12 @@ cdef class HttpParser: if self._cparser.flags & cparser.F_CHUNKED: raise TransferEncodingError( "Not enough data for satisfy transfer length header.") - elif self._cparser.flags & cparser.F_CONTENT_LENGTH: + elif self._cparser.flags & cparser.F_CONTENTLENGTH: raise ContentLengthError( "Not enough data for satisfy content length header.") - elif cparser.llhttp_get_errno(self._cparser) != cparser.HPE_OK: - desc = cparser.llhttp_get_error_reason(self._cparser) + elif self._cparser.http_errno != cparser.HPE_OK: + desc = cparser.http_errno_description( + self._cparser.http_errno) raise PayloadEncodingError(desc.decode('latin-1')) else: self._payload.feed_eof() @@ -520,30 +522,26 @@ cdef class HttpParser: cdef: size_t data_len size_t nb - cdef cparser.llhttp_errno_t errno PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE) data_len = self.py_buf.len - errno = cparser.llhttp_execute( + nb = cparser.http_parser_execute( self._cparser, + self._csettings, self.py_buf.buf, data_len) - if errno is cparser.HPE_PAUSED_UPGRADE: - cparser.llhttp_resume_after_upgrade(self._cparser) - - nb = cparser.llhttp_get_error_pos(self._cparser) - self.py_buf.buf - PyBuffer_Release(&self.py_buf) - if errno not in (cparser.HPE_OK, cparser.HPE_PAUSED_UPGRADE): + if (self._cparser.http_errno != cparser.HPE_OK): if self._payload_error == 0: if self._last_error is not None: ex = self._last_error self._last_error = None else: - ex = parser_error_from_errno(self._cparser) + ex = parser_error_from_errno( + self._cparser.http_errno) self._payload = None raise ex @@ -576,13 +574,20 @@ cdef class HttpRequestParser(HttpParser): auto_decompress) cdef object _on_status_complete(self): + cdef Py_buffer py_buf if not self._buf: return self._path = self._buf.decode('utf-8', 'surrogateescape') - try: + if self._cparser.method == 5: # CONNECT self._url = URL(self._path) - finally: - PyByteArray_Resize(self._buf, 0) + else: + PyObject_GetBuffer(self._buf, &py_buf, PyBUF_SIMPLE) + try: + self._url = _parse_url(py_buf.buf, + py_buf.len) + finally: + PyBuffer_Release(&py_buf) + PyByteArray_Resize(self._buf, 0) cdef class HttpResponseParser(HttpParser): @@ -605,7 +610,7 @@ cdef class HttpResponseParser(HttpParser): else: self._reason = self._reason or '' -cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: +cdef int cb_on_message_begin(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data pyparser._started = True @@ -617,7 +622,7 @@ cdef int cb_on_message_begin(cparser.llhttp_t* parser) except -1: return 0 -cdef int cb_on_url(cparser.llhttp_t* parser, +cdef int cb_on_url(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data try: @@ -632,7 +637,7 @@ cdef int cb_on_url(cparser.llhttp_t* parser, return 0 -cdef int cb_on_status(cparser.llhttp_t* parser, +cdef int cb_on_status(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef str reason @@ -648,7 +653,7 @@ cdef int cb_on_status(cparser.llhttp_t* parser, return 0 -cdef int cb_on_header_field(cparser.llhttp_t* parser, +cdef int cb_on_header_field(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef Py_ssize_t size @@ -666,7 +671,7 @@ cdef int cb_on_header_field(cparser.llhttp_t* parser, return 0 -cdef int cb_on_header_value(cparser.llhttp_t* parser, +cdef int cb_on_header_value(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef Py_ssize_t size @@ -683,7 +688,7 @@ cdef int cb_on_header_value(cparser.llhttp_t* parser, return 0 -cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: +cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_status_complete() @@ -698,7 +703,7 @@ cdef int cb_on_headers_complete(cparser.llhttp_t* parser) except -1: return 0 -cdef int cb_on_body(cparser.llhttp_t* parser, +cdef int cb_on_body(cparser.http_parser* parser, const char *at, size_t length) except -1: cdef HttpParser pyparser = parser.data cdef bytes body = at[:length] @@ -715,7 +720,7 @@ cdef int cb_on_body(cparser.llhttp_t* parser, return 0 -cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1: +cdef int cb_on_message_complete(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._started = False @@ -727,7 +732,7 @@ cdef int cb_on_message_complete(cparser.llhttp_t* parser) except -1: return 0 -cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1: +cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_chunk_header() @@ -738,7 +743,7 @@ cdef int cb_on_chunk_header(cparser.llhttp_t* parser) except -1: return 0 -cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: +cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1: cdef HttpParser pyparser = parser.data try: pyparser._on_chunk_complete() @@ -749,21 +754,19 @@ cdef int cb_on_chunk_complete(cparser.llhttp_t* parser) except -1: return 0 -cdef parser_error_from_errno(cparser.llhttp_t* parser): - cdef cparser.llhttp_errno_t errno = cparser.llhttp_get_errno(parser) - cdef bytes desc = cparser.llhttp_get_error_reason(parser) - - if errno in (cparser.HPE_CB_MESSAGE_BEGIN, - cparser.HPE_CB_HEADERS_COMPLETE, - cparser.HPE_CB_MESSAGE_COMPLETE, - cparser.HPE_CB_CHUNK_HEADER, - cparser.HPE_CB_CHUNK_COMPLETE, - cparser.HPE_INVALID_CONSTANT, - cparser.HPE_INVALID_HEADER_TOKEN, - cparser.HPE_INVALID_CONTENT_LENGTH, - cparser.HPE_INVALID_CHUNK_SIZE, - cparser.HPE_INVALID_EOF_STATE, - cparser.HPE_INVALID_TRANSFER_ENCODING): +cdef parser_error_from_errno(cparser.http_errno errno): + cdef bytes desc = cparser.http_errno_description(errno) + + if errno in (cparser.HPE_CB_message_begin, + cparser.HPE_CB_url, + cparser.HPE_CB_header_field, + cparser.HPE_CB_header_value, + cparser.HPE_CB_headers_complete, + cparser.HPE_CB_body, + cparser.HPE_CB_message_complete, + cparser.HPE_CB_status, + cparser.HPE_CB_chunk_header, + cparser.HPE_CB_chunk_complete): cls = BadHttpMessage elif errno == cparser.HPE_INVALID_STATUS: @@ -772,9 +775,6 @@ cdef parser_error_from_errno(cparser.llhttp_t* parser): elif errno == cparser.HPE_INVALID_METHOD: cls = BadStatusLine - elif errno == cparser.HPE_INVALID_VERSION: - cls = BadStatusLine - elif errno == cparser.HPE_INVALID_URL: cls = InvalidURLError @@ -782,3 +782,96 @@ cdef parser_error_from_errno(cparser.llhttp_t* parser): cls = BadHttpMessage return cls(desc.decode('latin-1')) + + +def parse_url(url): + cdef: + Py_buffer py_buf + char* buf_data + + PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE) + try: + buf_data = py_buf.buf + return _parse_url(buf_data, py_buf.len) + finally: + PyBuffer_Release(&py_buf) + + +cdef _parse_url(char* buf_data, size_t length): + cdef: + cparser.http_parser_url* parsed + int res + str schema = None + str host = None + object port = None + str path = None + str query = None + str fragment = None + str user = None + str password = None + str userinfo = None + object result = None + int off + int ln + + parsed = \ + PyMem_Malloc(sizeof(cparser.http_parser_url)) + if parsed is NULL: + raise MemoryError() + cparser.http_parser_url_init(parsed) + try: + res = cparser.http_parser_parse_url(buf_data, length, 0, parsed) + + if res == 0: + if parsed.field_set & (1 << cparser.UF_SCHEMA): + off = parsed.field_data[cparser.UF_SCHEMA].off + ln = parsed.field_data[cparser.UF_SCHEMA].len + schema = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') + else: + schema = '' + + if parsed.field_set & (1 << cparser.UF_HOST): + off = parsed.field_data[cparser.UF_HOST].off + ln = parsed.field_data[cparser.UF_HOST].len + host = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') + else: + host = '' + + if parsed.field_set & (1 << cparser.UF_PORT): + port = parsed.port + + if parsed.field_set & (1 << cparser.UF_PATH): + off = parsed.field_data[cparser.UF_PATH].off + ln = parsed.field_data[cparser.UF_PATH].len + path = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') + else: + path = '' + + if parsed.field_set & (1 << cparser.UF_QUERY): + off = parsed.field_data[cparser.UF_QUERY].off + ln = parsed.field_data[cparser.UF_QUERY].len + query = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') + else: + query = '' + + if parsed.field_set & (1 << cparser.UF_FRAGMENT): + off = parsed.field_data[cparser.UF_FRAGMENT].off + ln = parsed.field_data[cparser.UF_FRAGMENT].len + fragment = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') + else: + fragment = '' + + if parsed.field_set & (1 << cparser.UF_USERINFO): + off = parsed.field_data[cparser.UF_USERINFO].off + ln = parsed.field_data[cparser.UF_USERINFO].len + userinfo = buf_data[off:off+ln].decode('utf-8', 'surrogateescape') + + user, sep, password = userinfo.partition(':') + + return URL_build(scheme=schema, + user=user, password=password, host=host, port=port, + path=path, query_string=query, fragment=fragment, encoded=True) + else: + raise InvalidURLError("invalid url {!r}".format(buf_data)) + finally: + PyMem_Free(parsed) diff --git a/setup.py b/setup.py index ef236cb275a..a73d331ea07 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ NO_EXTENSIONS = True -if IS_GIT_REPO and not (HERE / "vendor/llhttp/README.md").exists(): +if IS_GIT_REPO and not (HERE / "vendor/http-parser/README.md").exists(): print("Install submodules when building from git clone", file=sys.stderr) print("Hint:", file=sys.stderr) print(" git submodule update --init", file=sys.stderr) @@ -33,13 +33,10 @@ "aiohttp._http_parser", [ "aiohttp/_http_parser.c", + "vendor/http-parser/http_parser.c", "aiohttp/_find_header.c", - "vendor/llhttp/build/c/llhttp.c", - "vendor/llhttp/src/native/api.c", - "vendor/llhttp/src/native/http.c", ], - define_macros=[("LLHTTP_STRICT_MODE", 0)], - include_dirs=["vendor/llhttp/build"], + define_macros=[("HTTP_PARSER_STRICT", 0)], ), Extension("aiohttp._helpers", ["aiohttp/_helpers.c"]), Extension("aiohttp._http_writer", ["aiohttp/_http_writer.c"]), diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 670261f8b11..80913ae4360 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -13,7 +13,6 @@ import aiohttp from aiohttp import http_exceptions, streams from aiohttp.http_parser import ( - NO_EXTENSIONS, DeflateBuffer, HttpPayloadParser, HttpRequestParserPy, @@ -86,14 +85,6 @@ def stream(): return mock.Mock() -@pytest.mark.skipif(NO_EXTENSIONS, reason="Extentions available but not imported") -def test_c_parser_loaded(): - assert "HttpRequestParserC" in dir(aiohttp.http_parser) - assert "HttpResponseParserC" in dir(aiohttp.http_parser) - assert "RawRequestMessageC" in dir(aiohttp.http_parser) - assert "RawResponseMessageC" in dir(aiohttp.http_parser) - - def test_parse_headers(parser: Any) -> None: text = b"""GET /test HTTP/1.1\r test: line\r diff --git a/vendor/http-parser b/vendor/http-parser new file mode 160000 index 00000000000..2343fd6b521 --- /dev/null +++ b/vendor/http-parser @@ -0,0 +1 @@ +Subproject commit 2343fd6b5214b2ded2cdcf76de2bf60903bb90cd diff --git a/vendor/llhttp b/vendor/llhttp deleted file mode 160000 index c6a35cccf5c..00000000000 --- a/vendor/llhttp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c6a35cccf5c8b36f82036c23cf9c50a7dc2dbd0a