Skip to content

useCURL #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/.vitepress/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ export default withPwa(defineConfig({
text: 'useLogger',
link: '/api/logger',
},
{
text: 'useParser',
link: '/api/parser',
},
],
},
{
Expand Down Expand Up @@ -168,10 +172,6 @@ export default withPwa(defineConfig({
text: 'useTo',
link: '/utils/to',
},
{
text: 'useURL',
link: '/utils/url',
},
{
text: 'utils',
link: '/utils/utils',
Expand Down
57 changes: 57 additions & 0 deletions docs/api/parser.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
title: useParser
outline: deep
---
# useParser

解析器,提供了一些常用的方法。

## curl <Badge type="warning" text="useCURL" />

用于解析CURL命令。
```python
from usepy import useParser, useCURL

test_curl_command = """
curl 'http://localhost:3333/api/parser.html' \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
-H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'Cookie: Pycharm-fe1126fc=eb62da0d-7fad-4f5f-9396-37ee80df090c; csrftoken=SIf8rv13bnXYAarsi6aN6mpuHHZBjzKTBADTtouFI5U28Na8l9TFu9IFROY1auqH' \
-H 'Pragma: no-cache' \
-H 'Sec-Fetch-Dest: document' \
-H 'Sec-Fetch-Mode: navigate' \
-H 'Sec-Fetch-Site: same-origin' \
-H 'Sec-Fetch-User: ?1' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76' \
-H 'sec-ch-ua: "Not?A_Brand";v="8", "Chromium";v="108", "Microsoft Edge";v="108"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"' \
--compressed
"""
# curl = useParser.curl(test_curl_command)
curl = useCURL(test_curl_command)
print(curl.url) # http://localhost:3333/api/parser.html
print(curl.method) # GET
print(curl.data) # None
print(curl.headers) # ...
print(curl.cookies) # ...
```


## useURL

用于解析URL。

```python
from usepy import useURL

url = useURL("https://www.google.com/search?q=usepy&ie=utf-8")
```


| url.scheme | url.netloc | url.query | url.query_dict | url.path |
| ---------- | :------------: | :--------------: | :-----------------------------: | -------: |
| https | www.google.com | q=usepy&ie=utf-8 | `{'q': 'usepy', 'ie': 'utf-8'}` | /search |
25 changes: 0 additions & 25 deletions docs/utils/url.md

This file was deleted.

13 changes: 13 additions & 0 deletions example/parser_curl_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from usepy import useParser, useCURL

if __name__ == '__main__':
    # Replace the placeholder text with a real curl command before running.
    test_curl_command = """
your curl command
"""
    # The same parser is also reachable as useParser.curl(test_curl_command).
    curl = useCURL(test_curl_command)

    # Dump every parsed component of the request.
    print(curl.url)
    print(curl.method)
    print(curl.data, type(curl.data))
    print(curl.headers)
    print(curl.cookies)
13 changes: 10 additions & 3 deletions src/usepy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
# logstash_handler,
# JsonFormatter
)
from .parser import (
useParser,
useCURL,
useURL
)
from .utils import (
useTimer,
useTimerManager,
Expand All @@ -39,8 +44,7 @@
useCleanHtml,
useBloomFilter,
useTo,
useIs,
useURL,
useIs
)
from .utils.bloom_filter import BloomFilter as useBloomFilter

Expand All @@ -64,6 +68,10 @@
'useLogger',
'useLoggerIntercept',
'useLoggerInterceptUvicorn',
# parser
'useParser',
'useCURL',
'useURL',
# utils
'useIs',
'useTo',
Expand All @@ -81,5 +89,4 @@
'useDateTime',
'usePath',
'useThread',
'useURL',
]
10 changes: 10 additions & 0 deletions src/usepy/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .curl import CURL as useCURL
from .url import URL as useURL


class Parser:
    """Namespace exposing the available parsers as class attributes."""

    # curl command-line parser; the same object as ``useCURL``.
    curl = useCURL
    # URL parser; the same object as ``useURL``.
    url = useURL


# Public alias that follows the package's ``use*`` naming convention.
useParser = Parser
104 changes: 104 additions & 0 deletions src/usepy/parser/curl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""

curl 'http://testing.ceegdev.com:9090/api/user/profile' \
-H 'Accept: application/json, text/plain, */*' \
-H 'Accept-Language: zh-cn' \
-H 'Authorization: 0a96d4f1-375b-4846-85a2-5731a1a2b3ac' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'Origin: http://localhost:4000' \
-H 'Pragma: no-cache' \
-H 'Referer: http://localhost:4000/' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76' \
--compressed \
--insecure

"""
import argparse
import re
import shlex
from collections import OrderedDict
from typing import Optional, Dict, Generator, Tuple

from usepy.utils import useCookieToDict


def normalize_newlines(multiline_text):
    """Join shell line continuations so the command can be tokenised.

    A backslash immediately followed by a line break (LF or CRLF) is removed,
    matching how a POSIX shell treats an unquoted continuation. Any whitespace
    before the backslash is kept, so the joined arguments stay separated.

    Every ``^`` is also removed. This is a blunt way of undoing the caret
    continuations/escapes found in commands copied for Windows ``cmd``; the
    line break that followed a caret is left in place and is later treated
    as ordinary whitespace by ``shlex``.

    :param multiline_text: curl command, possibly spanning several lines
    :return: the command with continuations and carets removed
    """
    # Handle CRLF first so no stray "\r" is left behind by the LF rule.
    joined = multiline_text.replace("\\\r\n", "").replace("\\\n", "")
    return joined.replace("^", "")


class CURL:
    """Parse a ``curl`` command line into the parts of the HTTP request.

    The command is tokenised with ``shlex`` and interpreted with ``argparse``.
    The parsed values are exposed through read-only properties: ``url``,
    ``method``, ``data``, ``auth``, ``verify``, ``compressed``, ``headers``
    and ``cookies``.

    NOTE: only the curl options declared in ``_parse_args`` are understood;
    ``argparse`` raises ``SystemExit`` when it meets any other option.
    """

    def __init__(self, curl_command):
        """Tokenise and parse *curl_command* (the full ``curl ...`` string)."""
        tokens = shlex.split(normalize_newlines(curl_command))
        self.args = self._parse_args(tokens)

    @staticmethod
    def _parse_args(tokens):
        """Map the shell tokens of a curl command onto an argparse namespace.

        :param tokens: list of tokens, starting with the ``curl`` word itself
        :return: ``argparse.Namespace`` holding the recognised options
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('command')
        parser.add_argument('url')
        parser.add_argument('-d', '--data')
        # curl has no short form for these two; "-b" belongs to --cookie.
        parser.add_argument('--data-binary', '--data-raw', dest='data_binary', default=None)
        parser.add_argument('-b', '--cookie', default=None)
        parser.add_argument('-X', '--request', dest='X', default='')
        parser.add_argument('-H', '--header', action='append', default=[])
        parser.add_argument('--compressed', action='store_true')
        parser.add_argument('-k', '--insecure', action='store_true')
        parser.add_argument('--user', '-u', default=())
        parser.add_argument('-i', '--include', action='store_true')
        parser.add_argument('-s', '--silent', action='store_true')
        return parser.parse_args(tokens)

    @property
    def url(self) -> str:
        """Target URL of the request."""
        return self.args.url

    @property
    def method(self) -> str:
        """Upper-case HTTP method.

        Uses ``-X``/``--request`` when given; otherwise ``POST`` if the
        command carries a body and ``GET`` if it does not.
        """
        if self.args.X:
            return self.args.X.upper()
        return 'POST' if self.data else 'GET'

    @property
    def data(self) -> Optional[str]:
        """Request body from ``-d``/``--data``/``--data-binary``/``--data-raw``, if any."""
        return self.args.data or self.args.data_binary

    @property
    def auth(self) -> Optional[tuple]:
        """``(user, password)`` from ``-u``/``--user``, or ``None`` when absent.

        Only the first ``:`` separates the two parts, so a password may itself
        contain colons. A value without ``:`` yields a one-element tuple.
        """
        if self.args.user:
            return tuple(self.args.user.split(':', 1))
        return None

    @property
    def verify(self) -> bool:
        """Whether TLS certificates should be verified (``False`` with ``-k``/``--insecure``)."""
        return not self.args.insecure

    @property
    def compressed(self) -> bool:
        """``True`` when ``--compressed`` was passed."""
        return self.args.compressed

    def _get_headers_kv(self) -> Generator[Tuple[str, str], None, None]:
        """Yield ``(name, value)`` for every ``-H``/``--header`` argument."""
        for header in self.args.header:
            # Search from index 1 so the leading colon of an HTTP/2
            # pseudo-header (":authority: host") stays part of the name.
            separator = header.find(':', 1)
            if separator == -1:
                # No "name: value" separator. curl's "Name;" form denotes a
                # header with an empty value, so report it that way instead
                # of failing on the unpack.
                yield header.rstrip(';'), ''
                continue
            yield header[:separator], header[separator + 1:].strip()

    @property
    def headers(self) -> Optional[Dict]:
        """Request headers without ``Cookie``, or ``None`` when there are none."""
        collected = {
            key: value
            for key, value in self._get_headers_kv()
            # Cookies are reported separately through ``cookies``.
            if key.lower().strip("$") != 'cookie'
        }
        return collected or None

    @property
    def cookies(self) -> Optional[Dict]:
        """Cookies as a dict, or ``None`` when the command carries none.

        The first ``Cookie`` header wins; otherwise an inline
        ``-b``/``--cookie`` value of the form ``name=value; ...`` is used.
        A ``-b`` value without ``=`` is a cookie file name in curl and is
        ignored here.
        """
        for key, value in self._get_headers_kv():
            if key.lower().strip("$") == 'cookie':
                return useCookieToDict(value)
        inline_cookies = self.args.cookie
        if inline_cookies and '=' in inline_cookies:
            return useCookieToDict(inline_cookies)
        return None
File renamed without changes.
1 change: 0 additions & 1 deletion src/usepy/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,3 @@
data_to_dict as useDataToDict,
sizeof_fmt as useSizeofFmt,
)
from .url import URL as useURL
2 changes: 1 addition & 1 deletion src/usepy/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def cookie_to_dict(cookies: str) -> dict:
:param cookies: cookie字符串
:return: dict
"""
return dict(x.split('=') for x in cookies.split('; ')) # noqa
return dict(x.split('=', 1) for x in cookies.split('; ')) # noqa


def headers_to_dict(headers: str) -> dict:
Expand Down
30 changes: 30 additions & 0 deletions tests/test_parser/test_curl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from usepy import useCURL


def test_curl():
    """Parse a browser-style curl command and check each parsed component."""
    test_curl_command = """
curl 'https://www.baidu.com/' \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
-H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'Cookie: BIDUPSID=xxxxxxxx' \
-H 'Pragma: no-cache' \
-H 'Referer: https://www.baidu.com/' \
-H 'Sec-Fetch-Dest: document' \
-H 'Sec-Fetch-Mode: navigate' \
-H 'Sec-Fetch-Site: same-origin' \
-H 'Sec-Fetch-User: ?1' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76' \
-H 'sec-ch-ua: "Not?A_Brand";v="8", "Chromium";v="108", "Microsoft Edge";v="108"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "macOS"' \
--compressed
"""
    parsed = useCURL(test_curl_command)

    assert parsed.url == 'https://www.baidu.com/'
    # No -X and no body, so the method falls back to GET.
    assert parsed.method == 'GET'
    assert not parsed.data
    # 16 -H arguments were given; the Cookie one is reported via .cookies.
    assert len(parsed.headers) == 15
    assert len(parsed.cookies) == 1