Skip to content

fix(utils): properly url encode all params #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions scrapingbee/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@
# Default request headers: a User-Agent of the form "ScrapingBee-Python/<__version__>".
# NOTE(review): __version__ is defined outside this view; presumably the package version.
DEFAULT_HEADERS = {"User-Agent": f"ScrapingBee-Python/{__version__}"}


def process_url(url: str) -> str:
    """Percent-encode *url* using the defaults of ``urllib.parse.quote``.

    With the default ``safe='/'``, slashes are left untouched while
    characters such as ``:``, ``?`` and ``=`` are escaped.
    """
    encoded_url = urllib.parse.quote(url)
    return encoded_url


def process_js_snippet(js_snippet: str) -> str:
    """Return *js_snippet* encoded as Base64 text.

    The snippet is converted to bytes with ``str.encode()`` (UTF-8 by
    default), Base64-encoded, and the resulting bytes are decoded back
    into a ``str``.
    """
    raw_bytes = js_snippet.encode()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode()

Expand All @@ -34,7 +30,7 @@ def process_cookies(cookies: dict) -> str:

def process_json_stringify_param(param: dict, param_name: str) -> str:
    """Serialize a dict-valued parameter to a JSON string.

    The JSON text is returned as-is, without percent-encoding. Escaping it
    here as well as when the query string is assembled with
    ``urllib.parse.urlencode`` would encode the value twice.

    Args:
        param: The parameter value; must be a ``dict``.
        param_name: Name of the parameter, used only in the error message.

    Returns:
        ``json.dumps(param)``.

    Raises:
        ValueError: If *param* is not a ``dict``.
    """
    # Guard clause: reject anything that is not a dict up front.
    if not isinstance(param, dict):
        raise ValueError(f"{param_name} must be a dict or a stringified JSON")
    return json.dumps(param)

Expand All @@ -44,8 +40,6 @@ def process_params(params: dict) -> dict:
for k, v in params.items():
if v in (None, '', [], {}):
continue
elif k == 'url':
new_params[k] = process_url(v)
elif k == 'js_snippet':
new_params[k] = process_js_snippet(v)
elif k == 'cookies':
Expand All @@ -71,6 +65,6 @@ def get_scrapingbee_url(api_url: str, api_key: str, url: str, params: dict) -> s
spb_params = process_params(all_params)

# Format url query string
qs = '&'.join(f'{k}={v}' for k, v in spb_params.items())
qs = urllib.parse.urlencode(spb_params)

return f'{api_url}?{qs}'
20 changes: 10 additions & 10 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_get(mock_session, client):
mock_session.return_value.request.assert_called_with(
'GET',
'https://app.scrapingbee.com/api/v1/'
'?api_key=API_KEY&url=https%3A//httpbin.org',
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org',
data=None,
headers=DEFAULT_HEADERS
)
Expand All @@ -33,7 +33,7 @@ def test_get_with_params(mock_session, client):
mock_session.return_value.request.assert_called_with(
'GET',
'https://app.scrapingbee.com/api/v1/'
'?api_key=API_KEY&url=https%3A//httpbin.org&render_js=True',
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&render_js=True',
data=None,
headers=DEFAULT_HEADERS,
)
Expand All @@ -47,7 +47,7 @@ def test_get_with_headers(mock_session, client):
mock_session.return_value.request.assert_called_with(
'GET',
'https://app.scrapingbee.com/api/v1/'
'?api_key=API_KEY&url=https%3A//httpbin.org&forward_headers=True',
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&forward_headers=True',
data=None,
headers={'Spb-Content-Type': 'text/html; charset=utf-8',
**DEFAULT_HEADERS},
Expand All @@ -65,7 +65,7 @@ def test_get_with_cookies(mock_session, client):
mock_session.return_value.request.assert_called_with(
'GET',
'https://app.scrapingbee.com/api/v1/'
'?api_key=API_KEY&url=https%3A//httpbin.org&cookies=name_1=value_1;name_2=value_2',
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&cookies=name_1%3Dvalue_1%3Bname_2%3Dvalue_2',
data=None,
headers=DEFAULT_HEADERS,
)
Expand All @@ -84,9 +84,9 @@ def test_get_with_extract_rules(mock_session, client):
mock_session.return_value.request.assert_called_with(
'GET',
'https://app.scrapingbee.com/api/v1/'
'?api_key=API_KEY&url=https%3A//httpbin.org&'
'extract_rules=%7B%22title%22%3A%20%22h1%22%2C%20%22'
'subtitle%22%3A%20%22%23subtitle%22%7D',
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
'extract_rules=%7B%22title%22%3A+%22h1%22%2C+%22'
'subtitle%22%3A+%22%23subtitle%22%7D',
data=None,
headers=DEFAULT_HEADERS,
)
Expand All @@ -106,8 +106,8 @@ def test_get_with_js_scenario(mock_session, client):
mock_session.return_value.request.assert_called_with(
'GET',
'https://app.scrapingbee.com/api/v1/'
'?api_key=API_KEY&url=https%3A//httpbin.org&'
'js_scenario=%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D',
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&'
'js_scenario=%7B%22instructions%22%3A+%5B%7B%22click%22%3A+%22%23buttonId%22%7D%5D%7D',
data=None,
headers=DEFAULT_HEADERS,
)
Expand All @@ -120,7 +120,7 @@ def test_post(mock_session, client):

mock_session.return_value.request.assert_called_with(
'POST',
'https://app.scrapingbee.com/api/v1/?api_key=API_KEY&url=https%3A//httpbin.org',
'https://app.scrapingbee.com/api/v1/?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org',
data={'KEY_1': 'VALUE_1'},
headers=DEFAULT_HEADERS
)
15 changes: 4 additions & 11 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from scrapingbee.utils import (
process_url,
process_js_snippet,
process_json_stringify_param,
process_headers,
Expand All @@ -9,12 +8,6 @@
)


def test_process_url():
    '''process_url should percent-encode the url it is given'''
    encoded = process_url('https://example.com?p=1')
    assert encoded == 'https%3A//example.com%3Fp%3D1'


def test_process_js_snippet():
'''It should encode JavaScript code'''
output = process_js_snippet(
Expand Down Expand Up @@ -46,7 +39,7 @@ def test_process_extract_rules():
output = process_json_stringify_param({
'title': '.title'
}, 'extract_rules')
assert output == '%7B%22title%22%3A%20%22.title%22%7D'
assert output == '{"title": ".title"}'


def test_process_js_scenario():
Expand All @@ -56,7 +49,7 @@ def test_process_js_scenario():
{"click": "#buttonId"}
]
}, 'js_scenario')
assert output == '%7B%22instructions%22%3A%20%5B%7B%22click%22%3A%20%22%23buttonId%22%7D%5D%7D'
assert output == '{"instructions": [{"click": "#buttonId"}]}'


def test_process_params():
Expand All @@ -71,7 +64,7 @@ def test_get_scrapingbee_url():
'https://app.scrapingbee.com/api/v1/',
'API_KEY',
'https://httpbin.org',
{'render_js': True}
{'render_js': True, 'wait_for': '#foo'}
)
assert output == 'https://app.scrapingbee.com/api/v1/' \
'?api_key=API_KEY&url=https%3A//httpbin.org&render_js=True'
'?api_key=API_KEY&url=https%3A%2F%2Fhttpbin.org&render_js=True&wait_for=%23foo'