Skip to content

Commit e3b6dc4

Browse files
committed
Add CLI interface with charset, scheme, param filtering and allowlist support
1 parent b58e96b commit e3b6dc4

File tree

6 files changed

+351
-5
lines changed

6 files changed

+351
-5
lines changed

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [2.1.0] - 2025-03-30
9+
10+
### Added
11+
12+
- New command-line interface (`url-normalize`) with support for:
13+
  - Version information (`--version`, `-v`)
14+
  - Charset selection (`--charset`, `-c`)
15+
  - Default scheme override (`--default-scheme`, `-s`)
16+
  - Query parameter filtering (`--filter-params`, `-f`)
17+
  - Custom allowlist for query parameters (`--param-allowlist`, `-p`)
18+
19+
### Fixed
20+
21+
- Do not encode equals sign in fragment (Fixes #36)
22+
23+
### Internal
24+
25+
- Add GitHub Action to publish package to PyPI using uv
26+
827
## [2.0.1] - 2025-03-29
928

1029
### Fixed

README.md

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ pip install url-normalize
3838

3939
## Usage
4040

41-
Basic usage:
42-
4341
```python
4442
from url_normalize import url_normalize
4543

@@ -55,13 +53,15 @@ print(url_normalize("www.foo.com/foo", default_scheme="http"))
5553
print(url_normalize("www.google.com/search?q=test&utm_source=test", filter_params=True))
5654
# Output: https://www.google.com/search?q=test
5755

58-
# With custom parameter allowlist
56+
# With custom parameter allowlist as a dict
5957
print(url_normalize(
6058
"example.com?page=1&id=123&ref=test",
6159
filter_params=True,
6260
param_allowlist={"example.com": ["page", "id"]}
6361
))
6462
# Output: https://example.com/?page=1&id=123
63+
64+
# With custom parameter allowlist as a list
6565
print(url_normalize(
6666
"example.com?page=1&id=123&ref=test",
6767
filter_params=True,
@@ -70,6 +70,31 @@ print(url_normalize(
7070
# Output: https://example.com/?page=1&id=123
7171
```
7272

73+
### Command-line usage
74+
75+
You can also use `url-normalize` from the command line:
76+
77+
```bash
78+
$ url-normalize "www.foo.com:80/foo"
79+
# Output: https://www.foo.com:80/foo
80+
81+
# With custom default scheme
82+
$ url-normalize -s http "www.foo.com/foo"
83+
# Output: http://www.foo.com/foo
84+
85+
# With query parameter filtering
86+
$ url-normalize -f "www.google.com/search?q=test&utm_source=test"
87+
# Output: https://www.google.com/search?q=test
88+
89+
# With custom allowlist
90+
$ url-normalize -f -p page,id "example.com?page=1&id=123&ref=test"
91+
# Output: https://example.com/?page=1&id=123
92+
93+
# Via uv tool/uvx
94+
$ uvx url-normalize www.foo.com:80/foo
95+
# Output: https://www.foo.com:80/foo
96+
```
97+
7398
## Documentation
7499

75100
For a complete history of changes, see [CHANGELOG.md](CHANGELOG.md).

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "url-normalize"
3-
version = "2.0.1"
3+
version = "2.1.0"
44
description = "URL normalization for Python"
55
authors = [{ name = "Nikolay Panov", email = "github@npanov.com" }]
66
license = { text = "MIT" }
@@ -15,6 +15,9 @@ Repository = "https://github.com/niksite/url-normalize"
1515
Issues = "https://github.com/niksite/url-normalize/issues"
1616
Changelog = "https://github.com/niksite/url-normalize/blob/master/CHANGELOG.md"
1717

18+
[project.scripts]
19+
url-normalize = "url_normalize.cli:main"
20+
1821
[project.optional-dependencies]
1922
dev = [
2023
"mypy",

tests/test_cli.py

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
"""Tests for the command line interface."""
2+
3+
import subprocess
4+
import sys
5+
from unittest.mock import patch
6+
7+
import pytest
8+
9+
from url_normalize import __version__
10+
from url_normalize.cli import main
11+
12+
13+
def run_cli(*args: str) -> subprocess.CompletedProcess:
14+
"""Run the CLI command with given arguments.
15+
16+
Params:
17+
*args: Command line arguments to pass to the CLI.
18+
19+
Returns:
20+
A completed process with stdout, stderr, and return code.
21+
22+
"""
23+
command = [sys.executable, "-m", "url_normalize.cli", *list(args)]
24+
return subprocess.run( # noqa: S603
25+
command, capture_output=True, text=True, check=False
26+
)
27+
28+
29+
def test_cli_error_handling(capsys, monkeypatch):
30+
"""Test CLI error handling when URL normalization fails."""
31+
with patch("url_normalize.cli.url_normalize") as mock_normalize:
32+
mock_normalize.side_effect = Exception("Simulated error")
33+
monkeypatch.setattr("sys.argv", ["url-normalize", "http://example.com"])
34+
35+
with pytest.raises(SystemExit) as excinfo:
36+
main()
37+
38+
assert excinfo.value.code == 1
39+
captured = capsys.readouterr()
40+
assert "Error normalizing URL: Simulated error" in captured.err
41+
assert not captured.out
42+
43+
44+
def test_cli_basic_normalization() -> None:
45+
"""Test basic URL normalization via CLI."""
46+
url = "http://EXAMPLE.com/./path/../other/"
47+
expected = "http://example.com/other/"
48+
49+
result = run_cli(url)
50+
51+
assert result.returncode == 0
52+
assert result.stdout.strip() == expected
53+
assert not result.stderr
54+
55+
56+
def test_cli_basic_normalization_short_args() -> None:
57+
"""Test basic URL normalization via CLI using short arguments."""
58+
url = "http://EXAMPLE.com/./path/../other/"
59+
expected = "http://example.com/other/"
60+
# Using short args where applicable (none for the URL itself)
61+
62+
result = run_cli(url) # No short args needed for basic case
63+
64+
assert result.returncode == 0
65+
assert result.stdout.strip() == expected
66+
assert not result.stderr
67+
68+
69+
def test_cli_default_scheme() -> None:
70+
"""Test default scheme addition via CLI."""
71+
url = "//example.com"
72+
expected = "https://example.com/"
73+
74+
result = run_cli(url)
75+
76+
assert result.returncode == 0
77+
assert result.stdout.strip() == expected
78+
assert not result.stderr
79+
80+
81+
def test_cli_default_scheme_short_arg() -> None:
82+
"""Test default scheme addition via CLI using short argument."""
83+
url = "//example.com"
84+
expected = "https://example.com/"
85+
86+
result = run_cli(url) # Default scheme is implicit, no arg needed
87+
88+
assert result.returncode == 0
89+
assert result.stdout.strip() == expected
90+
assert not result.stderr
91+
92+
93+
def test_cli_custom_default_scheme() -> None:
94+
"""Test custom default scheme via CLI."""
95+
url = "//example.com"
96+
expected = "ftp://example.com/"
97+
98+
result = run_cli("--default-scheme", "ftp", url)
99+
100+
assert result.returncode == 0
101+
assert result.stdout.strip() == expected
102+
assert not result.stderr
103+
104+
105+
def test_cli_custom_default_scheme_short_arg() -> None:
106+
"""Test custom default scheme via CLI using short argument."""
107+
url = "//example.com"
108+
expected = "ftp://example.com/"
109+
110+
result = run_cli("-s", "ftp", url)
111+
112+
assert result.returncode == 0
113+
assert result.stdout.strip() == expected
114+
assert not result.stderr
115+
116+
117+
def test_cli_filter_params() -> None:
118+
"""Test parameter filtering via CLI."""
119+
url = "http://google.com?utm_source=test&q=1"
120+
expected = "http://google.com/?q=1"
121+
122+
result = run_cli("--filter-params", url)
123+
124+
assert result.returncode == 0
125+
assert result.stdout.strip() == expected
126+
assert not result.stderr
127+
128+
129+
def test_cli_filter_params_short_arg() -> None:
130+
"""Test parameter filtering via CLI using short argument."""
131+
url = "http://google.com?utm_source=test&q=1"
132+
expected = "http://google.com/?q=1"
133+
134+
result = run_cli("-f", url)
135+
136+
assert result.returncode == 0
137+
assert result.stdout.strip() == expected
138+
assert not result.stderr
139+
140+
141+
def test_cli_param_allowlist() -> None:
142+
"""Test parameter allowlist via CLI."""
143+
url = "http://example.com?remove=me&keep=this&remove_too=true"
144+
expected = "http://example.com/?keep=this"
145+
# Use filter_params to enable filtering, then allowlist to keep specific ones
146+
147+
result = run_cli("-f", "-p", "keep", url)
148+
149+
assert result.returncode == 0
150+
assert result.stdout.strip() == expected
151+
assert not result.stderr
152+
153+
154+
def test_cli_param_allowlist_multiple() -> None:
155+
"""Test parameter allowlist with multiple params via CLI."""
156+
url = "http://example.com?remove=me&keep=this&keep_too=yes&remove_too=true"
157+
expected = "http://example.com/?keep=this&keep_too=yes"
158+
159+
result = run_cli("-f", "-p", "keep,keep_too", url)
160+
161+
assert result.returncode == 0
162+
assert result.stdout.strip() == expected
163+
assert not result.stderr
164+
165+
166+
def test_cli_param_allowlist_without_filtering() -> None:
167+
"""Test allowlist has no effect if filtering is not enabled."""
168+
url = "http://example.com?remove=me&keep=this&remove_too=true"
169+
expected = "http://example.com/?remove=me&keep=this&remove_too=true"
170+
# Not using -f, so allowlist should be ignored
171+
172+
result = run_cli("-p", "keep", url)
173+
174+
assert result.returncode == 0
175+
assert result.stdout.strip() == expected
176+
assert not result.stderr
177+
178+
179+
def test_cli_no_url() -> None:
180+
"""Test CLI error when no URL is provided."""
181+
result = run_cli()
182+
183+
assert result.returncode != 0
184+
assert "the following arguments are required: url" in result.stderr
185+
186+
187+
def test_cli_version_long() -> None:
188+
"""Test version output with --version flag."""
189+
result = run_cli("--version")
190+
191+
assert result.returncode == 0
192+
assert __version__ in result.stdout
193+
assert not result.stderr
194+
195+
196+
def test_cli_version_short() -> None:
197+
"""Test version output with -v flag."""
198+
result = run_cli("-v")
199+
200+
assert result.returncode == 0
201+
assert __version__ in result.stdout
202+
assert not result.stderr
203+
204+
205+
@pytest.mark.skipif(
206+
sys.platform == "win32", reason="Charset handling differs on Windows CLI"
207+
)
208+
def test_cli_charset() -> None:
209+
"""Test charset handling via CLI (might be platform-dependent)."""
210+
# Example using Cyrillic characters which need correct encoding
211+
url = "http://пример.рф/path"
212+
expected_idn = "http://xn--e1afmkfd.xn--p1ai/path"
213+
214+
# Test with default UTF-8
215+
result_utf8 = run_cli(url)
216+
217+
assert result_utf8.returncode == 0
218+
assert result_utf8.stdout.strip() == expected_idn
219+
assert not result_utf8.stderr
220+
221+
# Test specifying UTF-8 explicitly
222+
result_charset = run_cli("--charset", "utf-8", url)
223+
224+
assert result_charset.returncode == 0
225+
assert result_charset.stdout.strip() == expected_idn
226+
assert not result_charset.stderr
227+
228+
# Test specifying UTF-8 explicitly using short arg
229+
result_charset_short = run_cli("-c", "utf-8", url)
230+
231+
assert result_charset_short.returncode == 0
232+
assert result_charset_short.stdout.strip() == expected_idn
233+
assert not result_charset_short.stderr

url_normalize/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@
88
from .url_normalize import url_normalize
99

1010
__license__ = "MIT"
11-
__version__ = "2.0.0"
11+
__version__ = "2.1.0"
1212

1313
__all__ = ["url_normalize"]

0 commit comments

Comments
 (0)