Add CLI interface with charset, scheme, param filtering and allowlist support

niksite · niksite · commit e3b6dc4d2917 · 2025-03-30T15:08:29.000-07:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.1.0] - 2025-03-30
+
+### Added
+
+- New command-line interface (`url-normalize`) with support for:
+  - Version information (`--version`, `-v`)
+  - Charset selection (`--charset`, `-c`)
+  - Default scheme override (`--default-scheme`, `-s`)
+  - Query parameter filtering (`--filter-params`, `-f`)
+  - Custom allowlist for query parameters (`--param-allowlist`, `-p`)
+
+### Fixed
+
+- Do not encode equals sign in fragment (Fixes #36)
+
+### Internal
+
+- Add GitHub Action to publish package to PyPI using uv
+
 ## [2.0.1] - 2025-03-29
 
 ### Fixed
diff --git a/README.md b/README.md
@@ -38,8 +38,6 @@ pip install url-normalize
 
 ## Usage
 
-Basic usage:
-
 ```python
 from url_normalize import url_normalize
 
@@ -55,13 +53,15 @@ print(url_normalize("www.foo.com/foo", default_scheme="http"))
 print(url_normalize("www.google.com/search?q=test&utm_source=test", filter_params=True))
 # Output: https://www.google.com/search?q=test
 
-# With custom parameter allowlist
+# With custom parameter allowlist as a dict
 print(url_normalize(
     "example.com?page=1&id=123&ref=test",
     filter_params=True,
     param_allowlist={"example.com": ["page", "id"]}
 ))
 # Output: https://example.com?page=1&id=123
+
+# With custom parameter allowlist as a list
 print(url_normalize(
     "example.com?page=1&id=123&ref=test",
     filter_params=True,
@@ -70,6 +70,31 @@ print(url_normalize(
 # Output: https://example.com?page=1&id=123
 ```
 
+### Command-line usage
+
+You can also use `url-normalize` from the command line:
+
+```bash
+$ url-normalize "www.foo.com:80/foo"
+# Output: https://www.foo.com:80/foo
+
+# With custom default scheme
+$ url-normalize -s http "www.foo.com/foo"
+# Output: http://www.foo.com/foo
+
+# With query parameter filtering
+$ url-normalize -f "www.google.com/search?q=test&utm_source=test"
+# Output: https://www.google.com/search?q=test
+
+# With custom allowlist
+$ url-normalize -f -p page,id "example.com?page=1&id=123&ref=test"
+# Output: https://example.com/?page=1&id=123
+
+# Via uv tool/uvx
+$ uvx url-normalize www.foo.com:80/foo
+# Output: https://www.foo.com:80/foo
+```
+
 ## Documentation
 
 For a complete history of changes, see [CHANGELOG.md](CHANGELOG.md).
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "url-normalize"
-version = "2.0.1"
+version = "2.1.0"
 description = "URL normalization for Python"
 authors = [{ name = "Nikolay Panov", email = "github@npanov.com" }]
 license = { text = "MIT" }
@@ -15,6 +15,9 @@ Repository = "https://github.com/niksite/url-normalize"
 Issues = "https://github.com/niksite/url-normalize/issues"
 Changelog = "https://github.com/niksite/url-normalize/blob/master/CHANGELOG.md"
 
+[project.scripts]
+url-normalize = "url_normalize.cli:main"
+
 [project.optional-dependencies]
 dev = [
   "mypy",
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,233 @@
+"""Tests for the command line interface."""
+
+import subprocess
+import sys
+from unittest.mock import patch
+
+import pytest
+
+from url_normalize import __version__
+from url_normalize.cli import main
+
+
+def run_cli(*args: str) -> subprocess.CompletedProcess:
+    """Run the CLI command with given arguments.
+
+    Params:
+        *args: Command line arguments to pass to the CLI.
+
+    Returns:
+        A completed process with stdout, stderr, and return code.
+
+    """
+    command = [sys.executable, "-m", "url_normalize.cli", *list(args)]
+    return subprocess.run(  # noqa: S603
+        command, capture_output=True, text=True, check=False
+    )
+
+
+def test_cli_error_handling(capsys, monkeypatch):
+    """Test CLI error handling when URL normalization fails."""
+    with patch("url_normalize.cli.url_normalize") as mock_normalize:
+        mock_normalize.side_effect = Exception("Simulated error")
+        monkeypatch.setattr("sys.argv", ["url-normalize", "http://example.com"])
+
+        with pytest.raises(SystemExit) as excinfo:
+            main()
+
+        assert excinfo.value.code == 1
+        captured = capsys.readouterr()
+        assert "Error normalizing URL: Simulated error" in captured.err
+        assert not captured.out
+
+
+def test_cli_basic_normalization() -> None:
+    """Test basic URL normalization via CLI."""
+    url = "http://EXAMPLE.com/./path/../other/"
+    expected = "http://example.com/other/"
+
+    result = run_cli(url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_basic_normalization_short_args() -> None:
+    """Test basic URL normalization via CLI using short arguments."""
+    url = "http://EXAMPLE.com/./path/../other/"
+    expected = "http://example.com/other/"
+    # Using short args where applicable (none for the URL itself)
+
+    result = run_cli(url)  # No short args needed for basic case
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_default_scheme() -> None:
+    """Test default scheme addition via CLI."""
+    url = "//example.com"
+    expected = "https://example.com/"
+
+    result = run_cli(url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_default_scheme_short_arg() -> None:
+    """Test default scheme addition via CLI using short argument."""
+    url = "//example.com"
+    expected = "https://example.com/"
+
+    result = run_cli(url)  # Default scheme is implicit, no arg needed
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_custom_default_scheme() -> None:
+    """Test custom default scheme via CLI."""
+    url = "//example.com"
+    expected = "ftp://example.com/"
+
+    result = run_cli("--default-scheme", "ftp", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_custom_default_scheme_short_arg() -> None:
+    """Test custom default scheme via CLI using short argument."""
+    url = "//example.com"
+    expected = "ftp://example.com/"
+
+    result = run_cli("-s", "ftp", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_filter_params() -> None:
+    """Test parameter filtering via CLI."""
+    url = "http://google.com?utm_source=test&q=1"
+    expected = "http://google.com/?q=1"
+
+    result = run_cli("--filter-params", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_filter_params_short_arg() -> None:
+    """Test parameter filtering via CLI using short argument."""
+    url = "http://google.com?utm_source=test&q=1"
+    expected = "http://google.com/?q=1"
+
+    result = run_cli("-f", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_param_allowlist() -> None:
+    """Test parameter allowlist via CLI."""
+    url = "http://example.com?remove=me&keep=this&remove_too=true"
+    expected = "http://example.com/?keep=this"
+    # Use filter_params to enable filtering, then allowlist to keep specific ones
+
+    result = run_cli("-f", "-p", "keep", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_param_allowlist_multiple() -> None:
+    """Test parameter allowlist with multiple params via CLI."""
+    url = "http://example.com?remove=me&keep=this&keep_too=yes&remove_too=true"
+    expected = "http://example.com/?keep=this&keep_too=yes"
+
+    result = run_cli("-f", "-p", "keep,keep_too", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_param_allowlist_without_filtering() -> None:
+    """Test allowlist has no effect if filtering is not enabled."""
+    url = "http://example.com?remove=me&keep=this&remove_too=true"
+    expected = "http://example.com/?remove=me&keep=this&remove_too=true"
+    # Not using -f, so allowlist should be ignored
+
+    result = run_cli("-p", "keep", url)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == expected
+    assert not result.stderr
+
+
+def test_cli_no_url() -> None:
+    """Test CLI error when no URL is provided."""
+    result = run_cli()
+
+    assert result.returncode != 0
+    assert "the following arguments are required: url" in result.stderr
+
+
+def test_cli_version_long() -> None:
+    """Test version output with --version flag."""
+    result = run_cli("--version")
+
+    assert result.returncode == 0
+    assert __version__ in result.stdout
+    assert not result.stderr
+
+
+def test_cli_version_short() -> None:
+    """Test version output with -v flag."""
+    result = run_cli("-v")
+
+    assert result.returncode == 0
+    assert __version__ in result.stdout
+    assert not result.stderr
+
+
+@pytest.mark.skipif(
+    sys.platform == "win32", reason="Charset handling differs on Windows CLI"
+)
+def test_cli_charset() -> None:
+    """Test charset handling via CLI (might be platform-dependent)."""
+    # Example using Cyrillic characters which need correct encoding
+    url = "http://пример.рф/path"
+    expected_idn = "http://xn--e1afmkfd.xn--p1ai/path"
+
+    # Test with default UTF-8
+    result_utf8 = run_cli(url)
+
+    assert result_utf8.returncode == 0
+    assert result_utf8.stdout.strip() == expected_idn
+    assert not result_utf8.stderr
+
+    # Test specifying UTF-8 explicitly
+    result_charset = run_cli("--charset", "utf-8", url)
+
+    assert result_charset.returncode == 0
+    assert result_charset.stdout.strip() == expected_idn
+    assert not result_charset.stderr
+
+    # Test specifying UTF-8 explicitly using short arg
+    result_charset_short = run_cli("-c", "utf-8", url)
+
+    assert result_charset_short.returncode == 0
+    assert result_charset_short.stdout.strip() == expected_idn
+    assert not result_charset_short.stderr
diff --git a/url_normalize/__init__.py b/url_normalize/__init__.py
@@ -8,6 +8,6 @@
 from .url_normalize import url_normalize
 
 __license__ = "MIT"
-__version__ = "2.0.0"
+__version__ = "2.1.0"
 
 __all__ = ["url_normalize"]
diff --git a/url_normalize/cli.py b/url_normalize/cli.py