Skip to content

Commit

Permalink
Move path functions into their own module (#1362)
Browse files Browse the repository at this point in the history
  • Loading branch information
bdraco authored Oct 21, 2024
1 parent 12413d3 commit f42f34e
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 45 deletions.
4 changes: 2 additions & 2 deletions tests/test_normalize_path.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from yarl._url import _normalize_path
from yarl._path import normalize_path

PATHS = [
# No dots
Expand Down Expand Up @@ -33,4 +33,4 @@

@pytest.mark.parametrize("original,expected", PATHS)
def test__normalize_path(original, expected):
assert _normalize_path(original) == expected
assert normalize_path(original) == expected
41 changes: 41 additions & 0 deletions yarl/_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Utilities for working with paths."""

from collections.abc import Sequence
from contextlib import suppress


def normalize_path_segments(segments: Sequence[str]) -> list[str]:
    """Drop '.' and '..' from a sequence of str segments.

    Each '.' segment is discarded and each '..' segment removes the
    segment collected just before it.  A '..' that has nothing left to
    remove is ignored instead of raising, so the result never climbs
    above the first segment.

    If the last input segment is '.' or '..', an empty string is appended
    to the result so that joining it with '/' yields a trailing slash.

    Returns a new list; ``segments`` itself is not modified.
    """
    resolved_path: list[str] = []

    for seg in segments:
        if seg == "..":
            # Ignore any '..' segment that has nothing to pop.  Testing for
            # emptiness gives the same result as swallowing the IndexError
            # but avoids exception-based control flow inside the loop.
            if resolved_path:
                resolved_path.pop()
        elif seg != ".":
            resolved_path.append(seg)

    if segments and segments[-1] in (".", ".."):
        # The last segment was a relative dir reference, so an empty
        # segment is appended to produce the trailing '/' once joined.
        resolved_path.append("")

    return resolved_path


def normalize_path(path: str) -> str:
    """Drop '.' and '..' from a str path.

    The "/" root element of an absolute path is preserved and copied to
    the normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
    The remainder is split on "/", passed through
    normalize_path_segments() and joined back together with "/".
    """
    if path.startswith("/"):
        # Absolute path: set the root slash aside so it is not treated as
        # an (empty) segment, then re-attach it to the result.
        root, remainder = "/", path[1:]
    else:
        root, remainder = "", path

    return root + "/".join(normalize_path_segments(remainder.split("/")))
49 changes: 6 additions & 43 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import sys
import warnings
from collections.abc import Mapping, Sequence
from contextlib import suppress
from functools import _CacheInfo, lru_cache
from ipaddress import ip_address
from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload
Expand All @@ -13,6 +12,7 @@
from propcache.api import under_cached_property as cached_property

from ._parse import USES_AUTHORITY, make_netloc, split_netloc, split_url, unsplit_result
from ._path import normalize_path, normalize_path_segments
from ._query import (
Query,
QueryVariable,
Expand Down Expand Up @@ -121,43 +121,6 @@ def rewrite_module(obj: _T) -> _T:
return obj


def _normalize_path_segments(segments: "Sequence[str]") -> list[str]:
    """Drop '.' and '..' from a sequence of str segments.

    Each '.' segment is discarded and each '..' segment removes the
    segment collected just before it.  A '..' that has nothing left to
    remove is ignored instead of raising, so the result never climbs
    above the first segment.

    If the last input segment is '.' or '..', an empty string is appended
    to the result so that joining it with '/' yields a trailing slash.

    Returns a new list; ``segments`` itself is not modified.
    """
    resolved_path: list[str] = []

    for seg in segments:
        if seg == "..":
            # Ignore any '..' segment that has nothing to pop.  Testing for
            # emptiness gives the same result as swallowing the IndexError
            # but avoids exception-based control flow inside the loop.
            if resolved_path:
                resolved_path.pop()
        elif seg != ".":
            resolved_path.append(seg)

    if segments and segments[-1] in (".", ".."):
        # The last segment was a relative dir reference, so an empty
        # segment is appended to produce the trailing '/' once joined.
        resolved_path.append("")

    return resolved_path


def _normalize_path(path: str) -> str:
    """Drop '.' and '..' from a str path.

    The "/" root element of an absolute path is preserved and copied to
    the normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
    The remainder is split on "/", passed through
    _normalize_path_segments() and joined back together with "/".
    """
    if path.startswith("/"):
        # Absolute path: set the root slash aside so it is not treated as
        # an (empty) segment, then re-attach it to the result.
        root, remainder = "/", path[1:]
    else:
        root, remainder = "", path

    return root + "/".join(_normalize_path_segments(remainder.split("/")))


def _raise_for_authority_missing_abs_path() -> None:
"""Raise when the path in a URL with authority lacks a leading slash."""
msg = "Path in a URL with authority should start with a slash ('/') if set"
Expand Down Expand Up @@ -306,7 +269,7 @@ def __new__(
path = PATH_REQUOTER(path)
if netloc:
if "." in path:
path = _normalize_path(path)
path = normalize_path(path)
if path[0] != "/":
_raise_for_authority_missing_abs_path()

Expand Down Expand Up @@ -411,7 +374,7 @@ def build(
path = PATH_QUOTER(path) if path else path
if path and netloc:
if "." in path:
path = _normalize_path(path)
path = normalize_path(path)
if path[0] != "/":
_raise_for_authority_missing_abs_path()

Expand Down Expand Up @@ -964,7 +927,7 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":

if netloc := netloc:
# If the netloc is present, we need to ensure that the path is normalized
parsed = _normalize_path_segments(parsed) if needs_normalize else parsed
parsed = normalize_path_segments(parsed) if needs_normalize else parsed
if parsed and parsed[0] != "":
# inject a leading slash when adding a path to an absolute URL
# where there was none before
Expand Down Expand Up @@ -1082,7 +1045,7 @@ def with_path(self, path: str, *, encoded: bool = False) -> "URL":
if not encoded:
path = PATH_QUOTER(path)
if netloc:
path = _normalize_path(path) if "." in path else path
path = normalize_path(path) if "." in path else path
if path and path[0] != "/":
path = f"/{path}"
return self._from_tup((scheme, netloc, path, "", ""))
Expand Down Expand Up @@ -1325,7 +1288,7 @@ def join(self, url: "URL") -> "URL":
# which has to be removed
if orig_path[0] == "/":
path = path[1:]
path = _normalize_path(path) if "." in path else path
path = normalize_path(path) if "." in path else path

return self._from_tup((scheme, orig_netloc, path, query, fragment))

Expand Down

0 comments on commit f42f34e

Please sign in to comment.