Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions docs/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,12 @@ languages.
History
=======

0.4.1 *next release*
* **Bugfix** OSC 8 hyperlinks are now continued correctly when split mid-text by ``wrap()``. `PR #193`_.

0.4.0 *2026-01-25*
* **New** Functions `iter_graphemes_reverse()`_, `grapheme_boundary_before()`_.
* **Bugfix** OSC Hyperlinks should not be broken by ``wrap()``
* **New** Functions `iter_graphemes_reverse()`_, `grapheme_boundary_before()`_. `PR #192`_.
* **Bugfix** OSC Hyperlinks should not be broken by ``wrap()``. `PR #191`_.

0.3.5 *2026-01-24*
* **Bugfix** packaging of 0.3.4 contains a failing test.
Expand Down Expand Up @@ -624,6 +627,9 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`PR #184`: https://github.com/jquast/wcwidth/pull/184
.. _`PR #185`: https://github.com/jquast/wcwidth/pull/185
.. _`PR #188`: https://github.com/jquast/wcwidth/pull/188
.. _`PR #191`: https://github.com/jquast/wcwidth/pull/191
.. _`PR #192`: https://github.com/jquast/wcwidth/pull/192
.. _`PR #193`: https://github.com/jquast/wcwidth/pull/193
.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
.. _`jquast/blessed`: https://github.com/jquast/blessed
.. _`selectel/pyte`: https://github.com/selectel/pyte
Expand Down
25 changes: 25 additions & 0 deletions tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,31 @@ def test_wrap_with_ansi(benchmark):
benchmark(wcwidth.wrap, text, 20)


def test_wrap_hyperlink_no_id(benchmark):
    """
    Benchmark wrap() on text whose OSC 8 hyperlinks carry no ``id`` parameter.

    Every link spans several words, so wrapping at width 40 has to split links
    across lines, which requires an id to be generated for them.
    """
    anchor = '\x1b]8;;https://example.com/path\x1b\\click here for details\x1b]8;;\x1b\\'
    sentence = f'See {anchor} and also {anchor} for more. Read {anchor} now. '
    # Repeat the sentence ten times to give the benchmark a meaningful workload.
    benchmark(wcwidth.wrap, sentence * 10, 40)


def test_wrap_hyperlink_with_id(benchmark):
    """
    Benchmark wrap() on text whose OSC 8 hyperlinks already carry an ``id``.

    Two distinct links (``id=a`` and ``id=b``) are interleaved with plain words
    and wrapped at width 40.
    """
    first = '\x1b]8;id=a;https://example.com\x1b\\click here for details\x1b]8;;\x1b\\'
    second = '\x1b]8;id=b;https://other.org\x1b\\visit this page now\x1b]8;;\x1b\\'
    sentence = f'See {first} and also {second} for more. Read {first} now. '
    # Repeat the sentence ten times to give the benchmark a meaningful workload.
    benchmark(wcwidth.wrap, sentence * 10, 40)


def test_wrap_hyperlink_mixed(benchmark):
    """
    Benchmark wrap() on text mixing OSC 8 hyperlinks with and without an ``id``.

    Links lacking an id alternate with links that declare ``id=x``; the text is
    wrapped at width 40.
    """
    anonymous = '\x1b]8;;https://example.com\x1b\\link without id here\x1b]8;;\x1b\\'
    identified = '\x1b]8;id=x;https://other.org\x1b\\link with id here\x1b]8;;\x1b\\'
    sentence = f'First {anonymous} then {identified} and {anonymous} again {identified} end. '
    # Repeat the sentence ten times to give the benchmark a meaningful workload.
    benchmark(wcwidth.wrap, sentence * 10, 40)


def test_clip_ascii(benchmark):
"""Benchmark clip() with ASCII string."""
benchmark(wcwidth.clip, 'hello world', 0, 5)
Expand Down
86 changes: 86 additions & 0 deletions tests/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
from wcwidth import iter_sequences
from wcwidth.textwrap import SequenceTextWrapper, wrap


@pytest.fixture(autouse=True)
def mock_hyperlink_ids(monkeypatch):
    """
    Replace ``secrets.token_hex`` with a deterministic counter.

    Each call to the replacement returns the next integer, starting at 1,
    rendered as zero-padded lowercase hex that is ``2 * n`` digits wide. With
    ``n == 4`` the generated values are ``00000001``, ``00000002``, and so on,
    which keeps expected hyperlink ids predictable.
    """
    # One entry is recorded per call; the list length is the running counter.
    issued = []

    def fake_token_hex(n):
        issued.append(n)
        return format(len(issued), f'0{n * 2}x')

    monkeypatch.setattr('secrets.token_hex', fake_token_hex)


SGR_RED = '\x1b[31m'
SGR_BLUE = '\x1b[34m'
SGR_BOLD = '\x1b[1m'
Expand Down Expand Up @@ -321,6 +335,78 @@ def test_wrap_tabsize_wide_chars(text, w, tabsize, expected):
6,
['foo', f'{OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL}', 'bar'],
),
( # hyperlink with internal space - breaks with id continuation (ST)
f'Go {OSC_START_ST}Click here{OSC_END_ST} now',
5,
[
'Go',
'\x1b]8;id=00000001;http://example.com\x1b\\Click\x1b]8;;\x1b\\',
'\x1b]8;id=00000001;http://example.com\x1b\\here\x1b]8;;\x1b\\',
'now',
],
),
( # hyperlink with internal space - breaks with id continuation (BEL)
f'Go {OSC_START_BEL}Click here{OSC_END_BEL} now',
5,
[
'Go',
'\x1b]8;id=00000001;http://example.com\x07Click\x1b]8;;\x07',
'\x1b]8;id=00000001;http://example.com\x07here\x1b]8;;\x07',
'now',
],
),
( # hyperlink with existing id= parameter is preserved
'\x1b]8;id=my-link;http://example.com\x1b\\Click here\x1b]8;;\x1b\\',
6,
[
'\x1b]8;id=my-link;http://example.com\x1b\\Click\x1b]8;;\x1b\\',
'\x1b]8;id=my-link;http://example.com\x1b\\here\x1b]8;;\x1b\\',
],
),
( # hyperlink spanning 3+ lines
f'{OSC_START_ST}one two three{OSC_END_ST}',
5,
[
'\x1b]8;id=00000001;http://example.com\x1b\\one\x1b]8;;\x1b\\',
'\x1b]8;id=00000001;http://example.com\x1b\\two\x1b]8;;\x1b\\',
'\x1b]8;id=00000001;http://example.com\x1b\\three\x1b]8;;\x1b\\',
],
),
( # multiple hyperlinks in same text
f'{OSC_START_ST}ab cd{OSC_END_ST} {OSC_START_BEL}ef gh{OSC_END_BEL}',
4,
[
'\x1b]8;id=00000001;http://example.com\x1b\\ab\x1b]8;;\x1b\\',
'\x1b]8;id=00000001;http://example.com\x1b\\cd\x1b]8;;\x1b\\',
'\x1b]8;id=00000002;http://example.com\x07ef\x1b]8;;\x07',
'\x1b]8;id=00000002;http://example.com\x07gh\x1b]8;;\x07',
],
),
( # long word inside hyperlink forces character-level breaking
f'{OSC_START_ST}abcdefgh{OSC_END_ST}',
3,
[
'\x1b]8;id=00000001;http://example.com\x1b\\abc\x1b]8;;\x1b\\',
'\x1b]8;id=00000001;http://example.com\x1b\\def\x1b]8;;\x1b\\',
'\x1b]8;id=00000001;http://example.com\x1b\\gh\x1b]8;;\x1b\\',
],
),
( # params with other keys but no id - id is prepended, other params preserved
'\x1b]8;foo=bar;http://example.com\x1b\\Click here\x1b]8;;\x1b\\',
6,
[
'\x1b]8;id=00000001:foo=bar;http://example.com\x1b\\Click\x1b]8;;\x1b\\',
'\x1b]8;id=00000001:foo=bar;http://example.com\x1b\\here\x1b]8;;\x1b\\',
],
),
( # id not at start of params (junk:id=given) - full params preserved
'\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\Click here\x1b]8;;\x1b\\',
6,
[
'\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\Click\x1b]8;;\x1b\\',
'\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\here\x1b]8;;\x1b\\',
],
),
]


Expand Down
124 changes: 117 additions & 7 deletions wcwidth/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
from __future__ import annotations

# std imports
import re
import secrets
import textwrap

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, NamedTuple

# local
from .wcwidth import width as _width
Expand All @@ -21,6 +23,35 @@
from typing import Any, Literal


class _HyperlinkState(NamedTuple):
"""State for tracking an open OSC 8 hyperlink across line breaks."""

url: str # hyperlink target URL
params: str # id=xxx and other key=value pairs separated by :
terminator: str # BEL (\x07) or ST (\x1b\\)


# Hyperlink parsing. Matches an OSC 8 sequence and captures, in order:
#   1. params     -- everything between the first and second ';' (may be empty)
#   2. url        -- a run of characters other than BEL or ESC (may be empty)
#   3. terminator -- BEL (\x07) or ST (ESC followed by a backslash)
# Because the url group may be empty, a close sequence (empty params and url)
# matches this pattern as well.
_HYPERLINK_OPEN_RE = re.compile(r'\x1b]8;([^;]*);([^\x07\x1b]*)(\x07|\x1b\\)')


def _parse_hyperlink_open(seq: str) -> _HyperlinkState | None:
    """
    Parse an OSC 8 sequence found at the start of *seq*.

    :param seq: Text expected to begin with an OSC 8 hyperlink sequence.
    :returns: The parsed ``(url, params, terminator)`` state, or ``None`` when
        *seq* does not start with a sequence matching the hyperlink pattern.
        The returned ``url`` may be an empty string, because the pattern also
        accepts sequences without a URL.
    """
    matched = _HYPERLINK_OPEN_RE.match(seq)
    if matched is None:
        return None
    # The pattern captures params first and url second; the tuple stores url first.
    params, url, terminator = matched.groups()
    return _HyperlinkState(url=url, params=params, terminator=terminator)


def _make_hyperlink_open(url: str, params: str, terminator: str) -> str:
"""Generate OSC 8 open sequence."""
return f'\x1b]8;{params};{url}{terminator}'


def _make_hyperlink_close(terminator: str) -> str:
"""Generate OSC 8 close sequence."""
return f'\x1b]8;;{terminator}'


class SequenceTextWrapper(textwrap.TextWrapper):
"""
Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`.
Expand All @@ -35,8 +66,10 @@ class SequenceTextWrapper(textwrap.TextWrapper):
via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis
and variations, regional indicator flags, and combining characters.

OSC hyperlink sequences are treated as word boundaries, ensuring that text adjacent to
hyperlinks wraps correctly without breaking the hyperlink structure.
OSC 8 hyperlinks are handled specially: when a hyperlink must span multiple lines, each line
receives complete open/close sequences with a shared ``id`` parameter, ensuring terminals
treat the fragments as a single hyperlink for hover underlining. If the original hyperlink
already has an ``id`` parameter, it is preserved; otherwise, one is generated.
"""

def __init__(self, width: int = 70, *,
Expand All @@ -58,6 +91,11 @@ def __init__(self, width: int = 70, *,
self.tabsize = tabsize
self.ambiguous_width = ambiguous_width

@staticmethod
def _next_hyperlink_id() -> str:
"""Generate unique hyperlink id as 8-character hex string."""
return secrets.token_hex(4)

def _width(self, text: str) -> int:
"""Measure text width accounting for sequences."""
return _width(text, control_codes=self.control_codes, tabsize=self.tabsize,
Expand Down Expand Up @@ -120,15 +158,17 @@ def _split(self, text: str) -> list[str]: # pylint: disable=too-many-locals
stripped_text += char
prev_was_hyperlink_close = False
else:
is_hyperlink_close = segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07'))

# Conditionally insert space before OSC sequences to artificially create word
# boundary, but *not* before hyperlink close sequences, to ensure hyperlink is
# terminated on the same line.
is_hyperlink_close = segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07'))
if (segment.startswith('\x1b]') and stripped_text and not
stripped_text[-1].isspace()):
if not is_hyperlink_close:
stripped_text += ' '
char_end.append(original_pos)

# Escape sequences advance position but don't add to stripped text
original_pos += len(segment)
prev_was_hyperlink_close = is_hyperlink_close
Expand Down Expand Up @@ -176,15 +216,24 @@ def _wrap_chunks(self, chunks: list[str]) -> list[str]: # pylint: disable=too-m
Wrap chunks into lines using sequence-aware width.

Override TextWrapper._wrap_chunks to use _width instead of len. Follows stdlib's algorithm:
greedily fill lines, handle long words.
greedily fill lines, handle long words. Also handle OSC hyperlink processing. When
hyperlinks span multiple lines, each line gets complete open/close sequences with matching
id parameters for hover underlining continuity per OSC 8 spec.
"""
# pylint: disable=too-many-branches
# pylint: disable=too-many-branches,too-many-statements,too-complex,too-many-locals
# pylint: disable=too-many-nested-blocks
# the hyperlink code in particular really pushes the complexity rating of this method.
# preferring to keep it "all in one method" because of so much local state and manipulation.
if not chunks:
return []

lines: list[str] = []
is_first_line = True

hyperlink_state: _HyperlinkState | None = None
# Track the id we're using for the current hyperlink continuation
current_hyperlink_id: str | None = None

# Arrange in reverse order so items can be efficiently popped
chunks = list(reversed(chunks))

Expand All @@ -196,6 +245,12 @@ def _wrap_chunks(self, chunks: list[str]) -> list[str]: # pylint: disable=too-m
indent = self.initial_indent if is_first_line else self.subsequent_indent
line_width = self.width - self._width(indent)

# If continuing a hyperlink from previous line, prepend open sequence
if hyperlink_state is not None:
open_seq = _make_hyperlink_open(
hyperlink_state.url, hyperlink_state.params, hyperlink_state.terminator)
chunks[-1] = open_seq + chunks[-1]

# Drop leading whitespace (except at very start)
# When dropping, transfer any sequences to the next chunk.
# Only drop if there's actual whitespace text, not if it's only sequences.
Expand Down Expand Up @@ -241,6 +296,41 @@ def _wrap_chunks(self, chunks: list[str]) -> list[str]: # pylint: disable=too-m

if current_line:
line_content = ''.join(current_line)

# Track hyperlink state through this line's content
new_state = self._track_hyperlink_state(line_content, hyperlink_state)

# If we end inside a hyperlink, append close sequence
if new_state is not None:
# Ensure we have an id for continuation
if current_hyperlink_id is None:
if 'id=' in new_state.params:
current_hyperlink_id = new_state.params
elif new_state.params:
# Prepend id to existing params (per OSC 8 spec, params can have
# multiple key=value pairs separated by :)
current_hyperlink_id = (
f'id={self._next_hyperlink_id()}:{new_state.params}')
else:
current_hyperlink_id = f'id={self._next_hyperlink_id()}'
line_content = line_content + _make_hyperlink_close(new_state.terminator)

# Also need to inject the id into the opening sequence if it didn't have one
if 'id=' not in new_state.params:
# Find and replace the original open sequence with one that has id
old_open = _make_hyperlink_open(
new_state.url, new_state.params, new_state.terminator)
new_open = _make_hyperlink_open(
new_state.url, current_hyperlink_id, new_state.terminator)
line_content = line_content.replace(old_open, new_open, 1)

# Update state for next line, using computed id
hyperlink_state = _HyperlinkState(
new_state.url, current_hyperlink_id, new_state.terminator)
else:
hyperlink_state = None
current_hyperlink_id = None # Reset id when hyperlink closes

# Strip trailing whitespace when drop_whitespace is enabled
# (matches CPython #140627 fix behavior)
if self.drop_whitespace:
Expand All @@ -250,6 +340,25 @@ def _wrap_chunks(self, chunks: list[str]) -> list[str]: # pylint: disable=too-m

return lines

def _track_hyperlink_state(
        self, text: str,
        state: _HyperlinkState | None) -> _HyperlinkState | None:
    """
    Follow OSC 8 open/close sequences through *text*.

    :param text: Text to scan for hyperlink sequences.
    :param state: Hyperlink open before *text*, or ``None`` if there is none.
    :returns: Hyperlink still open after *text*, or ``None`` if there is none.
    """
    close_sequences = ('\x1b]8;;\x1b\\', '\x1b]8;;\x07')
    for segment, is_seq in iter_sequences(text):
        if not is_seq:
            continue
        opened = _parse_hyperlink_open(segment)
        if opened is not None and opened.url:
            # A non-empty URL marks an opening sequence; it replaces any prior state.
            state = opened
            continue
        if segment.startswith(close_sequences):
            state = None
    return state

def _handle_long_word(self, reversed_chunks: list[str],
cur_line: list[str], cur_len: int,
width: int) -> None:
Expand All @@ -264,8 +373,9 @@ def _handle_long_word(self, reversed_chunks: list[str],
else:
space_left = width - cur_len

chunk = reversed_chunks[-1]

if self.break_long_words:
chunk = reversed_chunks[-1]
break_at_hyphen = False
hyphen_end = 0

Expand Down
Loading