Skip to content

Commit ebfdaf8

Browse files
committed
feat: Add use_wcwidth for Asian character support
1 parent cf5e513 commit ebfdaf8

File tree

4 files changed

+71
-6
lines changed

4 files changed

+71
-6
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
wcwidth<1
12
typing-extensions>=3.7.4; python_version<'3.8'

table2ascii/options.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ class Options:
1616
alignments: list[Alignment] | None
1717
cell_padding: int
1818
style: TableStyle
19+
use_wcwidth: bool

table2ascii/table_to_ascii.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from math import ceil, floor
44

5+
from wcwidth import wcswidth
6+
57
from .alignment import Alignment
68
from .annotations import SupportsStr
79
from .options import Options
@@ -35,6 +37,7 @@ def __init__(
3537
self.__first_col_heading = options.first_col_heading
3638
self.__last_col_heading = options.last_col_heading
3739
self.__cell_padding = options.cell_padding
40+
self.__use_wcwidth = options.use_wcwidth
3841

3942
# calculate number of columns
4043
self.__columns = self.__count_columns()
@@ -86,7 +89,7 @@ def __auto_column_widths(self) -> list[int]:
8689
def widest_line(value: SupportsStr) -> int:
8790
"""Returns the width of the longest line in a multi-line string"""
8891
text = str(value)
89-
return max(len(line) for line in text.splitlines()) if len(text) else 0
92+
return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0
9093

9194
column_widths = []
9295
# get the width necessary for each column
@@ -140,17 +143,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st
140143
text = str(cell_value)
141144
padding = " " * self.__cell_padding
142145
padded_text = f"{padding}{text}{padding}"
146+
text_width = self.__str_width(padded_text)
143147
if alignment == Alignment.LEFT:
144148
# pad with spaces on the end
145-
return padded_text + (" " * (width - len(padded_text)))
149+
return padded_text + (" " * (width - text_width))
146150
if alignment == Alignment.CENTER:
147151
# pad with spaces, half on each side
148-
before = " " * floor((width - len(padded_text)) / 2)
149-
after = " " * ceil((width - len(padded_text)) / 2)
152+
before = " " * floor((width - text_width) / 2)
153+
after = " " * ceil((width - text_width) / 2)
150154
return before + padded_text + after
151155
if alignment == Alignment.RIGHT:
152156
# pad with spaces at the beginning
153-
return (" " * (width - len(padded_text))) + padded_text
157+
return (" " * (width - text_width)) + padded_text
154158
raise ValueError(f"The value '{alignment}' is not valid for alignment.")
155159

156160
def __row_to_ascii(
@@ -339,6 +343,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str:
339343
for row in body
340344
)
341345

346+
def __str_width(self, text: str) -> int:
347+
"""
348+
Returns the width of the string in characters for the purposes of monospace
349+
formatting. This is usually the same as the length of the string, but can be
350+
different for double-width characters (East Asian Wide and East Asian Fullwidth)
351+
or zero-width characters (combining characters, zero-width space, etc.)
352+
353+
Args:
354+
text: The text to measure
355+
356+
Returns:
357+
The display width of the string in monospace columns (or its length when wcwidth is not used)
358+
"""
359+
width = wcswidth(text) if self.__use_wcwidth else -1
360+
# wcswidth returns -1 when the text contains a non-printable (control) character; in that case, or when use_wcwidth is False, fall back to len
361+
return width if width >= 0 else len(text)
362+
342363
def to_ascii(self) -> str:
343364
"""Generates a formatted ASCII table
344365
@@ -375,6 +396,7 @@ def table2ascii(
375396
alignments: list[Alignment] | None = None,
376397
cell_padding: int = 1,
377398
style: TableStyle = PresetStyle.double_thin_compact,
399+
use_wcwidth: bool = False,
378400
) -> str:
379401
"""Convert a 2D Python table to ASCII text
380402
@@ -391,7 +413,7 @@ def table2ascii(
391413
Defaults to :py:obj:`False`.
392414
column_widths: List of widths in characters for each column. Any value of :py:obj:`None`
393415
indicates that the column width should be determined automatically. If :py:obj:`None`
394-
is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized.
416+
is passed instead of a :class:`list`, all columns will be automatically sized.
395417
Defaults to :py:obj:`None`.
396418
alignments: List of alignments for each column
397419
(ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to
@@ -401,6 +423,11 @@ def table2ascii(
401423
Defaults to ``1``.
402424
style: Table style to use for styling (preset styles can be imported).
403425
Defaults to :ref:`PresetStyle.double_thin_compact <PresetStyle.double_thin_compact>`.
426+
use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of
427+
:func:`len`. This is useful when dealing with double-width characters
428+
(East Asian Wide and East Asian Fullwidth) or zero-width characters
429+
(combining characters, zero-width space, etc.) which are not properly handled by :func:`len`.
430+
Defaults to :py:obj:`False`.
404431
405432
Returns:
406433
The generated ASCII table
@@ -416,5 +443,6 @@ def table2ascii(
416443
alignments=alignments,
417444
cell_padding=cell_padding,
418445
style=style,
446+
use_wcwidth=use_wcwidth,
419447
),
420448
).to_ascii()

tests/test_convert.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,3 +246,38 @@ def test_multiline_cells():
246246
"╚═══════════════════════════════════════════╝"
247247
)
248248
assert text == expected
249+
250+
251+
def test_east_asian_wide_characters_and_zero_width():
252+
# default behavior (use_wcwidth=False): cell widths are measured with len()
253+
text = t2a(
254+
header=["日期", "test"],
255+
body=[["2022/12/11", "test"], ["2022/1/1", "測試"]],
256+
cell_padding=5,
257+
)
258+
expected = (
259+
"╔═══════════════════════════════════╗\n"
260+
"║ 日期 test ║\n"
261+
"╟───────────────────────────────────╢\n"
262+
"║ 2022/12/11 test ║\n"
263+
"║ 2022/1/1 測試 ║\n"
264+
"╚═══════════════════════════════════╝"
265+
)
266+
assert text == expected
267+
268+
# use_wcwidth=True: cell widths are measured with wcwidth.wcswidth() instead of len()
269+
text = t2a(
270+
header=["日期", "test"],
271+
body=[["2022/12/11", "test"], ["2022/1/1", "測試"]],
272+
cell_padding=5,
273+
use_wcwidth=True,
274+
)
275+
expected = (
276+
"╔═══════════════════════════════════╗\n"
277+
"║ 日期 test ║\n"
278+
"╟───────────────────────────────────╢\n"
279+
"║ 2022/12/11 test ║\n"
280+
"║ 2022/1/1 測試 ║\n"
281+
"╚═══════════════════════════════════╝"
282+
)
283+
assert text == expected

0 commit comments

Comments
 (0)