feat: Add use_wcwidth for Asian character support

DenverCoder1 · DenverCoder1 · commit ebfdaf8e9f1c · 2022-12-12T02:32:11.000-07:00
diff --git a/requirements.txt b/requirements.txt
@@ -1 +1,2 @@
+wcwidth<1
 typing-extensions>=3.7.4; python_version<'3.8'
diff --git a/table2ascii/options.py b/table2ascii/options.py
@@ -16,3 +16,4 @@ class Options:
     alignments: list[Alignment] | None
     cell_padding: int
     style: TableStyle
+    use_wcwidth: bool
diff --git a/table2ascii/table_to_ascii.py b/table2ascii/table_to_ascii.py
@@ -2,6 +2,8 @@
 
 from math import ceil, floor
 
+from wcwidth import wcswidth
+
 from .alignment import Alignment
 from .annotations import SupportsStr
 from .options import Options
@@ -35,6 +37,7 @@ def __init__(
         self.__first_col_heading = options.first_col_heading
         self.__last_col_heading = options.last_col_heading
         self.__cell_padding = options.cell_padding
+        self.__use_wcwidth = options.use_wcwidth
 
         # calculate number of columns
         self.__columns = self.__count_columns()
@@ -86,7 +89,7 @@ def __auto_column_widths(self) -> list[int]:
         def widest_line(value: SupportsStr) -> int:
             """Returns the width of the longest line in a multi-line string"""
             text = str(value)
-            return max(len(line) for line in text.splitlines()) if len(text) else 0
+            return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0
 
         column_widths = []
         # get the width necessary for each column
@@ -140,17 +143,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st
         text = str(cell_value)
         padding = " " * self.__cell_padding
         padded_text = f"{padding}{text}{padding}"
+        text_width = self.__str_width(padded_text)
         if alignment == Alignment.LEFT:
             # pad with spaces on the end
-            return padded_text + (" " * (width - len(padded_text)))
+            return padded_text + (" " * (width - text_width))
         if alignment == Alignment.CENTER:
             # pad with spaces, half on each side
-            before = " " * floor((width - len(padded_text)) / 2)
-            after = " " * ceil((width - len(padded_text)) / 2)
+            before = " " * floor((width - text_width) / 2)
+            after = " " * ceil((width - text_width) / 2)
             return before + padded_text + after
         if alignment == Alignment.RIGHT:
             # pad with spaces at the beginning
-            return (" " * (width - len(padded_text))) + padded_text
+            return (" " * (width - text_width)) + padded_text
         raise ValueError(f"The value '{alignment}' is not valid for alignment.")
 
     def __row_to_ascii(
@@ -339,6 +343,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str:
             for row in body
         )
 
+    def __str_width(self, text: str) -> int:
+        """
+        Returns the display width of the string, in columns, for the purposes of
+        monospace formatting. This is usually the same as the length of the string, but
+        can be different for double-width characters (East Asian Wide and East Asian
+        Fullwidth) or zero-width characters (combining characters, zero-width space, etc.)
+
+        Args:
+            text: The text to measure
+
+        Returns:
+            The width of the string in columns, or ``len(text)`` as a fallback
+        """
+        width = wcswidth(text) if self.__use_wcwidth else -1
+        # negative width: use_wcwidth is off or wcswidth could not measure the text, so use len
+        return width if width >= 0 else len(text)
+
     def to_ascii(self) -> str:
         """Generates a formatted ASCII table
 
@@ -375,6 +396,7 @@ def table2ascii(
     alignments: list[Alignment] | None = None,
     cell_padding: int = 1,
     style: TableStyle = PresetStyle.double_thin_compact,
+    use_wcwidth: bool = False,
 ) -> str:
     """Convert a 2D Python table to ASCII text
 
@@ -391,7 +413,7 @@ def table2ascii(
             Defaults to :py:obj:`False`.
         column_widths: List of widths in characters for each column. Any value of :py:obj:`None`
             indicates that the column width should be determined automatically. If :py:obj:`None`
-            is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized.
+            is passed instead of a :class:`list`, all columns will be automatically sized.
             Defaults to :py:obj:`None`.
         alignments: List of alignments for each column
             (ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to
@@ -401,6 +423,11 @@ def table2ascii(
             Defaults to ``1``.
         style: Table style to use for styling (preset styles can be imported).
             Defaults to :ref:`PresetStyle.double_thin_compact <PresetStyle.double_thin_compact>`.
+        use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of
+            :func:`len`. This is useful when dealing with double-width characters
+            (East Asian Wide and East Asian Fullwidth) or zero-width characters
+            (combining characters, zero-width space, etc.) which are not properly handled by :func:`len`.
+            Defaults to :py:obj:`False`.
 
     Returns:
         The generated ASCII table
@@ -416,5 +443,6 @@ def table2ascii(
             alignments=alignments,
             cell_padding=cell_padding,
             style=style,
+            use_wcwidth=use_wcwidth,
         ),
     ).to_ascii()
diff --git a/tests/test_convert.py b/tests/test_convert.py
@@ -246,3 +246,38 @@ def test_multiline_cells():
         "╚═══════════════════════════════════════════╝"
     )
     assert text == expected
+
+
+def test_east_asian_wide_characters_and_zero_width():
+    # use_wcwidth defaults to False: len() measures each wide character as 1 column
+    text = t2a(
+        header=["日期", "test"],
+        body=[["2022/12/11", "test"], ["2022/1/1", "測試"]],
+        cell_padding=5,
+    )
+    expected = (
+        "╔═══════════════════════════════════╗\n"
+        "║         日期               test     ║\n"
+        "╟───────────────────────────────────╢\n"
+        "║     2022/12/11           test     ║\n"
+        "║      2022/1/1             測試      ║\n"
+        "╚═══════════════════════════════════╝"
+    )
+    assert text == expected
+
+    # use_wcwidth=True: wcwidth.wcswidth() measures each wide character as 2 columns
+    text = t2a(
+        header=["日期", "test"],
+        body=[["2022/12/11", "test"], ["2022/1/1", "測試"]],
+        cell_padding=5,
+        use_wcwidth=True,
+    )
+    expected = (
+        "╔═══════════════════════════════════╗\n"
+        "║        日期              test     ║\n"
+        "╟───────────────────────────────────╢\n"
+        "║     2022/12/11           test     ║\n"
+        "║      2022/1/1            測試     ║\n"
+        "╚═══════════════════════════════════╝"
+    )
+    assert text == expected

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1,2 @@` |
| | `1` | `+wcwidth<1` |
| `1` | `2` | `typing-extensions>=3.7.4; python_version<'3.8'` |