Skip to content

Commit d1d903d

Browse files
authored
Implement Unicode Support
closes #93 * fix full-width characters issue (magmax/python-inquirer#432) * Fix the issue that raises a bunch of OSError exceptions in the test script (#93) * Fix UnicodeEncodeError when inputting emojis * add tests for new unicode support
1 parent a3e9b0b commit d1d903d

File tree

4 files changed

+60
-16
lines changed

4 files changed

+60
-16
lines changed

readchar/_win_read.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,38 @@
44

55

66
def readchar() -> str:
7-
"""Reads a single character from the input stream.
7+
"""Reads a single wide (Unicode) character from the input stream.
88
Blocks until a character is available."""
99

10-
# manual byte decoding because some bytes in windows are not utf-8 encodable.
11-
return chr(int.from_bytes(msvcrt.getch(), "big"))
10+
# read a single wide character from the input
11+
return msvcrt.getwch()
1212

1313

1414
def readkey() -> str:
1515
"""Reads the next keypress. If an escaped key is pressed, the full
1616
sequence is read and returned as noted in `_win_key.py`."""
1717

18+
# read first character
1819
ch = readchar()
1920

21+
# keys like CTRL+C should cause an interrupt
2022
if ch in config.INTERRUPT_KEYS:
2123
raise KeyboardInterrupt
2224

23-
# if it is a normal character:
24-
if ch not in "\x00\xe0":
25-
return ch
25+
# parse special multi character keys (see key module)
26+
# https://learn.microsoft.com/cpp/c-runtime-library/reference/getch-getwch#remarks
27+
if ch in "\x00\xe0":
28+
# read the second half
29+
# we always return the 0x00 prefix; this avoids duplicate entries in the key module
30+
ch = "\x00" + readchar()
2631

27-
# if it is a scpeal key, read second half:
28-
ch2 = readchar()
32+
# parse unicode surrogates
33+
# https://docs.python.org/3/c-api/unicode.html#c.Py_UNICODE_IS_SURROGATE
34+
if "\uD800" <= ch <= "\uDFFF":
35+
ch += readchar()
2936

30-
return "\x00" + ch2
37+
# combine the two surrogate halves into a single character via a utf-16 encode/decode round trip.
38+
# this prevents the character from being treated as a surrogate pair again.
39+
ch = ch.encode("utf-16", errors="surrogatepass").decode("utf-16")
40+
41+
return ch

tests/windows/conftest.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,20 @@
33
import pytest
44

55

6-
if sys.platform in ("win32", "cygwin"):
7-
import msvcrt
8-
9-
106
# ignore all tests in this folder if not on windows
117
def pytest_ignore_collect(path, config):
128
if sys.platform not in ("win32", "cygwin"):
139
return True
1410

1511

1612
@pytest.fixture
17-
def patched_stdin():
13+
def patched_stdin(monkeypatch):
1814
class mocked_stdin:
1915
def push(self, string):
20-
for c in string:
21-
msvcrt.ungetch(ord(c).to_bytes(1, "big"))
16+
# Create an iterator from the string
17+
characters = iter(string)
18+
19+
# Patch msvcrt.getwch to return the next character from the iterator.
20+
monkeypatch.setattr("msvcrt.getwch", lambda: next(characters))
2221

2322
return mocked_stdin()

tests/windows/test_readchar.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,18 @@ def test_controlCharacters(seq, key, patched_stdin):
6262
def test_CTRL_Characters(seq, key, patched_stdin):
6363
patched_stdin.push(seq)
6464
assert key == readchar()
65+
66+
67+
@pytest.mark.parametrize(
68+
["seq", "key"],
69+
[
70+
("\xe4", "ä"),
71+
("\xe1", "á"),
72+
("\xe5", "å"),
73+
("\xdf", "ß"),
74+
("\u304c", "が"),
75+
],
76+
)
77+
def test_Unicode_Characters(seq, key, patched_stdin):
78+
patched_stdin.push(seq)
79+
assert key == readchar()

tests/windows/test_readkey.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,22 @@ def test_navigationKeys(seq, key, patched_stdin):
6565
def test_functionKeys(seq, key, patched_stdin):
6666
patched_stdin.push(seq)
6767
assert key == readkey()
68+
69+
70+
@pytest.mark.parametrize(
71+
["seq", "key"],
72+
[
73+
("\ud83d\ude00", "😀"),
74+
("\ud83d\ude18", "😘"),
75+
("\ud83d\ude09", "😉"),
76+
("\ud83d\udc4d", "👍"),
77+
("\ud83d\udc35", "🐵"),
78+
("\ud83c\udf47", "🍇"),
79+
("\ud83c\udf83", "🎃"),
80+
("\ud83d\udc53", "👓"),
81+
("\ud83c\udfc1", "🏁"),
82+
],
83+
)
84+
def test_UnicodeSurrogates(seq, key, patched_stdin):
85+
patched_stdin.push(seq)
86+
assert key == readkey()

0 commit comments

Comments
 (0)