Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d4ef31a
Add str.lower() and str.upper() primitives
Jahongir-Qurbonov Jul 4, 2025
124dceb
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 4, 2025
8065f9c
Refactor tolower_ucs4 and toupper_ucs4 functions by removing fallback…
Jahongir-Qurbonov Jul 6, 2025
1d40499
Optimize CPyStr_Lower and CPyStr_Upper for ASCII strings by removing …
Jahongir-Qurbonov Jul 6, 2025
2750549
Optimize CPyStr_Lower and CPyStr_Upper for ASCII strings by removing …
Jahongir-Qurbonov Jul 6, 2025
62520ef
Add test case for lower() method with special case for uppercase 'SS'
Jahongir-Qurbonov Jul 6, 2025
cc2ed14
Refactor CPyStr_Lower and CPyStr_Upper to use consistent variable nam…
Jahongir-Qurbonov Jul 6, 2025
5cdca5a
Add test case for lower() method to handle Greek capital sigma
Jahongir-Qurbonov Jul 6, 2025
7049d8b
Add commented-out test cases for lower() and upper() methods to handl…
Jahongir-Qurbonov Jul 6, 2025
bd4bde2
Merge branch 'python:master' into str-lower-upper
Jahongir-Qurbonov Jul 7, 2025
b8fe8f5
Merge branch 'master' into str-lower-upper
Jahongir-Qurbonov Aug 17, 2025
ea78a12
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 17, 2025
86abdde
Merge branch 'master' into str-lower-upper
Jahongir-Qurbonov Aug 21, 2025
2ae0353
Merge branch 'master' into str-lower-upper
Jahongir-Qurbonov Sep 2, 2025
bb9ba27
Merge branch 'master' into str-lower-upper
Jahongir-Qurbonov Sep 10, 2025
e1f1147
Merge branch 'master' into str-lower-upper
Jahongir-Qurbonov Sep 12, 2025
cffdc23
Merge branch 'master' into str-lower-upper
Jahongir-Qurbonov Dec 16, 2025
5589a5c
Merge branch 'python:master' into str-lower-upper
Jahongir-Qurbonov Dec 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,8 @@ PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors);
Py_ssize_t CPyStr_Count(PyObject *unicode, PyObject *substring, CPyTagged start);
Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged start, CPyTagged end);
CPyTagged CPyStr_Ord(PyObject *obj);
// Runtime helpers for str.lower() and str.upper(); each returns NULL on failure.
PyObject *CPyStr_Lower(PyObject *self);
PyObject *CPyStr_Upper(PyObject *self);
PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count);


Expand Down
75 changes: 75 additions & 0 deletions mypyc/lib-rt/str_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,81 @@ CPyTagged CPyStr_Ord(PyObject *obj) {
return CPY_INT_TAG;
}

// Implements str.lower(): return a lowercased copy of 'self'.
//
// Returns a new reference, or NULL with an exception set on failure.
PyObject *CPyStr_Lower(PyObject *self) {
    // PyUnicode_IS_ASCII() below requires the string to be in "ready" form.
    if (PyUnicode_READY(self) == -1)
        return NULL;

    // Fast path: every ASCII character lowercases to exactly one ASCII
    // character, so the result has the same length and the same compact
    // ASCII layout as the input.
    if (PyUnicode_IS_ASCII(self)) {
        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
        PyObject *res = PyUnicode_New(len, 127);
        if (res == NULL)
            return NULL;
        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
        Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
        for (Py_ssize_t i = 0; i < len; i++) {
            res_data[i] = Py_TOLOWER((unsigned char) data[i]);
        }
        return res;
    }

    // General path: defer to CPython's own str.lower(). A per-code-point
    // Py_UNICODE_TOLOWER() loop is not equivalent to it, because full
    // Unicode lowercasing
    //   - can change the length ('İ' U+0130 becomes 'i' + U+0307),
    //   - is context sensitive (Greek capital sigma at the end of a word),
    //   - can lower the maximum code point ('ẞ' U+1E9E becomes 'ß' U+00DF),
    //     which would leave a same-sized result buffer in a non-canonical
    //     storage kind that does not compare equal to a regular string.
    return PyObject_CallMethod((PyObject *)&PyUnicode_Type, "lower", "O", self);
}

// Implements str.upper(): return an uppercased copy of 'self'.
//
// Returns a new reference, or NULL with an exception set on failure.
PyObject *CPyStr_Upper(PyObject *self) {
    // PyUnicode_IS_ASCII() below requires the string to be in "ready" form.
    if (PyUnicode_READY(self) == -1)
        return NULL;

    // Fast path: every ASCII character uppercases to exactly one ASCII
    // character, so the result has the same length and the same compact
    // ASCII layout as the input.
    if (PyUnicode_IS_ASCII(self)) {
        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
        PyObject *res = PyUnicode_New(len, 127);
        if (res == NULL)
            return NULL;
        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
        Py_UCS1 *res_data = PyUnicode_1BYTE_DATA(res);
        for (Py_ssize_t i = 0; i < len; i++) {
            res_data[i] = Py_TOUPPER((unsigned char) data[i]);
        }
        return res;
    }

    // General path: defer to CPython's own str.upper(). A per-code-point
    // Py_UNICODE_TOUPPER() loop into a buffer sized from the input is not
    // equivalent to it, because full Unicode uppercasing
    //   - can change the length ('ß' becomes 'SS', the 'ffi' ligature
    //     becomes 'FFI'),
    //   - can raise the maximum code point past the input's storage kind
    //     ('ÿ' U+00FF becomes 'Ÿ' U+0178, 'µ' U+00B5 becomes 'Μ' U+039C),
    //     so writing into a same-kind buffer would truncate the character.
    return PyObject_CallMethod((PyObject *)&PyUnicode_Type, "upper", "O", self);
}

PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count) {
Py_ssize_t temp_count = CPyTagged_AsSsize_t(count);
if (temp_count == -1 && PyErr_Occurred()) {
Expand Down
18 changes: 18 additions & 0 deletions mypyc/primitives/str_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,3 +507,21 @@
c_function_name="CPyStr_Ord",
error_kind=ERR_MAGIC,
)

# str.lower()
#
# A call to lower() on a str value is compiled to a direct call of the C
# helper CPyStr_Lower, which returns NULL on failure.
method_op(
    name="lower",
    arg_types=[str_rprimitive],
    return_type=str_rprimitive,
    c_function_name="CPyStr_Lower",
    error_kind=ERR_MAGIC,
)

# str.upper()
#
# A call to upper() on a str value is compiled to a direct call of the C
# helper CPyStr_Upper, which returns NULL on failure.
method_op(
    name="upper",
    arg_types=[str_rprimitive],
    return_type=str_rprimitive,
    c_function_name="CPyStr_Upper",
    error_kind=ERR_MAGIC,
)
3 changes: 2 additions & 1 deletion mypyc/test-data/fixtures/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def lstrip(self, item: Optional[str] = None) -> str: pass
def rstrip(self, item: Optional[str] = None) -> str: pass
def join(self, x: Iterable[str]) -> str: pass
def format(self, *args: Any, **kwargs: Any) -> str: ...
def upper(self) -> str: ...
def startswith(self, x: Union[str, Tuple[str, ...]], start: int=..., end: int=...) -> bool: ...
def endswith(self, x: Union[str, Tuple[str, ...]], start: int=..., end: int=...) -> bool: ...
def replace(self, old: str, new: str, maxcount: int=...) -> str: ...
Expand All @@ -128,6 +127,8 @@ def rpartition(self, sep: str, /) -> Tuple[str, str, str]: ...
def removeprefix(self, prefix: str, /) -> str: ...
def removesuffix(self, suffix: str, /) -> str: ...
def islower(self) -> bool: ...
def lower(self) -> str: ...
def upper(self) -> str: ...
def count(self, substr: str, start: Optional[int] = None, end: Optional[int] = None) -> int: pass

class float:
Expand Down
21 changes: 21 additions & 0 deletions mypyc/test-data/irbuild-str.test
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,27 @@ L3:
L4:
return r6


[case testLower]
def do_lower(s: str) -> str:
return s.lower()
[out]
def do_lower(s):
s, r0 :: str
L0:
r0 = CPyStr_Lower(s)
return r0

[case testUpper]
def do_upper(s: str) -> str:
return s.upper()
[out]
def do_upper(s):
s, r0 :: str
L0:
r0 = CPyStr_Upper(s)
return r0

[case testFStringFromConstants]
from typing import Final
string: Final = "abc"
Expand Down
27 changes: 27 additions & 0 deletions mypyc/test-data/run-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -1072,6 +1072,33 @@ def test_count_multi_start_end_emoji() -> None:
assert string.count("🚀🚀🚀", 0, 12) == 2, string.count("🚀🚀🚀", 0, 12)
assert string.count("ñññ", 0, 12) == 1, string.count("ñññ", 0, 12)

[case testLower]
def test_str_lower() -> None:
assert "".lower() == ""
assert "ABC".lower() == "abc"
assert "abc".lower() == "abc"
assert "AbC123".lower() == "abc123"
assert "áÉÍ".lower() == "áéí"
assert "😴🚀".lower() == "😴🚀"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also test special cases (verify that this agrees with normal Python semantics):

  • 'SS'.lower() == 'ss'
  • 'Σ'.lower()
  • 'İ'.lower() (changes length!)

# Special
assert "SS".lower() == "ss"
assert "Σ".lower() == "σ" # Greek capital sigma -> small sigma
#assert "İ".lower() == "i̇" # TODO: Latin capital letter I with dot above -> 'i' + combining dot
#assert len("İ".lower()) == 2 # TODO: Confirms length change

[case testUpper]
def test_str_upper() -> None:
assert "".upper() == ""
assert "abc".upper() == "ABC"
assert "ABC".upper() == "ABC"
assert "AbC123".upper() == "ABC123"
assert "áéí".upper() == "ÁÉÍ"
assert "😴🚀".upper() == "😴🚀"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also test special cases (verify that this agrees with normal Python semantics):

  • 'ß'.upper() == 'SS'
  • 'ffi'.upper() (length increases!)

# Special
#assert "ß".upper() == "SS" # TODO: German sharp S -> double S
#assert "ffi".upper() == "FFI" # TODO: Ligature 'ffi' -> separate letters
#assert len("ffi".upper()) == 3 # TODO: Confirm length increases

[case testIsInstance]
from copysubclass import subc
from typing import Any
Expand Down
Loading