Skip to content

Commit 49bb90a

Browse files
[mypyc] Add a special case for len() of a str value (#10710)
Closes mypyc/mypyc#835 * Add a branch for str_rprimitive in builtin_len * Reduce redundant code * Faster list/tuple built from str
1 parent 1985928 commit 49bb90a

File tree

11 files changed

+86
-91
lines changed

11 files changed

+86
-91
lines changed

mypyc/irbuild/for_helpers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
)
1717
from mypyc.ir.rtypes import (
1818
RType, is_short_int_rprimitive, is_list_rprimitive, is_sequence_rprimitive,
19-
is_tuple_rprimitive, is_dict_rprimitive,
19+
is_tuple_rprimitive, is_dict_rprimitive, is_str_rprimitive,
2020
RTuple, short_int_rprimitive, int_rprimitive
2121
)
2222
from mypyc.primitives.registry import CFunctionDescription
@@ -164,7 +164,8 @@ def sequence_from_generator_preallocate_helper(
164164
"""
165165
if len(gen.sequences) == 1 and len(gen.indices) == 1 and len(gen.condlists[0]) == 0:
166166
rtype = builder.node_type(gen.sequences[0])
167-
if is_list_rprimitive(rtype) or is_tuple_rprimitive(rtype):
167+
if (is_list_rprimitive(rtype) or is_tuple_rprimitive(rtype)
168+
or is_str_rprimitive(rtype)):
168169
sequence = builder.accept(gen.sequences[0])
169170
length = builder.builder.builtin_len(sequence, gen.line, use_pyssize_t=True)
170171
target_op = empty_op_llbuilder(length, gen.line)

mypyc/irbuild/ll_builder.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
list_tuple_op, new_tuple_op, new_tuple_with_length_op
5353
)
5454
from mypyc.primitives.dict_ops import (
55-
dict_update_in_display_op, dict_new_op, dict_build_op, dict_size_op
55+
dict_update_in_display_op, dict_new_op, dict_build_op, dict_ssize_t_size_op
5656
)
5757
from mypyc.primitives.generic_ops import (
5858
py_getattr_op, py_call_op, py_call_with_kwargs_op, py_method_call_op,
@@ -64,7 +64,9 @@
6464
)
6565
from mypyc.primitives.int_ops import int_comparison_op_mapping
6666
from mypyc.primitives.exc_ops import err_occurred_op, keep_propagating_op
67-
from mypyc.primitives.str_ops import unicode_compare, str_check_if_true
67+
from mypyc.primitives.str_ops import (
68+
unicode_compare, str_check_if_true, str_ssize_t_size_op
69+
)
6870
from mypyc.primitives.set_ops import new_set_op
6971
from mypyc.rt_subtype import is_runtime_subtype
7072
from mypyc.subtype import is_subtype
@@ -1125,32 +1127,28 @@ def builtin_len(self, val: Value, line: int, use_pyssize_t: bool = False) -> Val
11251127
Return c_pyssize_t if use_pyssize_t is true (unshifted).
11261128
"""
11271129
typ = val.type
1130+
size_value = None
11281131
if is_list_rprimitive(typ) or is_tuple_rprimitive(typ):
11291132
elem_address = self.add(GetElementPtr(val, PyVarObject, 'ob_size'))
11301133
size_value = self.add(LoadMem(c_pyssize_t_rprimitive, elem_address))
11311134
self.add(KeepAlive([val]))
1132-
if use_pyssize_t:
1133-
return size_value
1134-
offset = Integer(1, c_pyssize_t_rprimitive, line)
1135-
return self.int_op(short_int_rprimitive, size_value, offset,
1136-
IntOp.LEFT_SHIFT, line)
1137-
elif is_dict_rprimitive(typ):
1138-
size_value = self.call_c(dict_size_op, [val], line)
1139-
if use_pyssize_t:
1140-
return size_value
1141-
offset = Integer(1, c_pyssize_t_rprimitive, line)
1142-
return self.int_op(short_int_rprimitive, size_value, offset,
1143-
IntOp.LEFT_SHIFT, line)
11441135
elif is_set_rprimitive(typ):
11451136
elem_address = self.add(GetElementPtr(val, PySetObject, 'used'))
11461137
size_value = self.add(LoadMem(c_pyssize_t_rprimitive, elem_address))
11471138
self.add(KeepAlive([val]))
1139+
elif is_dict_rprimitive(typ):
1140+
size_value = self.call_c(dict_ssize_t_size_op, [val], line)
1141+
elif is_str_rprimitive(typ):
1142+
size_value = self.call_c(str_ssize_t_size_op, [val], line)
1143+
1144+
if size_value is not None:
11481145
if use_pyssize_t:
11491146
return size_value
11501147
offset = Integer(1, c_pyssize_t_rprimitive, line)
11511148
return self.int_op(short_int_rprimitive, size_value, offset,
11521149
IntOp.LEFT_SHIFT, line)
1153-
elif isinstance(typ, RInstance):
1150+
1151+
if isinstance(typ, RInstance):
11541152
# TODO: Support use_pyssize_t
11551153
assert not use_pyssize_t
11561154
length = self.gen_method_call(val, '__len__', [], int_rprimitive, line)
@@ -1164,12 +1162,12 @@ def builtin_len(self, val: Value, line: int, use_pyssize_t: bool = False) -> Val
11641162
self.add(Unreachable())
11651163
self.activate_block(ok)
11661164
return length
1165+
1166+
# generic case
1167+
if use_pyssize_t:
1168+
return self.call_c(generic_ssize_t_len_op, [val], line)
11671169
else:
1168-
# generic case
1169-
if use_pyssize_t:
1170-
return self.call_c(generic_ssize_t_len_op, [val], line)
1171-
else:
1172-
return self.call_c(generic_len_op, [val], line)
1170+
return self.call_c(generic_len_op, [val], line)
11731171

11741172
def new_tuple(self, items: List[Value], line: int) -> Value:
11751173
size: Value = Integer(len(items), c_pyssize_t_rprimitive)

mypyc/irbuild/specialize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def dict_methods_fast_path(
126126
def translate_list_from_generator_call(
127127
builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Optional[Value]:
128128
# Special case for simplest list comprehension, for example
129-
# list(f(x) for x in other_list/other_tuple)
129+
# list(f(x) for x in some_list/some_tuple/some_str)
130130
# translate_list_comprehension() would take care of other cases if this fails.
131131
if (len(expr.args) == 1
132132
and expr.arg_kinds[0] == ARG_POS
@@ -142,7 +142,7 @@ def translate_list_from_generator_call(
142142
def translate_tuple_from_generator_call(
143143
builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Optional[Value]:
144144
# Special case for simplest tuple creation from a generator, for example
145-
# tuple(f(x) for x in other_list/other_tuple)
145+
# tuple(f(x) for x in some_list/some_tuple/some_str)
146146
# translate_safe_generator_call() would take care of other cases if this fails.
147147
if (len(expr.args) == 1
148148
and expr.arg_kinds[0] == ARG_POS

mypyc/lib-rt/CPy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ PyObject *CPyStr_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end);
392392
bool CPyStr_Startswith(PyObject *self, PyObject *subobj);
393393
bool CPyStr_Endswith(PyObject *self, PyObject *subobj);
394394
bool CPyStr_IsTrue(PyObject *obj);
395+
Py_ssize_t CPyStr_Size_size_t(PyObject *str);
395396

396397

397398
// Set operations

mypyc/lib-rt/str_ops.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,16 @@ PyObject *CPyStr_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
102102
}
103103
return CPyObject_GetSlice(obj, start, end);
104104
}
105+
105106
/* Check if the given string is true (i.e. it's length isn't zero) */
106107
bool CPyStr_IsTrue(PyObject *obj) {
107108
Py_ssize_t length = PyUnicode_GET_LENGTH(obj);
108109
return length != 0;
109110
}
111+
112+
Py_ssize_t CPyStr_Size_size_t(PyObject *str) {
113+
if (PyUnicode_READY(str) != -1) {
114+
return PyUnicode_GET_LENGTH(str);
115+
}
116+
return -1;
117+
}

mypyc/primitives/dict_ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@
250250
c_function_name='CPyDict_CheckSize',
251251
error_kind=ERR_FALSE)
252252

253-
dict_size_op = custom_op(
253+
dict_ssize_t_size_op = custom_op(
254254
arg_types=[dict_rprimitive],
255255
return_type=c_pyssize_t_rprimitive,
256256
c_function_name='PyDict_Size',

mypyc/primitives/str_ops.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
from mypyc.ir.ops import ERR_MAGIC, ERR_NEVER
66
from mypyc.ir.rtypes import (
77
RType, object_rprimitive, str_rprimitive, int_rprimitive, list_rprimitive,
8-
c_int_rprimitive, pointer_rprimitive, bool_rprimitive, bit_rprimitive
8+
c_int_rprimitive, pointer_rprimitive, bool_rprimitive, bit_rprimitive,
9+
c_pyssize_t_rprimitive
910
)
1011
from mypyc.primitives.registry import (
1112
method_op, binary_op, function_op,
12-
load_address_op, custom_op
13+
load_address_op, custom_op, ERR_NEG_INT
1314
)
1415

1516

@@ -89,7 +90,7 @@
8990

9091
# str1 += str2
9192
#
92-
# PyUnicodeAppend makes an effort to reuse the LHS when the refcount
93+
# PyUnicode_Append makes an effort to reuse the LHS when the refcount
9394
# is 1. This is super dodgy but oh well, the interpreter does it.
9495
binary_op(name='+=',
9596
arg_types=[str_rprimitive, str_rprimitive],
@@ -116,7 +117,7 @@
116117
name='replace',
117118
arg_types=[str_rprimitive, str_rprimitive, str_rprimitive],
118119
return_type=str_rprimitive,
119-
c_function_name="PyUnicode_Replace",
120+
c_function_name='PyUnicode_Replace',
120121
error_kind=ERR_MAGIC,
121122
extra_int_constants=[(-1, c_int_rprimitive)])
122123

@@ -125,13 +126,19 @@
125126
name='replace',
126127
arg_types=[str_rprimitive, str_rprimitive, str_rprimitive, int_rprimitive],
127128
return_type=str_rprimitive,
128-
c_function_name="CPyStr_Replace",
129+
c_function_name='CPyStr_Replace',
129130
error_kind=ERR_MAGIC)
130131

131132
# check if a string is true (isn't an empty string)
132133
str_check_if_true = custom_op(
133134
arg_types=[str_rprimitive],
134135
return_type=bit_rprimitive,
135-
c_function_name="CPyStr_IsTrue",
136+
c_function_name='CPyStr_IsTrue',
136137
error_kind=ERR_NEVER,
137138
)
139+
140+
str_ssize_t_size_op = custom_op(
141+
arg_types=[str_rprimitive],
142+
return_type=c_pyssize_t_rprimitive,
143+
c_function_name='CPyStr_Size_size_t',
144+
error_kind=ERR_NEG_INT)

mypyc/test-data/irbuild-str.test

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,3 @@ L2:
158158
return 0
159159
L3:
160160
unreachable
161-

mypyc/test-data/irbuild-tuple.test

Lines changed: 32 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -350,14 +350,13 @@ L4:
350350
return 1
351351

352352

353-
[case testTupleBuiltFromList2]
353+
[case testTupleBuiltFromStr]
354354
def f2(val: str) -> str:
355355
return val + "f2"
356356

357357
def test() -> None:
358-
source = ["a", "b", "c"]
358+
source = "abc"
359359
a = tuple(f2(x) for x in source)
360-
print(a)
361360
[out]
362361
def f2(val):
363362
val, r0, r1 :: str
@@ -366,71 +365,45 @@ L0:
366365
r1 = PyUnicode_Concat(val, r0)
367366
return r1
368367
def test():
369-
r0, r1, r2 :: str
370-
r3 :: list
371-
r4, r5, r6, r7 :: ptr
372-
source :: list
373-
r8 :: ptr
374-
r9 :: native_int
375-
r10 :: tuple
376-
r11 :: short_int
377-
r12 :: ptr
378-
r13 :: native_int
379-
r14 :: short_int
380-
r15 :: bit
381-
r16 :: object
382-
r17, x, r18 :: str
383-
r19 :: bit
384-
r20 :: short_int
368+
r0, source :: str
369+
r1 :: native_int
370+
r2 :: bit
371+
r3 :: tuple
372+
r4 :: short_int
373+
r5 :: native_int
374+
r6 :: bit
375+
r7 :: short_int
376+
r8 :: bit
377+
r9, x, r10 :: str
378+
r11 :: bit
379+
r12 :: short_int
385380
a :: tuple
386-
r21 :: object
387-
r22 :: str
388-
r23, r24 :: object
389381
L0:
390-
r0 = 'a'
391-
r1 = 'b'
392-
r2 = 'c'
393-
r3 = PyList_New(3)
394-
r4 = get_element_ptr r3 ob_item :: PyListObject
395-
r5 = load_mem r4 :: ptr*
396-
set_mem r5, r0 :: builtins.object*
397-
r6 = r5 + WORD_SIZE*1
398-
set_mem r6, r1 :: builtins.object*
399-
r7 = r5 + WORD_SIZE*2
400-
set_mem r7, r2 :: builtins.object*
401-
keep_alive r3
402-
source = r3
403-
r8 = get_element_ptr source ob_size :: PyVarObject
404-
r9 = load_mem r8 :: native_int*
405-
keep_alive source
406-
r10 = PyTuple_New(r9)
407-
r11 = 0
382+
r0 = 'abc'
383+
source = r0
384+
r1 = CPyStr_Size_size_t(source)
385+
r2 = r1 >= 0 :: signed
386+
r3 = PyTuple_New(r1)
387+
r4 = 0
408388
L1:
409-
r12 = get_element_ptr source ob_size :: PyVarObject
410-
r13 = load_mem r12 :: native_int*
411-
keep_alive source
412-
r14 = r13 << 1
413-
r15 = r11 < r14 :: signed
414-
if r15 goto L2 else goto L4 :: bool
389+
r5 = CPyStr_Size_size_t(source)
390+
r6 = r5 >= 0 :: signed
391+
r7 = r5 << 1
392+
r8 = r4 < r7 :: signed
393+
if r8 goto L2 else goto L4 :: bool
415394
L2:
416-
r16 = CPyList_GetItemUnsafe(source, r11)
417-
r17 = cast(str, r16)
418-
x = r17
419-
r18 = f2(x)
420-
r19 = CPySequenceTuple_SetItemUnsafe(r10, r11, r18)
395+
r9 = CPyStr_GetItem(source, r4)
396+
x = r9
397+
r10 = f2(x)
398+
r11 = CPySequenceTuple_SetItemUnsafe(r3, r4, r10)
421399
L3:
422-
r20 = r11 + 2
423-
r11 = r20
400+
r12 = r4 + 2
401+
r4 = r12
424402
goto L1
425403
L4:
426-
a = r10
427-
r21 = builtins :: module
428-
r22 = 'print'
429-
r23 = CPyObject_GetAttr(r21, r22)
430-
r24 = PyObject_CallFunctionObjArgs(r23, a, 0)
404+
a = r3
431405
return 1
432406

433-
434407
[case testTupleBuiltFromVariableLengthTuple]
435408
from typing import Tuple
436409

mypyc/test-data/run-lists.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,3 +357,7 @@ def test() -> None:
357357
source_e = [0, 1, 2]
358358
e = list((x ** 2) for x in (y + 2 for y in source_e))
359359
assert e == [4, 9, 16]
360+
source_str = "abcd"
361+
f = list("str:" + x for x in source_str)
362+
assert f == ["str:a", "str:b", "str:c", "str:d"]
363+

mypyc/test-data/run-tuples.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,10 @@ def test_sequence_generator() -> None:
196196
a = tuple(f8(x) for x in source_fixed_length_tuple)
197197
assert a == (False, True, False, True)
198198

199+
source_str = 'abbc'
200+
b = tuple('s:' + x for x in source_str)
201+
assert b == ('s:a', 's:b', 's:b', 's:c')
202+
199203
TUPLE: Final[Tuple[str, ...]] = ('x', 'y')
200204

201205
def test_final_boxed_tuple() -> None:

0 commit comments

Comments
 (0)