Skip to content

Commit

Permalink
[mypyc] Support the u8 native integer type (#15564)
Browse files Browse the repository at this point in the history
This is mostly similar to `i16`, which I added recently in #15464, but
there are some differences:
* Some adjustments were needed to support unsigned integers
* Add overflow checking of literals, since it's easy to over/underflow
when using `u8` due to limited range
* Rename primitive integer types from `int16` to `i16` (etc.) to match
the user-visible types (needed to get some error messages consistent,
and it's generally nicer)
* Overall make things a bit more consistent
* Actually update `mypy_extensions` stubs

This is an unsigned type to make it easier to work with binary/bytes
data. In particular, the item values of `bytes` objects are unsigned
8-bit values. This type will become much more useful once we support
packed arrays.

---------

Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
  • Loading branch information
JukkaL and hauntsaninja authored Jul 6, 2023
1 parent e0b159e commit 8c70e80
Show file tree
Hide file tree
Showing 50 changed files with 1,843 additions and 634 deletions.
1 change: 1 addition & 0 deletions mypy/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@
"mypy_extensions.i64",
"mypy_extensions.i32",
"mypy_extensions.i16",
"mypy_extensions.u8",
)

DATACLASS_TRANSFORM_NAMES: Final = (
Expand Down
35 changes: 35 additions & 0 deletions mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,38 @@ class i16:
def __ge__(self, x: i16) -> bool: ...
def __gt__(self, x: i16) -> bool: ...
def __index__(self) -> int: ...

# Type stub for `u8`, the 8-bit unsigned native integer type exposed by
# `mypy_extensions`. Only signatures are declared here; every body is `...`.
# Every binary operator and ordering comparison below is declared with a `u8`
# operand; no other native integer type appears in these signatures.
class u8:
# Constructor, form 1: an optional single value to convert. Accepted
# argument types are exactly those listed in the annotation (string,
# readable buffer, or an object supporting int/index/trunc conversion).
@overload
def __new__(cls, __x: str | ReadableBuffer | SupportsInt | SupportsIndex | SupportsTrunc = ...) -> u8: ...
# Constructor, form 2: a str/bytes/bytearray value plus an explicit `base`.
# NOTE(review): this looks like it mirrors the builtin `int(x, base)`
# signature -- confirm against the typeshed `int` stub.
@overload
def __new__(cls, __x: str | bytes | bytearray, base: SupportsIndex) -> u8: ...

# Arithmetic operators and their reflected variants: u8 <op> u8 -> u8.
def __add__(self, x: u8) -> u8: ...
def __radd__(self, x: u8) -> u8: ...
def __sub__(self, x: u8) -> u8: ...
def __rsub__(self, x: u8) -> u8: ...
def __mul__(self, x: u8) -> u8: ...
def __rmul__(self, x: u8) -> u8: ...
def __floordiv__(self, x: u8) -> u8: ...
def __rfloordiv__(self, x: u8) -> u8: ...
def __mod__(self, x: u8) -> u8: ...
def __rmod__(self, x: u8) -> u8: ...
# Bitwise operators and their reflected variants: u8 <op> u8 -> u8.
def __and__(self, x: u8) -> u8: ...
def __rand__(self, x: u8) -> u8: ...
def __or__(self, x: u8) -> u8: ...
def __ror__(self, x: u8) -> u8: ...
def __xor__(self, x: u8) -> u8: ...
def __rxor__(self, x: u8) -> u8: ...
# Shift operators; the shift amount is also typed as u8.
def __lshift__(self, x: u8) -> u8: ...
def __rlshift__(self, x: u8) -> u8: ...
def __rshift__(self, x: u8) -> u8: ...
def __rrshift__(self, x: u8) -> u8: ...
# Unary operators; the result type stays u8 (including for negation).
def __neg__(self) -> u8: ...
def __invert__(self) -> u8: ...
def __pos__(self) -> u8: ...
# Ordering comparisons against another u8, returning bool.
# `__eq__` / `__ne__` are not declared in this stub.
def __lt__(self, x: u8) -> bool: ...
def __le__(self, x: u8) -> bool: ...
def __ge__(self, x: u8) -> bool: ...
def __gt__(self, x: u8) -> bool: ...
# Conversion to a plain `int` through the index protocol.
def __index__(self) -> int: ...
11 changes: 10 additions & 1 deletion mypyc/codegen/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
is_short_int_rprimitive,
is_str_rprimitive,
is_tuple_rprimitive,
is_uint8_rprimitive,
object_rprimitive,
optional_value_type,
)
Expand Down Expand Up @@ -922,6 +923,14 @@ def emit_unbox(
self.emit_line(f"{dest} = CPyLong_AsInt16({src});")
if not isinstance(error, AssignHandler):
self.emit_unbox_failure_with_overlapping_error_value(dest, typ, failure)
elif is_uint8_rprimitive(typ):
# Whether we are borrowing or not makes no difference.
assert not optional # Not supported for overlapping error values
if declare_dest:
self.emit_line(f"uint8_t {dest};")
self.emit_line(f"{dest} = CPyLong_AsUInt8({src});")
if not isinstance(error, AssignHandler):
self.emit_unbox_failure_with_overlapping_error_value(dest, typ, failure)
elif is_float_rprimitive(typ):
assert not optional # Not supported for overlapping error values
if declare_dest:
Expand Down Expand Up @@ -1013,7 +1022,7 @@ def emit_box(
self.emit_lines(f"{declaration}{dest} = Py_None;")
if not can_borrow:
self.emit_inc_ref(dest, object_rprimitive)
elif is_int32_rprimitive(typ) or is_int16_rprimitive(typ):
elif is_int32_rprimitive(typ) or is_int16_rprimitive(typ) or is_uint8_rprimitive(typ):
self.emit_line(f"{declaration}{dest} = PyLong_FromLong({src});")
elif is_int64_rprimitive(typ):
self.emit_line(f"{declaration}{dest} = PyLong_FromLongLong({src});")
Expand Down
2 changes: 2 additions & 0 deletions mypyc/doc/float_operations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Construction
* ``float(x: i64)``
* ``float(x: i32)``
* ``float(x: i16)``
* ``float(x: u8)``
* ``float(x: str)``
* ``float(x: float)`` (no-op)

Expand All @@ -32,6 +33,7 @@ Functions
* ``i64(f)`` (convert to 64-bit signed integer)
* ``i32(f)`` (convert to 32-bit signed integer)
* ``i16(f)`` (convert to 16-bit signed integer)
* ``u8(f)`` (convert to 8-bit unsigned integer)
* ``abs(f)``
* ``math.sin(f)``
* ``math.cos(f)``
Expand Down
42 changes: 27 additions & 15 deletions mypyc/doc/int_operations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ Mypyc supports these integer types:
* ``i64`` (64-bit signed integer)
* ``i32`` (32-bit signed integer)
* ``i16`` (16-bit signed integer)
* ``u8`` (8-bit unsigned integer)

``i64``, ``i32`` and ``i16`` are *native integer types* and must be imported
from the ``mypy_extensions`` module. ``int`` corresponds to the Python
``int`` type, but uses a more efficient runtime representation (tagged
pointer). Native integer types are value types.
``i64``, ``i32``, ``i16`` and ``u8`` are *native integer types* and
are available in the ``mypy_extensions`` module. ``int`` corresponds
to the Python ``int`` type, but uses a more efficient runtime
representation (tagged pointer). Native integer types are value types.

All integer types have optimized primitive operations, but the native
integer types are more efficient than ``int``, since they don't
Expand All @@ -34,6 +35,7 @@ Construction
* ``int(x: i64)``
* ``int(x: i32)``
* ``int(x: i16)``
* ``int(x: u8)``
* ``int(x: str)``
* ``int(x: str, base: int)``
* ``int(x: int)`` (no-op)
Expand All @@ -42,31 +44,34 @@ Construction

* ``i64(x: int)``
* ``i64(x: float)``
* ``i64(x: i64)`` (no-op)
* ``i64(x: i32)``
* ``i64(x: i16)``
* ``i64(x: u8)``
* ``i64(x: str)``
* ``i64(x: str, base: int)``
* ``i64(x: i64)`` (no-op)

``i32`` type:

* ``i32(x: int)``
* ``i32(x: float)``
* ``i32(x: i64)`` (truncate)
* ``i32(x: i32)`` (no-op)
* ``i32(x: i16)``
* ``i32(x: u8)``
* ``i32(x: str)``
* ``i32(x: str, base: int)``
* ``i32(x: i32)`` (no-op)

``i16`` type:

* ``i16(x: int)``
* ``i16(x: float)``
* ``i16(x: i64)`` (truncate)
* ``i16(x: i32)`` (truncate)
* ``i16(x: i16)`` (no-op)
* ``i16(x: u8)``
* ``i16(x: str)``
* ``i16(x: str, base: int)``
* ``i16(x: i16)`` (no-op)

Conversions from ``int`` to a native integer type raise
``OverflowError`` if the value is too large or small. Conversions from
Expand All @@ -80,6 +85,8 @@ Implicit conversions
``int`` values can be implicitly converted to a native integer type,
for convenience. This means that these are equivalent::

from mypy_extensions import i64

def implicit() -> None:
# Implicit conversion of 0 (int) to i64
x: i64 = 0
Expand Down Expand Up @@ -107,18 +114,23 @@ Operators
* Comparisons (``==``, ``!=``, ``<``, etc.)
* Augmented assignment (``x += y``, etc.)

If one of the above native integer operations overflows or underflows,
the behavior is undefined. Native integer types should only be used if
all possible values are small enough for the type. For this reason,
the arbitrary-precision ``int`` type is recommended unless the
performance of integer operations is critical.
If one of the above native integer operations overflows or underflows
with signed operands, the behavior is undefined. Signed native integer
types should only be used if all possible values are small enough for
the type. For this reason, the arbitrary-precision ``int`` type is
recommended for signed values unless the performance of integer
operations is critical.

Operations on unsigned integers (``u8``) wrap around on overflow.

It's a compile-time error to mix different native integer types in a
binary operation such as addition. An explicit conversion is required::

def add(x: i64, y: i32) -> None:
a = x + y # Error (i64 + i32)
b = x + i64(y) # OK
from mypy_extensions import i64, i32

def add(x: i64, y: i32) -> None:
a = x + y # Error (i64 + i32)
b = x + i64(y) # OK

You can freely mix a native integer value and an arbitrary-precision
``int`` value in an operation. The native integer type is "sticky"
Expand Down
15 changes: 8 additions & 7 deletions mypyc/doc/using_type_annotations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ implementations:
* ``i64`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
* ``i32`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
* ``i16`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
* ``u8`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
* ``float`` (:ref:`native operations <float-ops>`)
* ``bool`` (:ref:`native operations <bool-ops>`)
* ``str`` (:ref:`native operations <str-ops>`)
Expand Down Expand Up @@ -344,13 +345,13 @@ Native integer types
--------------------

You can use the native integer types ``i64`` (64-bit signed integer),
``i32`` (32-bit signed integer), and ``i16`` (16-bit signed integer)
if you know that integer values will always fit within fixed
bounds. These types are faster than the arbitrary-precision ``int``
type, since they don't require overflow checks on operations. ``i32``
and ``i16`` may also use less memory than ``int`` values. The types
are imported from the ``mypy_extensions`` module (installed via ``pip
install mypy_extensions``).
``i32`` (32-bit signed integer), ``i16`` (16-bit signed integer), and
``u8`` (8-bit unsigned integer) if you know that integer values will
always fit within fixed bounds. These types are faster than the
arbitrary-precision ``int`` type, since they don't require overflow
checks on operations. They may also use less memory than ``int``
values. The types are imported from the ``mypy_extensions`` module
(installed via ``pip install mypy_extensions``).

Example::

Expand Down
1 change: 1 addition & 0 deletions mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,7 @@ class ComparisonOp(RegisterOp):
}

signed_ops: Final = {"==": EQ, "!=": NEQ, "<": SLT, ">": SGT, "<=": SLE, ">=": SGE}
unsigned_ops: Final = {"==": EQ, "!=": NEQ, "<": ULT, ">": UGT, "<=": ULE, ">=": UGE}

def __init__(self, lhs: Value, rhs: Value, op: int, line: int = -1) -> None:
super().__init__(line)
Expand Down
Loading

0 comments on commit 8c70e80

Please sign in to comment.