[mypyc] Support the u8 native integer type (#15564)

This is mostly similar to `i16` that I added recently in #15464, but there are some differences:

* Some adjustments were needed to support unsigned integers
* Add overflow checking of literals, since it's easy to over/underflow when using `u8` due to limited range
* Rename primitive integer types from `int16` to `i16` (etc.) to match the user-visible types (needed to get some error messages consistent, and it's generally nicer)
* Overall make things a bit more consistent
* Actually update `mypy_extensions` stubs

This is an unsigned type to make it easier to work with binary/bytes data. The item values for `bytes` are unsigned 8-bit values, in particular. This type will become much more useful once we support packed arrays.

---------

Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
python · Jul 6, 2023 · 8c70e80 · 8c70e80
1 parent e0b159e
commit 8c70e80
Show file tree

Hide file tree

Showing 50 changed files with 1,843 additions and 634 deletions.
diff --git a/mypy/types.py b/mypy/types.py
@@ -155,6 +155,7 @@
     "mypy_extensions.i64",
     "mypy_extensions.i32",
     "mypy_extensions.i16",
+    "mypy_extensions.u8",
 )
 
 DATACLASS_TRANSFORM_NAMES: Final = (

diff --git a/mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi b/mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi
@@ -181,3 +181,38 @@ class i16:
     def __ge__(self, x: i16) -> bool: ...
     def __gt__(self, x: i16) -> bool: ...
     def __index__(self) -> int: ...
+
+class u8:
+    @overload
+    def __new__(cls, __x: str | ReadableBuffer | SupportsInt | SupportsIndex | SupportsTrunc = ...) -> u8: ...
+    @overload
+    def __new__(cls, __x: str | bytes | bytearray, base: SupportsIndex) -> u8: ...
+
+    def __add__(self, x: u8) -> u8: ...
+    def __radd__(self, x: u8) -> u8: ...
+    def __sub__(self, x: u8) -> u8: ...
+    def __rsub__(self, x: u8) -> u8: ...
+    def __mul__(self, x: u8) -> u8: ...
+    def __rmul__(self, x: u8) -> u8: ...
+    def __floordiv__(self, x: u8) -> u8: ...
+    def __rfloordiv__(self, x: u8) -> u8: ...
+    def __mod__(self, x: u8) -> u8: ...
+    def __rmod__(self, x: u8) -> u8: ...
+    def __and__(self, x: u8) -> u8: ...
+    def __rand__(self, x: u8) -> u8: ...
+    def __or__(self, x: u8) -> u8: ...
+    def __ror__(self, x: u8) -> u8: ...
+    def __xor__(self, x: u8) -> u8: ...
+    def __rxor__(self, x: u8) -> u8: ...
+    def __lshift__(self, x: u8) -> u8: ...
+    def __rlshift__(self, x: u8) -> u8: ...
+    def __rshift__(self, x: u8) -> u8: ...
+    def __rrshift__(self, x: u8) -> u8: ...
+    def __neg__(self) -> u8: ...
+    def __invert__(self) -> u8: ...
+    def __pos__(self) -> u8: ...
+    def __lt__(self, x: u8) -> bool: ...
+    def __le__(self, x: u8) -> bool: ...
+    def __ge__(self, x: u8) -> bool: ...
+    def __gt__(self, x: u8) -> bool: ...
+    def __index__(self) -> int: ...
diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py
@@ -47,6 +47,7 @@
     is_short_int_rprimitive,
     is_str_rprimitive,
     is_tuple_rprimitive,
+    is_uint8_rprimitive,
     object_rprimitive,
     optional_value_type,
 )
@@ -922,6 +923,14 @@ def emit_unbox(
             self.emit_line(f"{dest} = CPyLong_AsInt16({src});")
             if not isinstance(error, AssignHandler):
                 self.emit_unbox_failure_with_overlapping_error_value(dest, typ, failure)
+        elif is_uint8_rprimitive(typ):
+            # Whether we are borrowing or not makes no difference.
+            assert not optional  # Not supported for overlapping error values
+            if declare_dest:
+                self.emit_line(f"uint8_t {dest};")
+            self.emit_line(f"{dest} = CPyLong_AsUInt8({src});")
+            if not isinstance(error, AssignHandler):
+                self.emit_unbox_failure_with_overlapping_error_value(dest, typ, failure)
         elif is_float_rprimitive(typ):
             assert not optional  # Not supported for overlapping error values
             if declare_dest:
@@ -1013,7 +1022,7 @@ def emit_box(
             self.emit_lines(f"{declaration}{dest} = Py_None;")
             if not can_borrow:
                 self.emit_inc_ref(dest, object_rprimitive)
-        elif is_int32_rprimitive(typ) or is_int16_rprimitive(typ):
+        elif is_int32_rprimitive(typ) or is_int16_rprimitive(typ) or is_uint8_rprimitive(typ):
             self.emit_line(f"{declaration}{dest} = PyLong_FromLong({src});")
         elif is_int64_rprimitive(typ):
             self.emit_line(f"{declaration}{dest} = PyLong_FromLongLong({src});")

diff --git a/mypyc/doc/float_operations.rst b/mypyc/doc/float_operations.rst
@@ -15,6 +15,7 @@ Construction
 * ``float(x: i64)``
 * ``float(x: i32)``
 * ``float(x: i16)``
+* ``float(x: u8)``
 * ``float(x: str)``
 * ``float(x: float)`` (no-op)
 
@@ -32,6 +33,7 @@ Functions
 * ``i64(f)`` (convert to 64-bit signed integer)
 * ``i32(f)`` (convert to 32-bit signed integer)
 * ``i16(f)`` (convert to 16-bit signed integer)
+* ``u8(f)`` (convert to 8-bit unsigned integer)
 * ``abs(f)``
 * ``math.sin(f)``
 * ``math.cos(f)``

diff --git a/mypyc/doc/int_operations.rst b/mypyc/doc/int_operations.rst
@@ -9,11 +9,12 @@ Mypyc supports these integer types:
 * ``i64`` (64-bit signed integer)
 * ``i32`` (32-bit signed integer)
 * ``i16`` (16-bit signed integer)
+* ``u8`` (8-bit unsigned integer)
 
-``i64``, ``i32`` and ``i16`` are *native integer types* and must be imported
-from the ``mypy_extensions`` module. ``int`` corresponds to the Python
-``int`` type, but uses a more efficient runtime representation (tagged
-pointer). Native integer types are value types.
+``i64``, ``i32``, ``i16`` and ``u8`` are *native integer types* and
+are available in the ``mypy_extensions`` module. ``int`` corresponds
+to the Python ``int`` type, but uses a more efficient runtime
+representation (tagged pointer). Native integer types are value types.
 
 All integer types have optimized primitive operations, but the native
 integer types are more efficient than ``int``, since they don't
@@ -34,6 +35,7 @@ Construction
 * ``int(x: i64)``
 * ``int(x: i32)``
 * ``int(x: i16)``
+* ``int(x: u8)``
 * ``int(x: str)``
 * ``int(x: str, base: int)``
 * ``int(x: int)`` (no-op)
@@ -42,31 +44,34 @@ Construction
 
 * ``i64(x: int)``
 * ``i64(x: float)``
+* ``i64(x: i64)`` (no-op)
 * ``i64(x: i32)``
 * ``i64(x: i16)``
+* ``i64(x: u8)``
 * ``i64(x: str)``
 * ``i64(x: str, base: int)``
-* ``i64(x: i64)`` (no-op)
 
 ``i32`` type:
 
 * ``i32(x: int)``
 * ``i32(x: float)``
 * ``i32(x: i64)`` (truncate)
+* ``i32(x: i32)`` (no-op)
 * ``i32(x: i16)``
+* ``i32(x: u8)``
 * ``i32(x: str)``
 * ``i32(x: str, base: int)``
-* ``i32(x: i32)`` (no-op)
 
 ``i16`` type:
 
 * ``i16(x: int)``
 * ``i16(x: float)``
 * ``i16(x: i64)`` (truncate)
 * ``i16(x: i32)`` (truncate)
+* ``i16(x: i16)`` (no-op)
+* ``i16(x: u8)``
 * ``i16(x: str)``
 * ``i16(x: str, base: int)``
-* ``i16(x: i16)`` (no-op)
 
 Conversions from ``int`` to a native integer type raise
 ``OverflowError`` if the value is too large or small. Conversions from
@@ -80,6 +85,8 @@ Implicit conversions
 ``int`` values can be implicitly converted to a native integer type,
 for convenience. This means that these are equivalent::
 
+   from mypy_extensions import i64
+
    def implicit() -> None:
        # Implicit conversion of 0 (int) to i64
        x: i64 = 0
@@ -107,18 +114,23 @@ Operators
 * Comparisons (``==``, ``!=``, ``<``, etc.)
 * Augmented assignment (``x += y``, etc.)
 
-If one of the above native integer operations overflows or underflows,
-the behavior is undefined. Native integer types should only be used if
-all possible values are small enough for the type. For this reason,
-the arbitrary-precision ``int`` type is recommended unless the
-performance of integer operations is critical.
+If one of the above native integer operations overflows or underflows
+with signed operands, the behavior is undefined. Signed native integer
+types should only be used if all possible values are small enough for
+the type. For this reason, the arbitrary-precision ``int`` type is
+recommended for signed values unless the performance of integer
+operations is critical.
+
+Operations on unsigned integers (``u8``) wrap around on overflow.
 
 It's a compile-time error to mix different native integer types in a
 binary operation such as addition. An explicit conversion is required::
 
-  def add(x: i64, y: i32) -> None:
-      a = x + y  # Error (i64 + i32)
-      b = x + i64(y)  # OK
+    from mypy_extensions import i64, i32
+
+    def add(x: i64, y: i32) -> None:
+        a = x + y  # Error (i64 + i32)
+        b = x + i64(y)  # OK
 
 You can freely mix a native integer value and an arbitrary-precision
 ``int`` value in an operation. The native integer type is "sticky"

diff --git a/mypyc/doc/using_type_annotations.rst b/mypyc/doc/using_type_annotations.rst
@@ -33,6 +33,7 @@ implementations:
 * ``i64`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
 * ``i32`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
 * ``i16`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
+* ``u8`` (:ref:`documentation <native-ints>`, :ref:`native operations <int-ops>`)
 * ``float`` (:ref:`native operations <float-ops>`)
 * ``bool`` (:ref:`native operations <bool-ops>`)
 * ``str`` (:ref:`native operations <str-ops>`)
@@ -344,13 +345,13 @@ Native integer types
 --------------------
 
 You can use the native integer types ``i64`` (64-bit signed integer),
-``i32`` (32-bit signed integer), and ``i16`` (16-bit signed integer)
-if you know that integer values will always fit within fixed
-bounds. These types are faster than the arbitrary-precision ``int``
-type, since they don't require overflow checks on operations. ``i32``
-and ``i16`` may also use less memory than ``int`` values. The types
-are imported from the ``mypy_extensions`` module (installed via ``pip
-install mypy_extensions``).
+``i32`` (32-bit signed integer), ``i16`` (16-bit signed integer), and
+``u8`` (8-bit unsigned integer) if you know that integer values will
+always fit within fixed bounds. These types are faster than the
+arbitrary-precision ``int`` type, since they don't require overflow
+checks on operations. They may also use less memory than ``int``
+values. The types are imported from the ``mypy_extensions`` module
+(installed via ``pip install mypy_extensions``).
 
 Example::
 

diff --git a/mypyc/ir/ops.py b/mypyc/ir/ops.py
@@ -1162,6 +1162,7 @@ class ComparisonOp(RegisterOp):
     }
 
     signed_ops: Final = {"==": EQ, "!=": NEQ, "<": SLT, ">": SGT, "<=": SLE, ">=": SGE}
+    unsigned_ops: Final = {"==": EQ, "!=": NEQ, "<": ULT, ">": UGT, "<=": ULE, ">=": UGE}
 
     def __init__(self, lhs: Value, rhs: Value, op: int, line: int = -1) -> None:
         super().__init__(line)