Skip to content

Optimize arithmetic/bitwise operations of short, ushort, sbyte, and byte. #44849

Closed
@MineCake147E

Description

@MineCake147E

Description

Currently, RyuJIT generates code like the following for arithmetic operations on short, ushort, sbyte, and byte.
The generated code contains many movsx and movzx instructions, and appears to be less optimal than the equivalent code generated by Clang/LLVM.

Example of the codegen difference between C# and C on x64

C# Code
using System;
/// <summary>
/// Arithmetic, bitwise, and shift helpers over the small integer types
/// (<see cref="ushort"/>, <see cref="short"/>, <see cref="byte"/>, <see cref="sbyte"/>).
/// Every method applies a single operator to its operands and narrows the
/// result back to the operand type with an explicit cast.
/// </summary>
public static class C
{
    // ---- Addition ----

    public static ushort Add(ushort left, ushort right)
    {
        return (ushort)(left + right);
    }

    public static short Add(short left, short right)
    {
        return (short)(left + right);
    }

    public static byte Add(byte left, byte right)
    {
        return (byte)(left + right);
    }

    public static sbyte Add(sbyte left, sbyte right)
    {
        return (sbyte)(left + right);
    }

    // ---- Subtraction ----

    public static ushort Subtract(ushort left, ushort right)
    {
        return (ushort)(left - right);
    }

    public static short Subtract(short left, short right)
    {
        return (short)(left - right);
    }

    public static byte Subtract(byte left, byte right)
    {
        return (byte)(left - right);
    }

    public static sbyte Subtract(sbyte left, sbyte right)
    {
        return (sbyte)(left - right);
    }

    // ---- Multiplication ----

    public static ushort Multiply(ushort left, ushort right)
    {
        return (ushort)(left * right);
    }

    public static short Multiply(short left, short right)
    {
        return (short)(left * right);
    }

    public static byte Multiply(byte left, byte right)
    {
        return (byte)(left * right);
    }

    public static sbyte Multiply(sbyte left, sbyte right)
    {
        return (sbyte)(left * right);
    }

    // ---- Division by a variable divisor ----

    public static ushort Divide(ushort left, ushort right)
    {
        return (ushort)(left / right);
    }

    public static short Divide(short left, short right)
    {
        return (short)(left / right);
    }

    public static byte Divide(byte left, byte right)
    {
        return (byte)(left / right);
    }

    public static sbyte Divide(sbyte left, sbyte right)
    {
        return (sbyte)(left / right);
    }

    // ---- Remainder by a variable divisor ----

    public static ushort Modulus(ushort left, ushort right)
    {
        return (ushort)(left % right);
    }

    public static short Modulus(short left, short right)
    {
        return (short)(left % right);
    }

    public static byte Modulus(byte left, byte right)
    {
        return (byte)(left % right);
    }

    public static sbyte Modulus(sbyte left, sbyte right)
    {
        return (sbyte)(left % right);
    }

    // ---- Unary negation ----

    public static ushort Negate(ushort left)
    {
        return (ushort)(-left);
    }

    public static short Negate(short left)
    {
        return (short)(-left);
    }

    public static byte Negate(byte left)
    {
        return (byte)(-left);
    }

    public static sbyte Negate(sbyte left)
    {
        return (sbyte)(-left);
    }

    // ---- Bitwise AND ----

    public static ushort BitwiseAnd(ushort left, ushort right)
    {
        return (ushort)(left & right);
    }

    public static short BitwiseAnd(short left, short right)
    {
        return (short)(left & right);
    }

    public static byte BitwiseAnd(byte left, byte right)
    {
        return (byte)(left & right);
    }

    public static sbyte BitwiseAnd(sbyte left, sbyte right)
    {
        return (sbyte)(left & right);
    }

    // ---- Bitwise OR ----

    public static ushort BitwiseOr(ushort left, ushort right)
    {
        return (ushort)(left | right);
    }

    public static short BitwiseOr(short left, short right)
    {
        return (short)(left | right);
    }

    public static byte BitwiseOr(byte left, byte right)
    {
        return (byte)(left | right);
    }

    public static sbyte BitwiseOr(sbyte left, sbyte right)
    {
        return (sbyte)(left | right);
    }

    // ---- Bitwise XOR ----

    public static ushort BitwiseXor(ushort left, ushort right)
    {
        return (ushort)(left ^ right);
    }

    public static short BitwiseXor(short left, short right)
    {
        return (short)(left ^ right);
    }

    public static byte BitwiseXor(byte left, byte right)
    {
        return (byte)(left ^ right);
    }

    public static sbyte BitwiseXor(sbyte left, sbyte right)
    {
        return (sbyte)(left ^ right);
    }

    // ---- Bitwise complement ----

    public static ushort BitwiseNot(ushort left)
    {
        return (ushort)(~left);
    }

    public static short BitwiseNot(short left)
    {
        return (short)(~left);
    }

    public static byte BitwiseNot(byte left)
    {
        return (byte)(~left);
    }

    public static sbyte BitwiseNot(sbyte left)
    {
        return (sbyte)(~left);
    }

    // ---- Right shift (named "Arithmetic" for the signed operand types) ----

    public static ushort ShiftRight(ushort left, int right)
    {
        return (ushort)(left >> right);
    }

    public static short ShiftArithmeticRight(short left, int right)
    {
        return (short)(left >> right);
    }

    public static byte ShiftRight(byte left, int right)
    {
        return (byte)(left >> right);
    }

    public static sbyte ShiftArithmeticRight(sbyte left, int right)
    {
        return (sbyte)(left >> right);
    }

    // ---- Left shift ----

    public static ushort ShiftLeft(ushort left, int right)
    {
        return (ushort)(left << right);
    }

    public static short ShiftLeft(short left, int right)
    {
        return (short)(left << right);
    }

    public static byte ShiftLeft(byte left, int right)
    {
        return (byte)(left << right);
    }

    public static sbyte ShiftLeft(sbyte left, int right)
    {
        return (sbyte)(left << right);
    }

    // ---- Division by the constant 19 ----

    public static ushort DivideBy19(ushort left)
    {
        return (ushort)(left / 19);
    }

    public static short DivideBy19(short left)
    {
        return (short)(left / 19);
    }

    public static byte DivideBy19(byte left)
    {
        return (byte)(left / 19);
    }

    public static sbyte DivideBy19(sbyte left)
    {
        return (sbyte)(left / 19);
    }

    // ---- Remainder by the constant 19 ----

    public static ushort ModulusBy19(ushort left)
    {
        return (ushort)(left % 19);
    }

    public static short ModulusBy19(short left)
    {
        return (short)(left % 19);
    }

    public static byte ModulusBy19(byte left)
    {
        return (byte)(left % 19);
    }

    public static sbyte ModulusBy19(sbyte left)
    {
        return (sbyte)(left % 19);
    }
}
RyuJIT assembly (x64 Release)
; Core CLR v4.700.20.41105 on amd64

C.Add(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx edx, dx
    L0006: add eax, edx
    L0008: movzx eax, ax
    L000b: ret

C.Add(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rdx, dx
    L0008: add eax, edx
    L000a: movsx rax, ax
    L000e: ret

C.Add(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx edx, dl
    L0006: add eax, edx
    L0008: movzx eax, al
    L000b: ret

C.Add(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rdx, dl
    L0008: add eax, edx
    L000a: movsx rax, al
    L000e: ret

C.Subtract(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx edx, dx
    L0006: sub eax, edx
    L0008: movzx eax, ax
    L000b: ret

C.Subtract(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rdx, dx
    L0008: sub eax, edx
    L000a: movsx rax, ax
    L000e: ret

C.Subtract(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx edx, dl
    L0006: sub eax, edx
    L0008: movzx eax, al
    L000b: ret

C.Subtract(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rdx, dl
    L0008: sub eax, edx
    L000a: movsx rax, al
    L000e: ret

C.Multiply(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx edx, dx
    L0006: imul eax, edx
    L0009: movzx eax, ax
    L000c: ret

C.Multiply(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rdx, dx
    L0008: imul eax, edx
    L000b: movsx rax, ax
    L000f: ret

C.Multiply(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx edx, dl
    L0006: imul eax, edx
    L0009: movzx eax, al
    L000c: ret

C.Multiply(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rdx, dl
    L0008: imul eax, edx
    L000b: movsx rax, al
    L000f: ret

C.Divide(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx ecx, dx
    L0006: cdq
    L0007: idiv ecx
    L0009: movzx eax, ax
    L000c: ret

C.Divide(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rcx, dx
    L0008: cdq
    L0009: idiv ecx
    L000b: movsx rax, ax
    L000f: ret

C.Divide(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx ecx, dl
    L0006: cdq
    L0007: idiv ecx
    L0009: movzx eax, al
    L000c: ret

C.Divide(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rcx, dl
    L0008: cdq
    L0009: idiv ecx
    L000b: movsx rax, al
    L000f: ret

C.Modulus(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx ecx, dx
    L0006: cdq
    L0007: idiv ecx
    L0009: movzx eax, dx
    L000c: ret

C.Modulus(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rcx, dx
    L0008: cdq
    L0009: idiv ecx
    L000b: movsx rax, dx
    L000f: ret

C.Modulus(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx ecx, dl
    L0006: cdq
    L0007: idiv ecx
    L0009: movzx eax, dl
    L000c: ret

C.Modulus(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rcx, dl
    L0008: cdq
    L0009: idiv ecx
    L000b: movsx rax, dl
    L000f: ret

C.Negate(UInt16)
    L0000: movzx eax, cx
    L0003: neg eax
    L0005: movzx eax, ax
    L0008: ret

C.Negate(Int16)
    L0000: movsx rax, cx
    L0004: neg eax
    L0006: movsx rax, ax
    L000a: ret

C.Negate(Byte)
    L0000: movzx eax, cl
    L0003: neg eax
    L0005: movzx eax, al
    L0008: ret

C.Negate(SByte)
    L0000: movsx rax, cl
    L0004: neg eax
    L0006: movsx rax, al
    L000a: ret

C.BitwiseAnd(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx edx, dx
    L0006: and eax, edx
    L0008: movzx eax, ax
    L000b: ret

C.BitwiseAnd(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rdx, dx
    L0008: and eax, edx
    L000a: movsx rax, ax
    L000e: ret

C.BitwiseAnd(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx edx, dl
    L0006: and eax, edx
    L0008: movzx eax, al
    L000b: ret

C.BitwiseAnd(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rdx, dl
    L0008: and eax, edx
    L000a: movsx rax, al
    L000e: ret

C.BitwiseOr(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx edx, dx
    L0006: or eax, edx
    L0008: movzx eax, ax
    L000b: ret

C.BitwiseOr(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rdx, dx
    L0008: or eax, edx
    L000a: movsx rax, ax
    L000e: ret

C.BitwiseOr(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx edx, dl
    L0006: or eax, edx
    L0008: movzx eax, al
    L000b: ret

C.BitwiseOr(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rdx, dl
    L0008: or eax, edx
    L000a: movsx rax, al
    L000e: ret

C.BitwiseXor(UInt16, UInt16)
    L0000: movzx eax, cx
    L0003: movzx edx, dx
    L0006: xor eax, edx
    L0008: movzx eax, ax
    L000b: ret

C.BitwiseXor(Int16, Int16)
    L0000: movsx rax, cx
    L0004: movsx rdx, dx
    L0008: xor eax, edx
    L000a: movsx rax, ax
    L000e: ret

C.BitwiseXor(Byte, Byte)
    L0000: movzx eax, cl
    L0003: movzx edx, dl
    L0006: xor eax, edx
    L0008: movzx eax, al
    L000b: ret

C.BitwiseXor(SByte, SByte)
    L0000: movsx rax, cl
    L0004: movsx rdx, dl
    L0008: xor eax, edx
    L000a: movsx rax, al
    L000e: ret

C.BitwiseNot(UInt16)
    L0000: movzx eax, cx
    L0003: not eax
    L0005: movzx eax, ax
    L0008: ret

C.BitwiseNot(Int16)
    L0000: movsx rax, cx
    L0004: not eax
    L0006: movsx rax, ax
    L000a: ret

C.BitwiseNot(Byte)
    L0000: movzx eax, cl
    L0003: not eax
    L0005: movzx eax, al
    L0008: ret

C.BitwiseNot(SByte)
    L0000: movsx rax, cl
    L0004: not eax
    L0006: movsx rax, al
    L000a: ret

C.ShiftRight(UInt16, Int32)
    L0000: movzx eax, cx
    L0003: mov ecx, edx
    L0005: sar eax, cl
    L0007: movzx eax, ax
    L000a: ret

C.ShiftArithmeticRight(Int16, Int32)
    L0000: movsx rax, cx
    L0004: mov ecx, edx
    L0006: sar eax, cl
    L0008: movsx rax, ax
    L000c: ret

C.ShiftRight(Byte, Int32)
    L0000: movzx eax, cl
    L0003: mov ecx, edx
    L0005: sar eax, cl
    L0007: movzx eax, al
    L000a: ret

C.ShiftArithmeticRight(SByte, Int32)
    L0000: movsx rax, cl
    L0004: mov ecx, edx
    L0006: sar eax, cl
    L0008: movsx rax, al
    L000c: ret

C.ShiftLeft(UInt16, Int32)
    L0000: movzx eax, cx
    L0003: mov ecx, edx
    L0005: shl eax, cl
    L0007: movzx eax, ax
    L000a: ret

C.ShiftLeft(Int16, Int32)
    L0000: movsx rax, cx
    L0004: mov ecx, edx
    L0006: shl eax, cl
    L0008: movsx rax, ax
    L000c: ret

C.ShiftLeft(Byte, Int32)
    L0000: movzx eax, cl
    L0003: mov ecx, edx
    L0005: shl eax, cl
    L0007: movzx eax, al
    L000a: ret

C.ShiftLeft(SByte, Int32)
    L0000: movsx rax, cl
    L0004: mov ecx, edx
    L0006: shl eax, cl
    L0008: movsx rax, al
    L000c: ret

C.DivideBy19(UInt16)
    L0000: movzx edx, cx
    L0003: mov ecx, 0x6bca1af3
    L0008: mov eax, ecx
    L000a: imul edx
    L000c: mov eax, edx
    L000e: shr eax, 0x1f
    L0011: sar edx, 3
    L0014: add eax, edx
    L0016: movzx eax, ax
    L0019: ret

C.DivideBy19(Int16)
    L0000: movsx rdx, cx
    L0004: mov ecx, 0x6bca1af3
    L0009: mov eax, ecx
    L000b: imul edx
    L000d: mov eax, edx
    L000f: shr eax, 0x1f
    L0012: sar edx, 3
    L0015: add eax, edx
    L0017: movsx rax, ax
    L001b: ret

C.DivideBy19(Byte)
    L0000: movzx edx, cl
    L0003: mov ecx, 0x6bca1af3
    L0008: mov eax, ecx
    L000a: imul edx
    L000c: mov eax, edx
    L000e: shr eax, 0x1f
    L0011: sar edx, 3
    L0014: add eax, edx
    L0016: movzx eax, al
    L0019: ret

C.DivideBy19(SByte)
    L0000: movsx rdx, cl
    L0004: mov ecx, 0x6bca1af3
    L0009: mov eax, ecx
    L000b: imul edx
    L000d: mov eax, edx
    L000f: shr eax, 0x1f
    L0012: sar edx, 3
    L0015: add eax, edx
    L0017: movsx rax, al
    L001b: ret

C.ModulusBy19(UInt16)
    L0000: movzx ecx, cx
    L0003: mov edx, 0x6bca1af3
    L0008: mov eax, edx
    L000a: imul ecx
    L000c: mov eax, edx
    L000e: shr eax, 0x1f
    L0011: sar edx, 3
    L0014: add eax, edx
    L0016: imul eax, 0x13
    L0019: sub ecx, eax
    L001b: movzx eax, cx
    L001e: ret

C.ModulusBy19(Int16)
    L0000: movsx rcx, cx
    L0004: mov edx, 0x6bca1af3
    L0009: mov eax, edx
    L000b: imul ecx
    L000d: mov eax, edx
    L000f: shr eax, 0x1f
    L0012: sar edx, 3
    L0015: add eax, edx
    L0017: imul eax, 0x13
    L001a: sub ecx, eax
    L001c: movsx rax, cx
    L0020: ret

C.ModulusBy19(Byte)
    L0000: movzx ecx, cl
    L0003: mov edx, 0x6bca1af3
    L0008: mov eax, edx
    L000a: imul ecx
    L000c: mov eax, edx
    L000e: shr eax, 0x1f
    L0011: sar edx, 3
    L0014: add eax, edx
    L0016: imul eax, 0x13
    L0019: sub ecx, eax
    L001b: movzx eax, cl
    L001e: ret

C.ModulusBy19(SByte)
    L0000: movsx rcx, cl
    L0004: mov edx, 0x6bca1af3
    L0009: mov eax, edx
    L000b: imul ecx
    L000d: mov eax, edx
    L000f: shr eax, 0x1f
    L0012: sar edx, 3
    L0015: add eax, edx
    L0017: imul eax, 0x13
    L001a: sub ecx, eax
    L001c: movsx rax, cl
    L0020: ret

Generated by SharpLab.io

C code (compiled as C++, since the functions are overloaded)
#include <stdint.h>

// Each function applies one operator to its operands and narrows the result
// back to the operand type. The overloads mirror the C# methods one-to-one.

// Addition
uint16_t Add(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left + right); }
int16_t Add(int16_t left, int16_t right) { return static_cast<int16_t>(left + right); }
uint8_t Add(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left + right); }
int8_t Add(int8_t left, int8_t right) { return static_cast<int8_t>(left + right); }

// Subtraction
uint16_t Subtract(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left - right); }
int16_t Subtract(int16_t left, int16_t right) { return static_cast<int16_t>(left - right); }
uint8_t Subtract(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left - right); }
int8_t Subtract(int8_t left, int8_t right) { return static_cast<int8_t>(left - right); }

// Multiplication
uint16_t Multiply(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left * right); }
int16_t Multiply(int16_t left, int16_t right) { return static_cast<int16_t>(left * right); }
uint8_t Multiply(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left * right); }
int8_t Multiply(int8_t left, int8_t right) { return static_cast<int8_t>(left * right); }

// Division by a variable divisor
uint16_t Divide(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left / right); }
int16_t Divide(int16_t left, int16_t right) { return static_cast<int16_t>(left / right); }
uint8_t Divide(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left / right); }
int8_t Divide(int8_t left, int8_t right) { return static_cast<int8_t>(left / right); }

// Remainder by a variable divisor
uint16_t Modulus(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left % right); }
int16_t Modulus(int16_t left, int16_t right) { return static_cast<int16_t>(left % right); }
uint8_t Modulus(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left % right); }
int8_t Modulus(int8_t left, int8_t right) { return static_cast<int8_t>(left % right); }

// Unary negation
uint16_t Negate(uint16_t left) { return static_cast<uint16_t>(-left); }
int16_t Negate(int16_t left) { return static_cast<int16_t>(-left); }
uint8_t Negate(uint8_t left) { return static_cast<uint8_t>(-left); }
int8_t Negate(int8_t left) { return static_cast<int8_t>(-left); }

// Bitwise AND
uint16_t BitwiseAnd(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left & right); }
int16_t BitwiseAnd(int16_t left, int16_t right) { return static_cast<int16_t>(left & right); }
uint8_t BitwiseAnd(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left & right); }
int8_t BitwiseAnd(int8_t left, int8_t right) { return static_cast<int8_t>(left & right); }

// Bitwise OR
uint16_t BitwiseOr(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left | right); }
int16_t BitwiseOr(int16_t left, int16_t right) { return static_cast<int16_t>(left | right); }
uint8_t BitwiseOr(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left | right); }
int8_t BitwiseOr(int8_t left, int8_t right) { return static_cast<int8_t>(left | right); }

// Bitwise XOR
uint16_t BitwiseXor(uint16_t left, uint16_t right) { return static_cast<uint16_t>(left ^ right); }
int16_t BitwiseXor(int16_t left, int16_t right) { return static_cast<int16_t>(left ^ right); }
uint8_t BitwiseXor(uint8_t left, uint8_t right) { return static_cast<uint8_t>(left ^ right); }
int8_t BitwiseXor(int8_t left, int8_t right) { return static_cast<int8_t>(left ^ right); }

// Bitwise complement
uint16_t BitwiseNot(uint16_t left) { return static_cast<uint16_t>(~left); }
int16_t BitwiseNot(int16_t left) { return static_cast<int16_t>(~left); }
uint8_t BitwiseNot(uint8_t left) { return static_cast<uint8_t>(~left); }
int8_t BitwiseNot(int8_t left) { return static_cast<int8_t>(~left); }

// Right shift (named "Arithmetic" for the signed operand types)
uint16_t ShiftRight(uint16_t left, int32_t right) { return static_cast<uint16_t>(left >> right); }
int16_t ShiftArithmeticRight(int16_t left, int32_t right) { return static_cast<int16_t>(left >> right); }
uint8_t ShiftRight(uint8_t left, int32_t right) { return static_cast<uint8_t>(left >> right); }
int8_t ShiftArithmeticRight(int8_t left, int32_t right) { return static_cast<int8_t>(left >> right); }

// Left shift
uint16_t ShiftLeft(uint16_t left, int32_t right) { return static_cast<uint16_t>(left << right); }
int16_t ShiftLeft(int16_t left, int32_t right) { return static_cast<int16_t>(left << right); }
uint8_t ShiftLeft(uint8_t left, int32_t right) { return static_cast<uint8_t>(left << right); }
int8_t ShiftLeft(int8_t left, int32_t right) { return static_cast<int8_t>(left << right); }

// Division by the constant 19
uint16_t DivideBy19(uint16_t left) { return static_cast<uint16_t>(left / 19); }
int16_t DivideBy19(int16_t left) { return static_cast<int16_t>(left / 19); }
uint8_t DivideBy19(uint8_t left) { return static_cast<uint8_t>(left / 19); }
int8_t DivideBy19(int8_t left) { return static_cast<int8_t>(left / 19); }

// Remainder by the constant 19
uint16_t ModulusBy19(uint16_t left) { return static_cast<uint16_t>(left % 19); }
int16_t ModulusBy19(int16_t left) { return static_cast<int16_t>(left % 19); }
uint8_t ModulusBy19(uint8_t left) { return static_cast<uint8_t>(left % 19); }
int8_t ModulusBy19(int8_t left) { return static_cast<int8_t>(left % 19); }
LLVM output assembly (Clang 12.0, -O3 -ffast-math -march=x86-64-v3)
Add(unsigned short, unsigned short):                               # @Add(unsigned short, unsigned short)
        lea     eax, [rdi + rsi]
        ret
Add(short, short):                               # @Add(short, short)
        lea     eax, [rdi + rsi]
        ret
Add(unsigned char, unsigned char):                               # @Add(unsigned char, unsigned char)
        lea     eax, [rsi + rdi]
        ret
Add(signed char, signed char):                               # @Add(signed char, signed char)
        lea     eax, [rsi + rdi]
        ret
Subtract(unsigned short, unsigned short):                          # @Subtract(unsigned short, unsigned short)
        mov     eax, edi
        sub     eax, esi
        ret
Subtract(short, short):                          # @Subtract(short, short)
        mov     eax, edi
        sub     eax, esi
        ret
Subtract(unsigned char, unsigned char):                          # @Subtract(unsigned char, unsigned char)
        mov     eax, edi
        sub     al, sil
        ret
Subtract(signed char, signed char):                          # @Subtract(signed char, signed char)
        mov     eax, edi
        sub     al, sil
        ret
Multiply(unsigned short, unsigned short):                          # @Multiply(unsigned short, unsigned short)
        mov     eax, edi
        imul    eax, esi
        ret
Multiply(short, short):                          # @Multiply(short, short)
        mov     eax, edi
        imul    eax, esi
        ret
Multiply(unsigned char, unsigned char):                          # @Multiply(unsigned char, unsigned char)
        mov     eax, esi
        mul     dil
        ret
Multiply(signed char, signed char):                          # @Multiply(signed char, signed char)
        mov     eax, esi
        mul     dil
        ret
Divide(unsigned short, unsigned short):                            # @Divide(unsigned short, unsigned short)
        mov     eax, edi
        xor     edx, edx
        div     si
        ret
Divide(short, short):                            # @Divide(short, short)
        mov     eax, edi
        cdq
        idiv    esi
        ret
Divide(unsigned char, unsigned char):                            # @Divide(unsigned char, unsigned char)
        movzx   eax, dil
        div     sil
        ret
Divide(signed char, signed char):                            # @Divide(signed char, signed char)
        mov     eax, edi
        cwd
        idiv    si
        ret
Modulus(unsigned short, unsigned short):                           # @Modulus(unsigned short, unsigned short)
        mov     eax, edi
        xor     edx, edx
        div     si
        mov     eax, edx
        ret
Modulus(short, short):                           # @Modulus(short, short)
        mov     eax, edi
        cdq
        idiv    esi
        mov     eax, edx
        ret
Modulus(unsigned char, unsigned char):                           # @Modulus(unsigned char, unsigned char)
        movzx   eax, dil
        div     sil
        movzx   eax, ah
        ret
Modulus(signed char, signed char):                           # @Modulus(signed char, signed char)
        mov     eax, edi
        cwd
        idiv    si
        mov     eax, edx
        ret
Negate(unsigned short):                             # @Negate(unsigned short)
        mov     eax, edi
        neg     eax
        ret
Negate(short):                             # @Negate(short)
        mov     eax, edi
        neg     eax
        ret
Negate(unsigned char):                             # @Negate(unsigned char)
        mov     eax, edi
        neg     al
        ret
Negate(signed char):                             # @Negate(signed char)
        mov     eax, edi
        neg     al
        ret
BitwiseAnd(unsigned short, unsigned short):                       # @BitwiseAnd(unsigned short, unsigned short)
        mov     eax, edi
        and     eax, esi
        ret
BitwiseAnd(short, short):                       # @BitwiseAnd(short, short)
        mov     eax, edi
        and     eax, esi
        ret
BitwiseAnd(unsigned char, unsigned char):                       # @BitwiseAnd(unsigned char, unsigned char)
        mov     eax, edi
        and     eax, esi
        ret
BitwiseAnd(signed char, signed char):                       # @BitwiseAnd(signed char, signed char)
        mov     eax, edi
        and     eax, esi
        ret
BitwiseOr(unsigned short, unsigned short):                         # @BitwiseOr(unsigned short, unsigned short)
        mov     eax, edi
        or      eax, esi
        ret
BitwiseOr(short, short):                         # @BitwiseOr(short, short)
        mov     eax, edi
        or      eax, esi
        ret
BitwiseOr(unsigned char, unsigned char):                         # @BitwiseOr(unsigned char, unsigned char)
        mov     eax, edi
        or      eax, esi
        ret
BitwiseOr(signed char, signed char):                         # @BitwiseOr(signed char, signed char)
        mov     eax, edi
        or      eax, esi
        ret
BitwiseXor(unsigned short, unsigned short):                       # @BitwiseXor(unsigned short, unsigned short)
        mov     eax, edi
        xor     eax, esi
        ret
BitwiseXor(short, short):                       # @BitwiseXor(short, short)
        mov     eax, edi
        xor     eax, esi
        ret
BitwiseXor(unsigned char, unsigned char):                       # @BitwiseXor(unsigned char, unsigned char)
        mov     eax, edi
        xor     eax, esi
        ret
BitwiseXor(signed char, signed char):                       # @BitwiseXor(signed char, signed char)
        mov     eax, edi
        xor     eax, esi
        ret
BitwiseNot(unsigned short):                        # @BitwiseNot(unsigned short)
        mov     eax, edi
        not     eax
        ret
BitwiseNot(short):                        # @BitwiseNot(short)
        mov     eax, edi
        not     eax
        ret
BitwiseNot(unsigned char):                        # @BitwiseNot(unsigned char)
        mov     eax, edi
        not     al
        ret
BitwiseNot(signed char):                        # @BitwiseNot(signed char)
        mov     eax, edi
        not     al
        ret
ShiftRight(unsigned short, int):                       # @ShiftRight(unsigned short, int)
        shrx    eax, edi, esi
        ret
ShiftArithmeticRight(short, int):             # @ShiftArithmeticRight(short, int)
        sarx    eax, edi, esi
        ret
ShiftRight(unsigned char, int):                       # @ShiftRight(unsigned char, int)
        shrx    eax, edi, esi
        ret
ShiftArithmeticRight(signed char, int):             # @ShiftArithmeticRight(signed char, int)
        sarx    eax, edi, esi
        ret
ShiftLeft(unsigned short, int):                         # @ShiftLeft(unsigned short, int)
        shlx    eax, edi, esi
        ret
ShiftLeft(short, int):                         # @ShiftLeft(short, int)
        shlx    eax, edi, esi
        ret
ShiftLeft(unsigned char, int):                         # @ShiftLeft(unsigned char, int)
        shlx    eax, edi, esi
        ret
ShiftLeft(signed char, int):                         # @ShiftLeft(signed char, int)
        shlx    eax, edi, esi
        ret
DivideBy19(unsigned short):                        # @DivideBy19(unsigned short)
        imul    eax, edi, 55189
        shr     eax, 20
        ret
DivideBy19(short):                        # @DivideBy19(short)
        imul    eax, edi, -10347
        shr     eax, 16
        add     eax, edi
        movzx   ecx, ax
        movsx   eax, cx
        shr     ecx, 15
        sar     eax, 4
        add     eax, ecx
        ret
DivideBy19(unsigned char):                        # @DivideBy19(unsigned char)
        lea     eax, [rdi + 8*rdi]
        lea     eax, [rax + 2*rax]
        shr     eax, 9
        ret
DivideBy19(signed char):                        # @DivideBy19(signed char)
        lea     eax, [rdi + 8*rdi]
        lea     ecx, [rax + 2*rax]
        movzx   eax, cx
        shr     ecx, 9
        shr     eax, 15
        add     al, cl
        ret
ModulusBy19(unsigned short):                       # @ModulusBy19(unsigned short)
        mov     eax, edi
        imul    ecx, edi, 55189
        shr     ecx, 20
        lea     edx, [rcx + 8*rcx]
        lea     ecx, [rcx + 2*rdx]
        sub     eax, ecx
        ret
ModulusBy19(short):                       # @ModulusBy19(short)
        mov     eax, edi
        imul    ecx, edi, -10347
        shr     ecx, 16
        add     ecx, edi
        movzx   ecx, cx
        movsx   edx, cx
        shr     ecx, 15
        sar     edx, 4
        add     edx, ecx
        lea     ecx, [rdx + 8*rdx]
        lea     ecx, [rdx + 2*rcx]
        sub     eax, ecx
        ret
ModulusBy19(unsigned char):                       # @ModulusBy19(unsigned char)
        mov     eax, edi
        lea     ecx, [rax + 8*rax]
        lea     ecx, [rcx + 2*rcx]
        shr     ecx, 9
        lea     edx, [rcx + 8*rcx]
        lea     ecx, [rcx + 2*rdx]
        sub     al, cl
        ret
ModulusBy19(signed char):                       # @ModulusBy19(signed char)
        mov     eax, edi
        lea     ecx, [rax + 8*rax]
        lea     ecx, [rcx + 2*rcx]
        movzx   edx, cx
        shr     ecx, 9
        shr     edx, 15
        add     dl, cl
        movzx   ecx, dl
        lea     edx, [rcx + 8*rcx]
        lea     ecx, [rcx + 2*rdx]
        sub     al, cl
        ret

Generated by Compiler Explorer

GCC 11.0 output assembly (-O3 -ffast-math -march=x86-64-v3)
Add(unsigned short, unsigned short):
        lea     eax, [rsi+rdi]
        ret
Add(short, short):
        lea     eax, [rsi+rdi]
        ret
Add(unsigned char, unsigned char):
        lea     eax, [rsi+rdi]
        ret
Add(signed char, signed char):
        lea     eax, [rsi+rdi]
        ret
Subtract(unsigned short, unsigned short):
        mov     eax, edi
        sub     eax, esi
        ret
Subtract(short, short):
        mov     eax, edi
        sub     eax, esi
        ret
Subtract(unsigned char, unsigned char):
        mov     eax, edi
        sub     eax, esi
        ret
Subtract(signed char, signed char):
        mov     eax, edi
        sub     eax, esi
        ret
Multiply(unsigned short, unsigned short):
        mov     eax, esi
        imul    eax, edi
        ret
Multiply(short, short):
        mov     eax, esi
        imul    eax, edi
        ret
Multiply(unsigned char, unsigned char):
        mov     eax, esi
        imul    eax, edi
        ret
Multiply(signed char, signed char):
        mov     eax, esi
        imul    eax, edi
        ret
Divide(unsigned short, unsigned short):
        mov     eax, edi
        xor     edx, edx
        div     si
        ret
Divide(short, short):
        movsx   eax, di
        movsx   esi, si
        cdq
        idiv    esi
        ret
Divide(unsigned char, unsigned char):
        movzx   eax, dil
        div     sil
        ret
Divide(signed char, signed char):
        movsx   eax, dil
        movsx   esi, sil
        cdq
        idiv    esi
        ret
Modulus(unsigned short, unsigned short):
        mov     eax, edi
        xor     edx, edx
        div     si
        mov     eax, edx
        ret
Modulus(short, short):
        movsx   eax, di
        movsx   esi, si
        cdq
        idiv    esi
        mov     eax, edx
        ret
Modulus(unsigned char, unsigned char):
        movzx   eax, dil
        div     sil
        movzx   eax, ah
        ret
Modulus(signed char, signed char):
        movsx   eax, dil
        movsx   esi, sil
        cdq
        idiv    esi
        mov     eax, edx
        ret
Negate(unsigned short):
        mov     eax, edi
        neg     eax
        ret
Negate(short):
        mov     eax, edi
        neg     eax
        ret
Negate(unsigned char):
        mov     eax, edi
        neg     eax
        ret
Negate(signed char):
        mov     eax, edi
        neg     eax
        ret
BitwiseAnd(unsigned short, unsigned short):
        mov     eax, esi
        and     eax, edi
        ret
BitwiseAnd(short, short):
        mov     eax, esi
        and     eax, edi
        ret
BitwiseAnd(unsigned char, unsigned char):
        mov     eax, esi
        and     eax, edi
        ret
BitwiseAnd(signed char, signed char):
        mov     eax, esi
        and     eax, edi
        ret
BitwiseOr(unsigned short, unsigned short):
        mov     eax, esi
        or      eax, edi
        ret
BitwiseOr(short, short):
        mov     eax, esi
        or      eax, edi
        ret
BitwiseOr(unsigned char, unsigned char):
        mov     eax, esi
        or      eax, edi
        ret
BitwiseOr(signed char, signed char):
        mov     eax, esi
        or      eax, edi
        ret
BitwiseXor(unsigned short, unsigned short):
        mov     eax, esi
        xor     eax, edi
        ret
BitwiseXor(short, short):
        mov     eax, esi
        xor     eax, edi
        ret
BitwiseXor(unsigned char, unsigned char):
        mov     eax, esi
        xor     eax, edi
        ret
BitwiseXor(signed char, signed char):
        mov     eax, esi
        xor     eax, edi
        ret
BitwiseNot(unsigned short):
        mov     eax, edi
        not     eax
        ret
BitwiseNot(short):
        mov     eax, edi
        not     eax
        ret
BitwiseNot(unsigned char):
        mov     eax, edi
        not     eax
        ret
BitwiseNot(signed char):
        mov     eax, edi
        not     eax
        ret
ShiftRight(unsigned short, int):
        movzx   edi, di
        sarx    eax, edi, esi
        ret
ShiftArithmeticRight(short, int):
        movsx   edi, di
        sarx    eax, edi, esi
        ret
ShiftRight(unsigned char, int):
        movzx   edi, dil
        sarx    eax, edi, esi
        ret
ShiftArithmeticRight(signed char, int):
        movsx   edi, dil
        sarx    eax, edi, esi
        ret
ShiftLeft(unsigned short, int):
        movzx   edi, di
        shlx    eax, edi, esi
        ret
ShiftLeft(short, int):
        movsx   edi, di
        shlx    eax, edi, esi
        ret
ShiftLeft(unsigned char, int):
        movzx   edi, dil
        shlx    eax, edi, esi
        ret
ShiftLeft(signed char, int):
        movsx   edi, dil
        shlx    eax, edi, esi
        ret
DivideBy19(unsigned short):
        movzx   eax, di
        imul    eax, eax, 55189
        shr     eax, 20
        ret
DivideBy19(short):
        movsx   eax, di
        imul    eax, eax, -10347
        shr     eax, 16
        add     eax, edi
        sar     di, 15
        sar     ax, 4
        sub     eax, edi
        ret
DivideBy19(unsigned char):
        mov     edx, 27
        mov     eax, edi
        mul     dl
        shr     ax, 9
        ret
DivideBy19(signed char):
        mov     eax, 27
        imul    dil
        sar     dil, 7
        sar     ax, 9
        sub     eax, edi
        ret
ModulusBy19(unsigned short):
        movzx   eax, di
        imul    eax, eax, 55189
        shr     eax, 20
        lea     edx, [rax+rax*8]
        lea     edx, [rax+rdx*2]
        mov     eax, edi
        sub     eax, edx
        ret
ModulusBy19(short):
        movsx   eax, di
        mov     edx, edi
        imul    eax, eax, -10347
        sar     dx, 15
        shr     eax, 16
        add     eax, edi
        sar     ax, 4
        sub     eax, edx
        lea     edx, [rax+rax*8]
        lea     edx, [rax+rdx*2]
        mov     eax, edi
        sub     eax, edx
        ret
ModulusBy19(unsigned char):
        mov     eax, 27
        mul     dil
        shr     ax, 9
        lea     edx, [rax+rax*8]
        lea     edx, [rax+rdx*2]
        mov     eax, edi
        sub     eax, edx
        ret
ModulusBy19(signed char):
        mov     eax, 27
        mov     edx, edi
        imul    dil
        sar     dl, 7
        sar     ax, 9
        sub     eax, edx
        lea     edx, [rax+rax*8]
        lea     edx, [rax+rdx*2]
        mov     eax, edi
        sub     eax, edx
        ret

Generated by Compiler Explorer

LLVM suggests that the movzx and movsx instructions may sometimes be unnecessary.
At -O3, both Clang/LLVM and GCC apply many unusual optimizations, such as replacing multiplication by a constant with a few lea instructions, but I still recommend generating code that is more similar to what Clang/LLVM produces.

category:cq
theme:codegen
skill-level:intermediate
cost:large
impact:medium

Metadata

Metadata

Assignees

Labels

Priority:3 (Work that is nice to have) · area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI) · optimization · tenet-performance (Performance related issue)

Type

No type

Projects

Status

Done

Relationships

None yet

Development

No branches or pull requests

Issue actions