Closed
Description
Description
Currently, RyuJIT generates code like the following for arithmetic operations on `short`, `ushort`, `sbyte`, and `byte`.
The generated code contains many `movsx` and `movzx` instructions, and it appears to be less optimal than the equivalent code generated by Clang/LLVM.
Example of the codegen difference between C# and C on x64
C# Code
using System;
// Repro for the small-integer codegen report. For every operator there is one overload
// per narrow integer type (ushort, short, byte, sbyte). Each method applies the operator
// to its argument(s) and explicitly casts the result back to its declared return type.
public static class C {
// Addition.
public static ushort Add(ushort left, ushort right) => (ushort)(left + right);
public static short Add(short left, short right) => (short)(left + right);
public static byte Add(byte left, byte right) => (byte)(left + right);
public static sbyte Add(sbyte left, sbyte right) => (sbyte)(left + right);
// Subtraction.
public static ushort Subtract(ushort left, ushort right) => (ushort)(left - right);
public static short Subtract(short left, short right) => (short)(left - right);
public static byte Subtract(byte left, byte right) => (byte)(left - right);
public static sbyte Subtract(sbyte left, sbyte right) => (sbyte)(left - right);
// Multiplication.
public static ushort Multiply(ushort left, ushort right) => (ushort)(left * right);
public static short Multiply(short left, short right) => (short)(left * right);
public static byte Multiply(byte left, byte right) => (byte)(left * right);
public static sbyte Multiply(sbyte left, sbyte right) => (sbyte)(left * right);
// Division by a variable divisor. Nothing in these bodies checks for right == 0.
public static ushort Divide(ushort left, ushort right) => (ushort)(left / right);
public static short Divide(short left, short right) => (short)(left / right);
public static byte Divide(byte left, byte right) => (byte)(left / right);
public static sbyte Divide(sbyte left, sbyte right) => (sbyte)(left / right);
// Remainder by a variable divisor. Nothing in these bodies checks for right == 0.
public static ushort Modulus(ushort left, ushort right) => (ushort)(left % right);
public static short Modulus(short left, short right) => (short)(left % right);
public static byte Modulus(byte left, byte right) => (byte)(left % right);
public static sbyte Modulus(sbyte left, sbyte right) => (sbyte)(left % right);
// Unary minus; the cast is applied to the already-negated value.
public static ushort Negate(ushort left) => (ushort)-left;
public static short Negate(short left) => (short)-left;
public static byte Negate(byte left) => (byte)-left;
public static sbyte Negate(sbyte left) => (sbyte)-left;
// Bitwise AND.
public static ushort BitwiseAnd(ushort left, ushort right) => (ushort)(left & right);
public static short BitwiseAnd(short left, short right) => (short)(left & right);
public static byte BitwiseAnd(byte left, byte right) => (byte)(left & right);
public static sbyte BitwiseAnd(sbyte left, sbyte right) => (sbyte)(left & right);
// Bitwise OR.
public static ushort BitwiseOr(ushort left, ushort right) => (ushort)(left | right);
public static short BitwiseOr(short left, short right) => (short)(left | right);
public static byte BitwiseOr(byte left, byte right) => (byte)(left | right);
public static sbyte BitwiseOr(sbyte left, sbyte right) => (sbyte)(left | right);
// Bitwise XOR.
public static ushort BitwiseXor(ushort left, ushort right) => (ushort)(left ^ right);
public static short BitwiseXor(short left, short right) => (short)(left ^ right);
public static byte BitwiseXor(byte left, byte right) => (byte)(left ^ right);
public static sbyte BitwiseXor(sbyte left, sbyte right) => (sbyte)(left ^ right);
// Bitwise complement; the cast is applied to the already-complemented value.
public static ushort BitwiseNot(ushort left) => (ushort)~left;
public static short BitwiseNot(short left) => (short)~left;
public static byte BitwiseNot(byte left) => (byte)~left;
public static sbyte BitwiseNot(sbyte left) => (sbyte)~left;
// Right shift by an int count. The unsigned overloads are named ShiftRight and the signed
// ones ShiftArithmeticRight, but all four use the same `>>` expression; only the operand
// and return types differ. The RyuJIT listing in this report shows `sar` for all four.
public static ushort ShiftRight(ushort left, int right) => (ushort)(left >> right);
public static short ShiftArithmeticRight(short left, int right) => (short)(left >> right);
public static byte ShiftRight(byte left, int right) => (byte)(left >> right);
public static sbyte ShiftArithmeticRight(sbyte left, int right) => (sbyte)(left >> right);
// Left shift by an int count.
public static ushort ShiftLeft(ushort left, int right) => (ushort)(left << right);
public static short ShiftLeft(short left, int right) => (short)(left << right);
public static byte ShiftLeft(byte left, int right) => (byte)(left << right);
public static sbyte ShiftLeft(sbyte left, int right) => (sbyte)(left << right);
// Division by the compile-time constant 19. The RyuJIT listing in this report implements
// these with a multiply by 0x6bca1af3 plus shifts rather than an idiv.
public static ushort DivideBy19(ushort left) => (ushort)(left / 19);
public static short DivideBy19(short left) => (short)(left / 19);
public static byte DivideBy19(byte left) => (byte)(left / 19);
public static sbyte DivideBy19(sbyte left) => (sbyte)(left / 19);
// Remainder by the compile-time constant 19. The RyuJIT listing computes the quotient as
// above, multiplies it by 0x13 (19) and subtracts that from the input.
public static ushort ModulusBy19(ushort left) => (ushort)(left % 19);
public static short ModulusBy19(short left) => (short)(left % 19);
public static byte ModulusBy19(byte left) => (byte)(left % 19);
public static sbyte ModulusBy19(sbyte left) => (sbyte)(left % 19);
}
RyuJIT Assembly(x64 Release)
; Core CLR v4.700.20.41105 on amd64
C.Add(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx edx, dx
L0006: add eax, edx
L0008: movzx eax, ax
L000b: ret
C.Add(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rdx, dx
L0008: add eax, edx
L000a: movsx rax, ax
L000e: ret
C.Add(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx edx, dl
L0006: add eax, edx
L0008: movzx eax, al
L000b: ret
C.Add(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rdx, dl
L0008: add eax, edx
L000a: movsx rax, al
L000e: ret
C.Subtract(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx edx, dx
L0006: sub eax, edx
L0008: movzx eax, ax
L000b: ret
C.Subtract(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rdx, dx
L0008: sub eax, edx
L000a: movsx rax, ax
L000e: ret
C.Subtract(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx edx, dl
L0006: sub eax, edx
L0008: movzx eax, al
L000b: ret
C.Subtract(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rdx, dl
L0008: sub eax, edx
L000a: movsx rax, al
L000e: ret
C.Multiply(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx edx, dx
L0006: imul eax, edx
L0009: movzx eax, ax
L000c: ret
C.Multiply(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rdx, dx
L0008: imul eax, edx
L000b: movsx rax, ax
L000f: ret
C.Multiply(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx edx, dl
L0006: imul eax, edx
L0009: movzx eax, al
L000c: ret
C.Multiply(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rdx, dl
L0008: imul eax, edx
L000b: movsx rax, al
L000f: ret
C.Divide(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx ecx, dx
L0006: cdq
L0007: idiv ecx
L0009: movzx eax, ax
L000c: ret
C.Divide(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rcx, dx
L0008: cdq
L0009: idiv ecx
L000b: movsx rax, ax
L000f: ret
C.Divide(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx ecx, dl
L0006: cdq
L0007: idiv ecx
L0009: movzx eax, al
L000c: ret
C.Divide(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rcx, dl
L0008: cdq
L0009: idiv ecx
L000b: movsx rax, al
L000f: ret
C.Modulus(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx ecx, dx
L0006: cdq
L0007: idiv ecx
L0009: movzx eax, dx
L000c: ret
C.Modulus(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rcx, dx
L0008: cdq
L0009: idiv ecx
L000b: movsx rax, dx
L000f: ret
C.Modulus(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx ecx, dl
L0006: cdq
L0007: idiv ecx
L0009: movzx eax, dl
L000c: ret
C.Modulus(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rcx, dl
L0008: cdq
L0009: idiv ecx
L000b: movsx rax, dl
L000f: ret
C.Negate(UInt16)
L0000: movzx eax, cx
L0003: neg eax
L0005: movzx eax, ax
L0008: ret
C.Negate(Int16)
L0000: movsx rax, cx
L0004: neg eax
L0006: movsx rax, ax
L000a: ret
C.Negate(Byte)
L0000: movzx eax, cl
L0003: neg eax
L0005: movzx eax, al
L0008: ret
C.Negate(SByte)
L0000: movsx rax, cl
L0004: neg eax
L0006: movsx rax, al
L000a: ret
C.BitwiseAnd(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx edx, dx
L0006: and eax, edx
L0008: movzx eax, ax
L000b: ret
C.BitwiseAnd(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rdx, dx
L0008: and eax, edx
L000a: movsx rax, ax
L000e: ret
C.BitwiseAnd(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx edx, dl
L0006: and eax, edx
L0008: movzx eax, al
L000b: ret
C.BitwiseAnd(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rdx, dl
L0008: and eax, edx
L000a: movsx rax, al
L000e: ret
C.BitwiseOr(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx edx, dx
L0006: or eax, edx
L0008: movzx eax, ax
L000b: ret
C.BitwiseOr(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rdx, dx
L0008: or eax, edx
L000a: movsx rax, ax
L000e: ret
C.BitwiseOr(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx edx, dl
L0006: or eax, edx
L0008: movzx eax, al
L000b: ret
C.BitwiseOr(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rdx, dl
L0008: or eax, edx
L000a: movsx rax, al
L000e: ret
C.BitwiseXor(UInt16, UInt16)
L0000: movzx eax, cx
L0003: movzx edx, dx
L0006: xor eax, edx
L0008: movzx eax, ax
L000b: ret
C.BitwiseXor(Int16, Int16)
L0000: movsx rax, cx
L0004: movsx rdx, dx
L0008: xor eax, edx
L000a: movsx rax, ax
L000e: ret
C.BitwiseXor(Byte, Byte)
L0000: movzx eax, cl
L0003: movzx edx, dl
L0006: xor eax, edx
L0008: movzx eax, al
L000b: ret
C.BitwiseXor(SByte, SByte)
L0000: movsx rax, cl
L0004: movsx rdx, dl
L0008: xor eax, edx
L000a: movsx rax, al
L000e: ret
C.BitwiseNot(UInt16)
L0000: movzx eax, cx
L0003: not eax
L0005: movzx eax, ax
L0008: ret
C.BitwiseNot(Int16)
L0000: movsx rax, cx
L0004: not eax
L0006: movsx rax, ax
L000a: ret
C.BitwiseNot(Byte)
L0000: movzx eax, cl
L0003: not eax
L0005: movzx eax, al
L0008: ret
C.BitwiseNot(SByte)
L0000: movsx rax, cl
L0004: not eax
L0006: movsx rax, al
L000a: ret
C.ShiftRight(UInt16, Int32)
L0000: movzx eax, cx
L0003: mov ecx, edx
L0005: sar eax, cl
L0007: movzx eax, ax
L000a: ret
C.ShiftArithmeticRight(Int16, Int32)
L0000: movsx rax, cx
L0004: mov ecx, edx
L0006: sar eax, cl
L0008: movsx rax, ax
L000c: ret
C.ShiftRight(Byte, Int32)
L0000: movzx eax, cl
L0003: mov ecx, edx
L0005: sar eax, cl
L0007: movzx eax, al
L000a: ret
C.ShiftArithmeticRight(SByte, Int32)
L0000: movsx rax, cl
L0004: mov ecx, edx
L0006: sar eax, cl
L0008: movsx rax, al
L000c: ret
C.ShiftLeft(UInt16, Int32)
L0000: movzx eax, cx
L0003: mov ecx, edx
L0005: shl eax, cl
L0007: movzx eax, ax
L000a: ret
C.ShiftLeft(Int16, Int32)
L0000: movsx rax, cx
L0004: mov ecx, edx
L0006: shl eax, cl
L0008: movsx rax, ax
L000c: ret
C.ShiftLeft(Byte, Int32)
L0000: movzx eax, cl
L0003: mov ecx, edx
L0005: shl eax, cl
L0007: movzx eax, al
L000a: ret
C.ShiftLeft(SByte, Int32)
L0000: movsx rax, cl
L0004: mov ecx, edx
L0006: shl eax, cl
L0008: movsx rax, al
L000c: ret
C.DivideBy19(UInt16)
L0000: movzx edx, cx
L0003: mov ecx, 0x6bca1af3
L0008: mov eax, ecx
L000a: imul edx
L000c: mov eax, edx
L000e: shr eax, 0x1f
L0011: sar edx, 3
L0014: add eax, edx
L0016: movzx eax, ax
L0019: ret
C.DivideBy19(Int16)
L0000: movsx rdx, cx
L0004: mov ecx, 0x6bca1af3
L0009: mov eax, ecx
L000b: imul edx
L000d: mov eax, edx
L000f: shr eax, 0x1f
L0012: sar edx, 3
L0015: add eax, edx
L0017: movsx rax, ax
L001b: ret
C.DivideBy19(Byte)
L0000: movzx edx, cl
L0003: mov ecx, 0x6bca1af3
L0008: mov eax, ecx
L000a: imul edx
L000c: mov eax, edx
L000e: shr eax, 0x1f
L0011: sar edx, 3
L0014: add eax, edx
L0016: movzx eax, al
L0019: ret
C.DivideBy19(SByte)
L0000: movsx rdx, cl
L0004: mov ecx, 0x6bca1af3
L0009: mov eax, ecx
L000b: imul edx
L000d: mov eax, edx
L000f: shr eax, 0x1f
L0012: sar edx, 3
L0015: add eax, edx
L0017: movsx rax, al
L001b: ret
C.ModulusBy19(UInt16)
L0000: movzx ecx, cx
L0003: mov edx, 0x6bca1af3
L0008: mov eax, edx
L000a: imul ecx
L000c: mov eax, edx
L000e: shr eax, 0x1f
L0011: sar edx, 3
L0014: add eax, edx
L0016: imul eax, 0x13
L0019: sub ecx, eax
L001b: movzx eax, cx
L001e: ret
C.ModulusBy19(Int16)
L0000: movsx rcx, cx
L0004: mov edx, 0x6bca1af3
L0009: mov eax, edx
L000b: imul ecx
L000d: mov eax, edx
L000f: shr eax, 0x1f
L0012: sar edx, 3
L0015: add eax, edx
L0017: imul eax, 0x13
L001a: sub ecx, eax
L001c: movsx rax, cx
L0020: ret
C.ModulusBy19(Byte)
L0000: movzx ecx, cl
L0003: mov edx, 0x6bca1af3
L0008: mov eax, edx
L000a: imul ecx
L000c: mov eax, edx
L000e: shr eax, 0x1f
L0011: sar edx, 3
L0014: add eax, edx
L0016: imul eax, 0x13
L0019: sub ecx, eax
L001b: movzx eax, cl
L001e: ret
C.ModulusBy19(SByte)
L0000: movsx rcx, cl
L0004: mov edx, 0x6bca1af3
L0009: mov eax, edx
L000b: imul ecx
L000d: mov eax, edx
L000f: shr eax, 0x1f
L0012: sar edx, 3
L0015: add eax, edx
L0017: imul eax, 0x13
L001a: sub ecx, eax
L001c: movsx rax, cl
L0020: ret
Generated by SharpLab.io
C code (compiled as C++, because the functions below are overloaded by parameter type)
#include <stdint.h>
// Counterpart of the C# repro: the same operator set over uint16_t, int16_t, uint8_t and
// int8_t. Each function applies the operator and explicitly casts the result back to its
// return type. Functions sharing a name differ only in parameter types (overloads).
// NOTE(review): overloading is a C++ feature, so this was presumably compiled as C++
// even though it includes <stdint.h> and uses C-style casts - confirm.

// Addition.
uint16_t Add(uint16_t left, uint16_t right) { return (uint16_t)(left + right);}
int16_t Add(int16_t left, int16_t right) { return (int16_t)(left + right);}
uint8_t Add(uint8_t left, uint8_t right) { return (uint8_t)(left + right);}
int8_t Add(int8_t left, int8_t right) { return (int8_t)(left + right);}
// Subtraction.
uint16_t Subtract(uint16_t left, uint16_t right) { return (uint16_t)(left - right);}
int16_t Subtract(int16_t left, int16_t right) { return (int16_t)(left - right);}
uint8_t Subtract(uint8_t left, uint8_t right) { return (uint8_t)(left - right);}
int8_t Subtract(int8_t left, int8_t right) { return (int8_t)(left - right);}
// Multiplication.
uint16_t Multiply(uint16_t left, uint16_t right) { return (uint16_t)(left * right);}
int16_t Multiply(int16_t left, int16_t right) { return (int16_t)(left * right);}
uint8_t Multiply(uint8_t left, uint8_t right) { return (uint8_t)(left * right);}
int8_t Multiply(int8_t left, int8_t right) { return (int8_t)(left * right);}
// Division by a variable divisor. Nothing in these bodies checks for right == 0.
uint16_t Divide(uint16_t left, uint16_t right) { return (uint16_t)(left / right);}
int16_t Divide(int16_t left, int16_t right) { return (int16_t)(left / right);}
uint8_t Divide(uint8_t left, uint8_t right) { return (uint8_t)(left / right);}
int8_t Divide(int8_t left, int8_t right) { return (int8_t)(left / right);}
// Remainder by a variable divisor. Nothing in these bodies checks for right == 0.
uint16_t Modulus(uint16_t left, uint16_t right) { return (uint16_t)(left % right);}
int16_t Modulus(int16_t left, int16_t right) { return (int16_t)(left % right);}
uint8_t Modulus(uint8_t left, uint8_t right) { return (uint8_t)(left % right);}
int8_t Modulus(int8_t left, int8_t right) { return (int8_t)(left % right);}
// Unary minus; the cast is applied to the already-negated value.
uint16_t Negate(uint16_t left) { return (uint16_t)(-left);}
int16_t Negate(int16_t left) { return (int16_t)(-left);}
uint8_t Negate(uint8_t left) { return (uint8_t)(-left);}
int8_t Negate(int8_t left) { return (int8_t)(-left);}
// Bitwise AND.
uint16_t BitwiseAnd(uint16_t left, uint16_t right) { return (uint16_t)(left & right);}
int16_t BitwiseAnd(int16_t left, int16_t right) { return (int16_t)(left & right);}
uint8_t BitwiseAnd(uint8_t left, uint8_t right) { return (uint8_t)(left & right);}
int8_t BitwiseAnd(int8_t left, int8_t right) { return (int8_t)(left & right);}
// Bitwise OR.
uint16_t BitwiseOr(uint16_t left, uint16_t right) { return (uint16_t)(left | right);}
int16_t BitwiseOr(int16_t left, int16_t right) { return (int16_t)(left | right);}
uint8_t BitwiseOr(uint8_t left, uint8_t right) { return (uint8_t)(left | right);}
int8_t BitwiseOr(int8_t left, int8_t right) { return (int8_t)(left | right);}
// Bitwise XOR.
uint16_t BitwiseXor(uint16_t left, uint16_t right) { return (uint16_t)(left ^ right);}
int16_t BitwiseXor(int16_t left, int16_t right) { return (int16_t)(left ^ right);}
uint8_t BitwiseXor(uint8_t left, uint8_t right) { return (uint8_t)(left ^ right);}
int8_t BitwiseXor(int8_t left, int8_t right) { return (int8_t)(left ^ right);}
// Bitwise complement; the cast is applied to the already-complemented value.
uint16_t BitwiseNot(uint16_t left) { return (uint16_t) ~left;}
int16_t BitwiseNot(int16_t left) { return (int16_t) ~left;}
uint8_t BitwiseNot(uint8_t left) { return (uint8_t) ~left;}
int8_t BitwiseNot(int8_t left) { return (int8_t) ~left;}
// Right shift by an int32_t count. The unsigned overloads are named ShiftRight and the
// signed ones ShiftArithmeticRight; all four use the same `>>` expression.
uint16_t ShiftRight(uint16_t left, int32_t right) { return (uint16_t)(left >> right);}
int16_t ShiftArithmeticRight(int16_t left, int32_t right) { return (int16_t)(left >> right);}
uint8_t ShiftRight(uint8_t left, int32_t right) { return (uint8_t)(left >> right);}
int8_t ShiftArithmeticRight(int8_t left, int32_t right) { return (int8_t)(left >> right);}
// Left shift by an int32_t count.
uint16_t ShiftLeft(uint16_t left, int32_t right) { return (uint16_t)(left << right);}
int16_t ShiftLeft(int16_t left, int32_t right) { return (int16_t)(left << right);}
uint8_t ShiftLeft(uint8_t left, int32_t right) { return (uint8_t)(left << right);}
int8_t ShiftLeft(int8_t left, int32_t right) { return (int8_t)(left << right);}
// Division by the compile-time constant 19.
uint16_t DivideBy19(uint16_t left) { return (uint16_t)(left / 19);}
int16_t DivideBy19(int16_t left) { return (int16_t)(left / 19);}
uint8_t DivideBy19(uint8_t left) { return (uint8_t)(left / 19);}
int8_t DivideBy19(int8_t left) { return (int8_t)(left / 19);}
// Remainder by the compile-time constant 19.
uint16_t ModulusBy19(uint16_t left) { return (uint16_t)(left % 19);}
int16_t ModulusBy19(int16_t left) { return (int16_t)(left % 19);}
uint8_t ModulusBy19(uint8_t left) { return (uint8_t)(left % 19);}
int8_t ModulusBy19(int8_t left) { return (int8_t)(left % 19);}
LLVM output assembly(Clang12.0 -O3 -ffast-math -march=x86-64-v3)
Add(unsigned short, unsigned short): # @Add(unsigned short, unsigned short)
lea eax, [rdi + rsi]
ret
Add(short, short): # @Add(short, short)
lea eax, [rdi + rsi]
ret
Add(unsigned char, unsigned char): # @Add(unsigned char, unsigned char)
lea eax, [rsi + rdi]
ret
Add(signed char, signed char): # @Add(signed char, signed char)
lea eax, [rsi + rdi]
ret
Subtract(unsigned short, unsigned short): # @Subtract(unsigned short, unsigned short)
mov eax, edi
sub eax, esi
ret
Subtract(short, short): # @Subtract(short, short)
mov eax, edi
sub eax, esi
ret
Subtract(unsigned char, unsigned char): # @Subtract(unsigned char, unsigned char)
mov eax, edi
sub al, sil
ret
Subtract(signed char, signed char): # @Subtract(signed char, signed char)
mov eax, edi
sub al, sil
ret
Multiply(unsigned short, unsigned short): # @Multiply(unsigned short, unsigned short)
mov eax, edi
imul eax, esi
ret
Multiply(short, short): # @Multiply(short, short)
mov eax, edi
imul eax, esi
ret
Multiply(unsigned char, unsigned char): # @Multiply(unsigned char, unsigned char)
mov eax, esi
mul dil
ret
Multiply(signed char, signed char): # @Multiply(signed char, signed char)
mov eax, esi
mul dil
ret
Divide(unsigned short, unsigned short): # @Divide(unsigned short, unsigned short)
mov eax, edi
xor edx, edx
div si
ret
Divide(short, short): # @Divide(short, short)
mov eax, edi
cdq
idiv esi
ret
Divide(unsigned char, unsigned char): # @Divide(unsigned char, unsigned char)
movzx eax, dil
div sil
ret
Divide(signed char, signed char): # @Divide(signed char, signed char)
mov eax, edi
cwd
idiv si
ret
Modulus(unsigned short, unsigned short): # @Modulus(unsigned short, unsigned short)
mov eax, edi
xor edx, edx
div si
mov eax, edx
ret
Modulus(short, short): # @Modulus(short, short)
mov eax, edi
cdq
idiv esi
mov eax, edx
ret
Modulus(unsigned char, unsigned char): # @Modulus(unsigned char, unsigned char)
movzx eax, dil
div sil
movzx eax, ah
ret
Modulus(signed char, signed char): # @Modulus(signed char, signed char)
mov eax, edi
cwd
idiv si
mov eax, edx
ret
Negate(unsigned short): # @Negate(unsigned short)
mov eax, edi
neg eax
ret
Negate(short): # @Negate(short)
mov eax, edi
neg eax
ret
Negate(unsigned char): # @Negate(unsigned char)
mov eax, edi
neg al
ret
Negate(signed char): # @Negate(signed char)
mov eax, edi
neg al
ret
BitwiseAnd(unsigned short, unsigned short): # @BitwiseAnd(unsigned short, unsigned short)
mov eax, edi
and eax, esi
ret
BitwiseAnd(short, short): # @BitwiseAnd(short, short)
mov eax, edi
and eax, esi
ret
BitwiseAnd(unsigned char, unsigned char): # @BitwiseAnd(unsigned char, unsigned char)
mov eax, edi
and eax, esi
ret
BitwiseAnd(signed char, signed char): # @BitwiseAnd(signed char, signed char)
mov eax, edi
and eax, esi
ret
BitwiseOr(unsigned short, unsigned short): # @BitwiseOr(unsigned short, unsigned short)
mov eax, edi
or eax, esi
ret
BitwiseOr(short, short): # @BitwiseOr(short, short)
mov eax, edi
or eax, esi
ret
BitwiseOr(unsigned char, unsigned char): # @BitwiseOr(unsigned char, unsigned char)
mov eax, edi
or eax, esi
ret
BitwiseOr(signed char, signed char): # @BitwiseOr(signed char, signed char)
mov eax, edi
or eax, esi
ret
BitwiseXor(unsigned short, unsigned short): # @BitwiseXor(unsigned short, unsigned short)
mov eax, edi
xor eax, esi
ret
BitwiseXor(short, short): # @BitwiseXor(short, short)
mov eax, edi
xor eax, esi
ret
BitwiseXor(unsigned char, unsigned char): # @BitwiseXor(unsigned char, unsigned char)
mov eax, edi
xor eax, esi
ret
BitwiseXor(signed char, signed char): # @BitwiseXor(signed char, signed char)
mov eax, edi
xor eax, esi
ret
BitwiseNot(unsigned short): # @BitwiseNot(unsigned short)
mov eax, edi
not eax
ret
BitwiseNot(short): # @BitwiseNot(short)
mov eax, edi
not eax
ret
BitwiseNot(unsigned char): # @BitwiseNot(unsigned char)
mov eax, edi
not al
ret
BitwiseNot(signed char): # @BitwiseNot(signed char)
mov eax, edi
not al
ret
ShiftRight(unsigned short, int): # @ShiftRight(unsigned short, int)
shrx eax, edi, esi
ret
ShiftArithmeticRight(short, int): # @ShiftArithmeticRight(short, int)
sarx eax, edi, esi
ret
ShiftRight(unsigned char, int): # @ShiftRight(unsigned char, int)
shrx eax, edi, esi
ret
ShiftArithmeticRight(signed char, int): # @ShiftArithmeticRight(signed char, int)
sarx eax, edi, esi
ret
ShiftLeft(unsigned short, int): # @ShiftLeft(unsigned short, int)
shlx eax, edi, esi
ret
ShiftLeft(short, int): # @ShiftLeft(short, int)
shlx eax, edi, esi
ret
ShiftLeft(unsigned char, int): # @ShiftLeft(unsigned char, int)
shlx eax, edi, esi
ret
ShiftLeft(signed char, int): # @ShiftLeft(signed char, int)
shlx eax, edi, esi
ret
DivideBy19(unsigned short): # @DivideBy19(unsigned short)
imul eax, edi, 55189
shr eax, 20
ret
DivideBy19(short): # @DivideBy19(short)
imul eax, edi, -10347
shr eax, 16
add eax, edi
movzx ecx, ax
movsx eax, cx
shr ecx, 15
sar eax, 4
add eax, ecx
ret
DivideBy19(unsigned char): # @DivideBy19(unsigned char)
lea eax, [rdi + 8*rdi]
lea eax, [rax + 2*rax]
shr eax, 9
ret
DivideBy19(signed char): # @DivideBy19(signed char)
lea eax, [rdi + 8*rdi]
lea ecx, [rax + 2*rax]
movzx eax, cx
shr ecx, 9
shr eax, 15
add al, cl
ret
ModulusBy19(unsigned short): # @ModulusBy19(unsigned short)
mov eax, edi
imul ecx, edi, 55189
shr ecx, 20
lea edx, [rcx + 8*rcx]
lea ecx, [rcx + 2*rdx]
sub eax, ecx
ret
ModulusBy19(short): # @ModulusBy19(short)
mov eax, edi
imul ecx, edi, -10347
shr ecx, 16
add ecx, edi
movzx ecx, cx
movsx edx, cx
shr ecx, 15
sar edx, 4
add edx, ecx
lea ecx, [rdx + 8*rdx]
lea ecx, [rdx + 2*rcx]
sub eax, ecx
ret
ModulusBy19(unsigned char): # @ModulusBy19(unsigned char)
mov eax, edi
lea ecx, [rax + 8*rax]
lea ecx, [rcx + 2*rcx]
shr ecx, 9
lea edx, [rcx + 8*rcx]
lea ecx, [rcx + 2*rdx]
sub al, cl
ret
ModulusBy19(signed char): # @ModulusBy19(signed char)
mov eax, edi
lea ecx, [rax + 8*rax]
lea ecx, [rcx + 2*rcx]
movzx edx, cx
shr ecx, 9
shr edx, 15
add dl, cl
movzx ecx, dl
lea edx, [rcx + 8*rcx]
lea ecx, [rcx + 2*rdx]
sub al, cl
ret
Generated by Compiler Explorer
GCC 11.0 output assembly(-O3 -ffast-math -march=x86-64-v3)
Add(unsigned short, unsigned short):
lea eax, [rsi+rdi]
ret
Add(short, short):
lea eax, [rsi+rdi]
ret
Add(unsigned char, unsigned char):
lea eax, [rsi+rdi]
ret
Add(signed char, signed char):
lea eax, [rsi+rdi]
ret
Subtract(unsigned short, unsigned short):
mov eax, edi
sub eax, esi
ret
Subtract(short, short):
mov eax, edi
sub eax, esi
ret
Subtract(unsigned char, unsigned char):
mov eax, edi
sub eax, esi
ret
Subtract(signed char, signed char):
mov eax, edi
sub eax, esi
ret
Multiply(unsigned short, unsigned short):
mov eax, esi
imul eax, edi
ret
Multiply(short, short):
mov eax, esi
imul eax, edi
ret
Multiply(unsigned char, unsigned char):
mov eax, esi
imul eax, edi
ret
Multiply(signed char, signed char):
mov eax, esi
imul eax, edi
ret
Divide(unsigned short, unsigned short):
mov eax, edi
xor edx, edx
div si
ret
Divide(short, short):
movsx eax, di
movsx esi, si
cdq
idiv esi
ret
Divide(unsigned char, unsigned char):
movzx eax, dil
div sil
ret
Divide(signed char, signed char):
movsx eax, dil
movsx esi, sil
cdq
idiv esi
ret
Modulus(unsigned short, unsigned short):
mov eax, edi
xor edx, edx
div si
mov eax, edx
ret
Modulus(short, short):
movsx eax, di
movsx esi, si
cdq
idiv esi
mov eax, edx
ret
Modulus(unsigned char, unsigned char):
movzx eax, dil
div sil
movzx eax, ah
ret
Modulus(signed char, signed char):
movsx eax, dil
movsx esi, sil
cdq
idiv esi
mov eax, edx
ret
Negate(unsigned short):
mov eax, edi
neg eax
ret
Negate(short):
mov eax, edi
neg eax
ret
Negate(unsigned char):
mov eax, edi
neg eax
ret
Negate(signed char):
mov eax, edi
neg eax
ret
BitwiseAnd(unsigned short, unsigned short):
mov eax, esi
and eax, edi
ret
BitwiseAnd(short, short):
mov eax, esi
and eax, edi
ret
BitwiseAnd(unsigned char, unsigned char):
mov eax, esi
and eax, edi
ret
BitwiseAnd(signed char, signed char):
mov eax, esi
and eax, edi
ret
BitwiseOr(unsigned short, unsigned short):
mov eax, esi
or eax, edi
ret
BitwiseOr(short, short):
mov eax, esi
or eax, edi
ret
BitwiseOr(unsigned char, unsigned char):
mov eax, esi
or eax, edi
ret
BitwiseOr(signed char, signed char):
mov eax, esi
or eax, edi
ret
BitwiseXor(unsigned short, unsigned short):
mov eax, esi
xor eax, edi
ret
BitwiseXor(short, short):
mov eax, esi
xor eax, edi
ret
BitwiseXor(unsigned char, unsigned char):
mov eax, esi
xor eax, edi
ret
BitwiseXor(signed char, signed char):
mov eax, esi
xor eax, edi
ret
BitwiseNot(unsigned short):
mov eax, edi
not eax
ret
BitwiseNot(short):
mov eax, edi
not eax
ret
BitwiseNot(unsigned char):
mov eax, edi
not eax
ret
BitwiseNot(signed char):
mov eax, edi
not eax
ret
ShiftRight(unsigned short, int):
movzx edi, di
sarx eax, edi, esi
ret
ShiftArithmeticRight(short, int):
movsx edi, di
sarx eax, edi, esi
ret
ShiftRight(unsigned char, int):
movzx edi, dil
sarx eax, edi, esi
ret
ShiftArithmeticRight(signed char, int):
movsx edi, dil
sarx eax, edi, esi
ret
ShiftLeft(unsigned short, int):
movzx edi, di
shlx eax, edi, esi
ret
ShiftLeft(short, int):
movsx edi, di
shlx eax, edi, esi
ret
ShiftLeft(unsigned char, int):
movzx edi, dil
shlx eax, edi, esi
ret
ShiftLeft(signed char, int):
movsx edi, dil
shlx eax, edi, esi
ret
DivideBy19(unsigned short):
movzx eax, di
imul eax, eax, 55189
shr eax, 20
ret
DivideBy19(short):
movsx eax, di
imul eax, eax, -10347
shr eax, 16
add eax, edi
sar di, 15
sar ax, 4
sub eax, edi
ret
DivideBy19(unsigned char):
mov edx, 27
mov eax, edi
mul dl
shr ax, 9
ret
DivideBy19(signed char):
mov eax, 27
imul dil
sar dil, 7
sar ax, 9
sub eax, edi
ret
ModulusBy19(unsigned short):
movzx eax, di
imul eax, eax, 55189
shr eax, 20
lea edx, [rax+rax*8]
lea edx, [rax+rdx*2]
mov eax, edi
sub eax, edx
ret
ModulusBy19(short):
movsx eax, di
mov edx, edi
imul eax, eax, -10347
sar dx, 15
shr eax, 16
add eax, edi
sar ax, 4
sub eax, edx
lea edx, [rax+rax*8]
lea edx, [rax+rdx*2]
mov eax, edi
sub eax, edx
ret
ModulusBy19(unsigned char):
mov eax, 27
mul dil
shr ax, 9
lea edx, [rax+rax*8]
lea edx, [rax+rdx*2]
mov eax, edi
sub eax, edx
ret
ModulusBy19(signed char):
mov eax, 27
mov edx, edi
imul dil
sar dl, 7
sar ax, 9
sub eax, edx
lea edx, [rax+rax*8]
lea edx, [rax+rdx*2]
mov eax, edi
sub eax, edx
ret
Generated by Compiler Explorer
The LLVM output suggests that the `movzx` and `movsx` instructions may sometimes be unnecessary.
At -O3, Clang/LLVM and GCC apply many unusual optimizations, such as replacing multiplication by a constant with a few `lea` instructions, but I recommend generating code that is more similar to Clang/LLVM's output.
category:cq
theme:codegen
skill-level:intermediate
cost:large
impact:medium
Metadata
Metadata
Assignees
Labels
Type
Projects
Status
Done