Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ms] [llvm-ml] Allow PTR casting of registers to their own size #132751

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 49 additions & 6 deletions llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86Operand.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
Expand All @@ -27,6 +29,7 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
Expand All @@ -38,6 +41,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
#include <memory>

using namespace llvm;
Expand Down Expand Up @@ -1150,7 +1154,7 @@ class X86AsmParser : public MCTargetAsmParser {

X86::CondCode ParseConditionCode(StringRef CCode);

bool ParseIntelMemoryOperandSize(unsigned &Size);
bool ParseIntelMemoryOperandSize(unsigned &Size, StringRef *SizeStr);
bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
MCRegister BaseReg, MCRegister IndexReg,
unsigned Scale, bool NonAbsMem, SMLoc Start,
Expand Down Expand Up @@ -2551,7 +2555,8 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
return false;
}

bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
StringRef *SizeStr) {
Size = StringSwitch<unsigned>(getTok().getString())
.Cases("BYTE", "byte", 8)
.Cases("WORD", "word", 16)
Expand All @@ -2569,6 +2574,8 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
.Cases("ZMMWORD", "zmmword", 512)
.Default(0);
if (Size) {
if (SizeStr)
*SizeStr = getTok().getString();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gets the raw token spelling right? The use of that for the diagnostics seems less than ideal (IIRC Microsoft doesn't treat the tokens as case sensitive).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed; I'm still pulling out the StringRef for the token, but I've switched the diagnostic to print the upper-case version as a standardization step.

const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
Expand All @@ -2577,14 +2584,36 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
return false;
}

uint16_t RegSizeInBits(const MCRegisterInfo &MRI, MCRegister RegNo) {
if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo))
return 8;
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo))
return 16;
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo))
return 32;
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
return 64;
if (X86MCRegisterClasses[X86::RFP80RegClassID].contains(RegNo))
return 80;
if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo) ||
X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo))
Comment on lines +2598 to +2599
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VR128RegClassID is a subset of X86::VR128XRegClassID, checking the latter is enough. However, we cannot deduce the size of a XMM register, because it can be f16/f32/f64 or a 128-bit vector.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point, and I'm not entirely sure how MASM handles it for the purposes of this "no-op PTR cast" handling. Is it true for any of the other vector registers, or just the XMM set?

Once I understand that, I think I should (for now) restrict this no-op PTR cast support to only the registers where we can clearly deduce the size correctly. We can extend it if we find cases where MASM actually handles this.

return 128;
if (X86MCRegisterClasses[X86::VR256XRegClassID].contains(RegNo))
return 256;
if (X86MCRegisterClasses[X86::VR512RegClassID].contains(RegNo))
return 512;
llvm_unreachable("Register without known register class");
}

bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;

// Parse optional Size directive.
unsigned Size;
if (ParseIntelMemoryOperandSize(Size))
StringRef SizeStr;
if (ParseIntelMemoryOperandSize(Size, &SizeStr))
return true;
bool PtrInOperand = bool(Size);

Expand All @@ -2601,9 +2630,23 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
return Error(Start, "rip can only be used as a base register");
// A Register followed by ':' is considered a segment override
if (Tok.isNot(AsmToken::Colon)) {
if (PtrInOperand)
return Error(Start, "expected memory operand after 'ptr', "
"found register operand instead");
if (PtrInOperand) {
if (!Parser.isParsingMasm())
return Error(Start, "expected memory operand after 'ptr', "
"found register operand instead");

// If we are parsing MASM, we are allowed to cast registers to their own
// sizes, but not to other types.
uint16_t RegSize =
RegSizeInBits(*getContext().getRegisterInfo(), RegNo);
if (RegSize != Size)
return Error(
Start,
std::to_string(RegSize) + "-bit register '" +
StringRef(getContext().getRegisterInfo()->getName(RegNo)) +
"' cannot be used as a " + std::to_string(Size) + "-bit " +
SizeStr.upper());
}
Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
return false;
}
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/tools/llvm-ml/cast.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s

.code

mov byte ptr al, al
mov al, byte ptr al
; CHECK: mov al, al
; CHECK-NEXT: mov al, al

mov word ptr ax, ax
mov ax, word ptr ax
; CHECK: mov ax, ax
; CHECK-NEXT: mov ax, ax

mov dword ptr eax, eax
mov eax, dword ptr eax
; CHECK: mov eax, eax
; CHECK-NEXT: mov eax, eax

mov qword ptr rax, rax
mov rax, qword ptr rax
; CHECK: mov rax, rax
; CHECK-NEXT: mov rax, rax

END
41 changes: 41 additions & 0 deletions llvm/test/tools/llvm-ml/cast_errors.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
; RUN: not llvm-ml -m64 -filetype=s %s /Fo /dev/null 2>&1 | FileCheck %s

.code

mov word ptr al, ax
; CHECK: error: 8-bit register 'AL' cannot be used as a 16-bit WORD

mov dword ptr al, eax
; CHECK: error: 8-bit register 'AL' cannot be used as a 32-bit DWORD

mov qword ptr al, rax
; CHECK: error: 8-bit register 'AL' cannot be used as a 64-bit QWORD

mov byte ptr ax, al
; CHECK: error: 16-bit register 'AX' cannot be used as a 8-bit BYTE

mov dword ptr ax, eax
; CHECK: error: 16-bit register 'AX' cannot be used as a 32-bit DWORD

mov qword ptr ax, rax
; CHECK: error: 16-bit register 'AX' cannot be used as a 64-bit QWORD

mov byte ptr eax, al
; CHECK: error: 32-bit register 'EAX' cannot be used as a 8-bit BYTE

mov word ptr eax, ax
; CHECK: error: 32-bit register 'EAX' cannot be used as a 16-bit WORD

mov qword ptr eax, rax
; CHECK: error: 32-bit register 'EAX' cannot be used as a 64-bit QWORD

mov byte ptr rax, al
; CHECK: error: 64-bit register 'RAX' cannot be used as a 8-bit BYTE

mov word ptr rax, ax
; CHECK: error: 64-bit register 'RAX' cannot be used as a 16-bit WORD

mov dword ptr rax, eax
; CHECK: error: 64-bit register 'RAX' cannot be used as a 32-bit DWORD

END