-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ms] [llvm-ml] Allow PTR casting of registers to their own size #132751
base: main
Are you sure you want to change the base?
Conversation
MASM allows no-op casts on register operands; for example, `DWORD PTR eax` is a legal expression. Any other cast is an error. Fixes llvm#132084
@llvm/pr-subscribers-backend-x86 Author: Eric Astor (ericastor) ChangesMASM allows no-op casts on register operands; for example, Fixes #132084 Full diff: https://github.com/llvm/llvm-project/pull/132751.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a6285a55f4155..475b0016ace68 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -14,6 +14,9 @@
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86Operand.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/MC/MCRegister.h"
+#include "third_party/llvm/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +41,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cstdint>
#include <memory>
using namespace llvm;
@@ -1150,7 +1154,7 @@ class X86AsmParser : public MCTargetAsmParser {
X86::CondCode ParseConditionCode(StringRef CCode);
- bool ParseIntelMemoryOperandSize(unsigned &Size);
+ bool ParseIntelMemoryOperandSize(unsigned &Size, StringRef *SizeStr);
bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
MCRegister BaseReg, MCRegister IndexReg,
unsigned Scale, bool NonAbsMem, SMLoc Start,
@@ -2551,7 +2555,8 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
return false;
}
-bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
+bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
+ StringRef *SizeStr) {
Size = StringSwitch<unsigned>(getTok().getString())
.Cases("BYTE", "byte", 8)
.Cases("WORD", "word", 16)
@@ -2569,6 +2574,8 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
.Cases("ZMMWORD", "zmmword", 512)
.Default(0);
if (Size) {
+ if (SizeStr)
+ *SizeStr = getTok().getString();
const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
@@ -2577,6 +2584,31 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
return false;
}
+uint16_t RegSizeInBits(MCRegister RegNo, const MCRegisterInfo &MRI) {
+ uint16_t Size = 0;
+ if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo))
+ Size = 8;
+ else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo))
+ Size = 16;
+ else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo))
+ Size = 32;
+ else if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
+ Size = 64;
+ else if (X86MCRegisterClasses[X86::RFP80RegClassID].contains(RegNo))
+ Size = 80;
+ else if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo))
+ Size = 128;
+ else if (X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo))
+ Size = 128;
+ else if (X86MCRegisterClasses[X86::VR256XRegClassID].contains(RegNo))
+ Size = 256;
+ else if (X86MCRegisterClasses[X86::VR512RegClassID].contains(RegNo))
+ Size = 512;
+ else
+ llvm_unreachable("Register without known register class");
+ return Size;
+}
+
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -2584,7 +2616,8 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
// Parse optional Size directive.
unsigned Size;
- if (ParseIntelMemoryOperandSize(Size))
+ StringRef SizeStr;
+ if (ParseIntelMemoryOperandSize(Size, &SizeStr))
return true;
bool PtrInOperand = bool(Size);
@@ -2601,9 +2634,20 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
return Error(Start, "rip can only be used as a base register");
// A Register followed by ':' is considered a segment override
if (Tok.isNot(AsmToken::Colon)) {
- if (PtrInOperand)
- return Error(Start, "expected memory operand after 'ptr', "
- "found register operand instead");
+ if (PtrInOperand) {
+ if (!Parser.isParsingMasm())
+ return Error(Start, "expected memory operand after 'ptr', "
+ "found register operand instead");
+
+ // If we are parsing MASM, we are allowed to cast registers to their own
+ // sizes, but not to other types.
+ if (RegSizeInBits(RegNo, *getContext().getRegisterInfo()) != Size)
+ return Error(
+ Start,
+ "cannot cast register '" +
+ StringRef(getContext().getRegisterInfo()->getName(RegNo)) +
+ "' to '" + SizeStr + "'; size does not match");
+ }
Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
return false;
}
diff --git a/llvm/test/tools/llvm-ml/cast.asm b/llvm/test/tools/llvm-ml/cast.asm
new file mode 100644
index 0000000000000..2b4aaae88866e
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/cast.asm
@@ -0,0 +1,25 @@
+; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s
+
+.code
+
+mov byte ptr al, al
+mov al, byte ptr al
+; CHECK: mov al, al
+; CHECK-NEXT: mov al, al
+
+mov word ptr ax, ax
+mov ax, word ptr ax
+; CHECK: mov ax, ax
+; CHECK-NEXT: mov ax, ax
+
+mov dword ptr eax, eax
+mov eax, dword ptr eax
+; CHECK: mov eax, eax
+; CHECK-NEXT: mov eax, eax
+
+mov qword ptr rax, rax
+mov rax, qword ptr rax
+; CHECK: mov rax, rax
+; CHECK-NEXT: mov rax, rax
+
+END
diff --git a/llvm/test/tools/llvm-ml/cast_errors.asm b/llvm/test/tools/llvm-ml/cast_errors.asm
new file mode 100644
index 0000000000000..60cd9a4454ed8
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/cast_errors.asm
@@ -0,0 +1,41 @@
+; RUN: not llvm-ml -m64 -filetype=s %s /Fo /dev/null 2>&1 | FileCheck %s
+
+.code
+
+mov word ptr al, ax
+; CHECK: error: cannot cast register 'AL' to 'word'; size does not match
+
+mov dword ptr al, eax
+; CHECK: error: cannot cast register 'AL' to 'dword'; size does not match
+
+mov qword ptr al, rax
+; CHECK: error: cannot cast register 'AL' to 'qword'; size does not match
+
+mov byte ptr ax, al
+; CHECK: error: cannot cast register 'AX' to 'byte'; size does not match
+
+mov dword ptr ax, eax
+; CHECK: error: cannot cast register 'AX' to 'dword'; size does not match
+
+mov qword ptr ax, rax
+; CHECK: error: cannot cast register 'AX' to 'qword'; size does not match
+
+mov byte ptr eax, al
+; CHECK: error: cannot cast register 'EAX' to 'byte'; size does not match
+
+mov word ptr eax, ax
+; CHECK: error: cannot cast register 'EAX' to 'word'; size does not match
+
+mov qword ptr eax, rax
+; CHECK: error: cannot cast register 'EAX' to 'qword'; size does not match
+
+mov byte ptr rax, al
+; CHECK: error: cannot cast register 'RAX' to 'byte'; size does not match
+
+mov word ptr rax, ax
+; CHECK: error: cannot cast register 'RAX' to 'word'; size does not match
+
+mov dword ptr rax, eax
+; CHECK: error: cannot cast register 'RAX' to 'dword'; size does not match
+
+END
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@@ -2577,14 +2584,40 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { | |||
return false; | |||
} | |||
|
|||
uint16_t RegSizeInBits(MCRegister RegNo, const MCRegisterInfo &MRI) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would suggest swapping the order of the parameters - in theory, this function would be defined within the MCRegisterInfo
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed that I'd very much like to see this defined there. But for now... swapped.
uint16_t Size = 0; | ||
if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo)) | ||
Size = 8; | ||
else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo)) | ||
Size = 16; | ||
else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo)) | ||
Size = 32; | ||
else if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo)) | ||
Size = 64; | ||
else if (X86MCRegisterClasses[X86::RFP80RegClassID].contains(RegNo)) | ||
Size = 80; | ||
else if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo)) | ||
Size = 128; | ||
else if (X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo)) | ||
Size = 128; | ||
else if (X86MCRegisterClasses[X86::VR256XRegClassID].contains(RegNo)) | ||
Size = 256; | ||
else if (X86MCRegisterClasses[X86::VR512RegClassID].contains(RegNo)) | ||
Size = 512; | ||
else | ||
llvm_unreachable("Register without known register class"); | ||
return Size; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that the coding style for LLVM would dictate the early return here. Rather than assigning and returning Size
, just simply return the value. This would simply have the last statement be the llvm_unreachable
.
uint16_t RegisterSizeInBits(const MCRegisterInfo &MRI, MCRegister RegNo) {
if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo))
return 8;
...
llvm_unreachable("Register without known register class");
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Swapped to an early-return style; thanks!
@@ -2569,6 +2574,8 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { | |||
.Cases("ZMMWORD", "zmmword", 512) | |||
.Default(0); | |||
if (Size) { | |||
if (SizeStr) | |||
*SizeStr = getTok().getString(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This gets the raw token spelling right? The use of that for the diagnostics seems less than ideal (IIRC Microsoft doesn't treat the tokens as case sensitive).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed; I'm still pulling out the StringRef for the token, but I've switched the diagnostic to print the upper-case version as a standardization step.
Start, | ||
"cannot cast register '" + | ||
StringRef(getContext().getRegisterInfo()->getName(RegNo)) + | ||
"' to '" + SizeStr + "'; size does not match"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if the size class in the diagnostic message is helpful. I don't think that
cannot cast register 'AL' to 'word'; size does not match
is particularly helpful. Perhaps something more along the lines of:
8-bit GPR 'AX' cannot be used as a 32-bit GPR
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still think the size class is helpful to MASM users. However, I've added a bit-width specification to the diagnostic. WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wouldn't mind getting a second opinion on the diagnostic that we emit.
This seems pretty close! I think that once we get another pair of eyes on the diagnostics, this should be ready. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW, the diagnostic text looks reasonable to me.
if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo) || | ||
X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VR128RegClassID is a subset of X86::VR128XRegClassID, checking the latter is enough. However, we cannot deduce the size of a XMM register, because it can be f16/f32/f64 or a 128-bit vector.
MASM allows no-op casts on register operands; for example,
DWORD PTR eax
is a legal expression. Any other cast is an error.Fixes #132084