Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 8647750

Browse files
committed
Add X86 code emitter support for AVX-encoded MRMDestReg instructions.
Previously we weren't skipping the VVVV-encoded register operand. Based on a patch by Michael Liao. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177221 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e0c489d commit 8647750

File tree

4 files changed

+94
-32
lines changed

4 files changed

+94
-32
lines changed

lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -674,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
674674
// MRMDestReg instructions forms:
675675
// dst(ModR/M), src(ModR/M)
676676
// dst(ModR/M), src(ModR/M), imm8
677-
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
677+
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
678+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
678679
VEX_B = 0x0;
679-
if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
680+
CurOp++;
681+
682+
if (HasVEX_4V)
683+
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
684+
685+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
680686
VEX_R = 0x0;
681687
break;
682688
case X86II::MRM0r: case X86II::MRM1r:
@@ -1046,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
10461052

10471053
case X86II::MRMDestReg:
10481054
EmitByte(BaseOpcode, CurByte, OS);
1055+
SrcRegNum = CurOp + 1;
1056+
1057+
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1058+
++SrcRegNum;
1059+
10491060
EmitRegModRMByte(MI.getOperand(CurOp),
1050-
GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
1051-
CurOp += 2;
1061+
GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
1062+
CurOp = SrcRegNum + 1;
10521063
break;
10531064

10541065
case X86II::MRMDestMem:

lib/Target/X86/X86CodeEmitter.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
10471047
// MRMDestReg instructions forms:
10481048
// dst(ModR/M), src(ModR/M)
10491049
// dst(ModR/M), src(ModR/M), imm8
1050-
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
1050+
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
1051+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
10511052
VEX_B = 0x0;
1052-
if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
1053+
CurOp++;
1054+
1055+
if (HasVEX_4V)
1056+
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
1057+
1058+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
10531059
VEX_R = 0x0;
10541060
break;
10551061
case X86II::MRM0r: case X86II::MRM1r:
@@ -1284,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
12841290

12851291
case X86II::MRMDestReg: {
12861292
MCE.emitByte(BaseOpcode);
1293+
1294+
unsigned SrcRegNum = CurOp+1;
1295+
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1296+
SrcRegNum++;
1297+
12871298
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
1288-
getX86RegNum(MI.getOperand(CurOp+1).getReg()));
1289-
CurOp += 2;
1299+
getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
1300+
CurOp = SrcRegNum + 1;
12901301
break;
12911302
}
12921303
case X86II::MRMDestMem: {

lib/Target/X86/X86MCInstLower.cpp

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -417,35 +417,44 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
417417
case X86::VMOVDQAYrr:
418418
case X86::VMOVDQUrr:
419419
case X86::VMOVDQUYrr:
420-
case X86::VMOVSDrr:
421-
case X86::VMOVSSrr:
422420
case X86::VMOVUPDrr:
423421
case X86::VMOVUPDYrr:
424422
case X86::VMOVUPSrr:
425423
case X86::VMOVUPSYrr: {
426-
if (X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
427-
!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()))
428-
break;
429-
430-
unsigned NewOpc;
431-
switch (OutMI.getOpcode()) {
432-
default: llvm_unreachable("Invalid opcode");
433-
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
434-
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
435-
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
436-
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
437-
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
438-
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
439-
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
440-
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
441-
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
442-
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
443-
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
444-
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
445-
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
446-
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
424+
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
425+
X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
426+
unsigned NewOpc;
427+
switch (OutMI.getOpcode()) {
428+
default: llvm_unreachable("Invalid opcode");
429+
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
430+
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
431+
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
432+
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
433+
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
434+
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
435+
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
436+
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
437+
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
438+
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
439+
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
440+
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
441+
}
442+
OutMI.setOpcode(NewOpc);
443+
}
444+
break;
445+
}
446+
case X86::VMOVSDrr:
447+
case X86::VMOVSSrr: {
448+
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
449+
X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
450+
unsigned NewOpc;
451+
switch (OutMI.getOpcode()) {
452+
default: llvm_unreachable("Invalid opcode");
453+
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
454+
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
455+
}
456+
OutMI.setOpcode(NewOpc);
447457
}
448-
OutMI.setOpcode(NewOpc);
449458
break;
450459
}
451460

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx-i -show-mc-encoding
2+
3+
; ModuleID = 'bugpoint-reduced-simplified.bc'
4+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5+
target triple = "x86_64-apple-macosx10.8.0"
6+
7+
@b = external global [8 x float], align 32
8+
@e = external global [8 x float], align 16
9+
10+
define void @main() #0 {
11+
entry:
12+
%0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0
13+
%bitcast.i = extractelement <8 x float> %0, i32 0
14+
%vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
15+
%vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
16+
%vecinit3.i.i = insertelement <4 x float> %vecinit2.i.i, float 0.000000e+00, i32 2
17+
%vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
18+
%1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
19+
%vecext.i.i = extractelement <4 x float> %1, i32 0
20+
store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0
21+
unreachable
22+
}
23+
24+
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
25+
26+
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
27+
attributes #1 = { nounwind readnone }
28+
attributes #2 = { nounwind }
29+
30+
!0 = metadata !{metadata !"omnipotent char", metadata !1}
31+
!1 = metadata !{metadata !"Simple C/C++ TBAA"}

0 commit comments

Comments
 (0)