Skip to content

Commit

Permalink
[maglev] Tailcall to a trampoline if we need to process tiering
Browse files Browse the repository at this point in the history
This massively reduces Maglev's inlined prologue.

Bug: v8:7700
Change-Id: If0462a70fb25d47df8e042c24c54d75470980be7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4664650
Auto-Submit: Victor Gomes <victorgomes@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Victor Gomes <victorgomes@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/main@{#88678}
  • Loading branch information
victorgomes authored and V8 LUCI CQ committed Jul 5, 2023
1 parent b25f3b9 commit fec1b61
Show file tree
Hide file tree
Showing 17 changed files with 168 additions and 74 deletions.
2 changes: 2 additions & 0 deletions src/builtins/builtins-definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ namespace internal {
ASM(MaglevOnStackReplacement, OnStackReplacement) \
ASM(MaglevFunctionEntryStackCheck_WithoutNewTarget, Void) \
ASM(MaglevFunctionEntryStackCheck_WithNewTarget, Void) \
ASM(MaglevOptimizeCodeOrTailCallOptimizedCodeSlot, \
MaglevOptimizeCodeOrTailCallOptimizedCodeSlot) \
\
/* Code life-cycle */ \
TFC(CompileLazy, JSTrampoline) \
Expand Down
18 changes: 16 additions & 2 deletions src/builtins/builtins-internal-gen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "src/builtins/builtins.h"
#include "src/codegen/code-stub-assembler.h"
#include "src/codegen/interface-descriptors-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/common/globals.h"
#include "src/execution/frame-constants.h"
#include "src/heap/memory-chunk.h"
Expand Down Expand Up @@ -1368,12 +1368,26 @@ void Builtins::Generate_MaglevOnStackReplacement(MacroAssembler* masm) {
}
#endif // V8_TARGET_ARCH_X64

#ifndef V8_ENABLE_MAGLEV
#ifdef V8_ENABLE_MAGLEV
// Trampoline builtin that Maglev code tail-calls to when the feedback-vector
// flags indicate tiering work or available optimized code, instead of
// emitting the processing logic inline in every Maglev prologue.
void Builtins::Generate_MaglevOptimizeCodeOrTailCallOptimizedCodeSlot(
    MacroAssembler* masm) {
  using D = MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor;
  // Parameters arrive in the descriptor-assigned registers.
  Register flags = D::GetRegisterParameter(D::kFlags);
  Register feedback_vector = D::GetRegisterParameter(D::kFeedbackVector);
  masm->AssertFeedbackVector(feedback_vector);
  // This either tail-calls into optimized code or into the runtime; control
  // is not expected to fall through.
  masm->OptimizeCodeOrTailCallOptimizedCodeSlot(flags, feedback_vector);
  masm->Trap();  // Unreachable.
}
#else
// static
// Stub used when V8_ENABLE_MAGLEV is not defined: no Maglev code can ever
// reach this builtin, so trap if it is entered.
void Builtins::Generate_MaglevFunctionEntryStackCheck(MacroAssembler* masm,
                                                      bool save_new_target) {
  masm->Trap();
}
// Stub used when V8_ENABLE_MAGLEV is not defined: unreachable, trap if
// ever entered.
void Builtins::Generate_MaglevOptimizeCodeOrTailCallOptimizedCodeSlot(
    MacroAssembler* masm) {
  masm->Trap();
}
#endif // V8_ENABLE_MAGLEV

void Builtins::Generate_MaglevFunctionEntryStackCheck_WithoutNewTarget(
Expand Down
3 changes: 2 additions & 1 deletion src/builtins/x64/builtins-x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,8 @@ void Builtins::Generate_InterpreterEntryTrampoline(

#ifndef V8_JITLESS
__ bind(&flags_need_processing);
__ OptimizeCodeOrTailCallOptimizedCodeSlot(feedback_vector, closure);
__ OptimizeCodeOrTailCallOptimizedCodeSlot(feedback_vector, closure,
JumpMode::kJump);

__ bind(&is_baseline);
{
Expand Down
11 changes: 11 additions & 0 deletions src/codegen/arm/interface-descriptors-arm-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ constexpr Register TypeConversionDescriptor::ArgumentRegister() { return r0; }
// static
constexpr auto TypeofDescriptor::registers() { return RegisterArray(r0); }

// static
// arm register carrying the kFlags (Int32 feedback-vector flags) parameter
// of the MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin.
constexpr Register
MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::FlagsRegister() {
  return r2;
}
// static
// arm register carrying the kFeedbackVector (tagged pointer) parameter of
// the MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin.
constexpr Register MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::
    FeedbackVectorRegister() {
  return r5;
}

// static
constexpr auto CallTrampolineDescriptor::registers() {
// r0 : number of arguments
Expand Down
16 changes: 12 additions & 4 deletions src/codegen/arm/macro-assembler-arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1993,9 +1993,8 @@ void MacroAssembler::GenerateTailCallToReturnedCode(

// Read off the flags in the feedback vector and check if there
// is optimized code or a tiering state that needs to be processed.
void MacroAssembler::LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind,
Label* flags_need_processing) {
Condition MacroAssembler::LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind) {
ASM_CODE_COMMENT(this);
DCHECK(!AreAliased(flags, feedback_vector));
DCHECK(CodeKindCanTierUp(current_code_kind));
Expand All @@ -2007,7 +2006,16 @@ void MacroAssembler::LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
kFlagsMask |= FeedbackVector::kFlagsMaybeHasMaglevCode;
}
tst(flags, Operand(kFlagsMask));
b(ne, flags_need_processing);
return ne;
}

// Loads the feedback-vector flags and branches to |flags_need_processing|
// when any tiering / optimized-code flag relevant to |current_code_kind| is
// set. Thin wrapper: the condition-returning variant does the load and test,
// this just emits the conditional branch.
void MacroAssembler::LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
    Register flags, Register feedback_vector, CodeKind current_code_kind,
    Label* flags_need_processing) {
  ASM_CODE_COMMENT(this);
  b(LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(flags, feedback_vector,
                                                     current_code_kind),
    flags_need_processing);
}

void MacroAssembler::OptimizeCodeOrTailCallOptimizedCodeSlot(
Expand Down
2 changes: 2 additions & 0 deletions src/codegen/arm/macro-assembler-arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,8 @@ class V8_EXPORT_PRIVATE MacroAssembler : public MacroAssemblerBase {
void ReplaceClosureCodeWithOptimizedCode(Register optimized_code,
Register closure);
void GenerateTailCallToReturnedCode(Runtime::FunctionId function_id);
Condition LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind);
void LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind,
Label* flags_need_processing);
Expand Down
11 changes: 11 additions & 0 deletions src/codegen/arm64/interface-descriptors-arm64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ constexpr Register BaselineLeaveFrameDescriptor::WeightRegister() { return x4; }
// static
constexpr Register TypeConversionDescriptor::ArgumentRegister() { return x0; }

// static
// arm64 register carrying the kFlags (Int32 feedback-vector flags) parameter
// of the MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin.
constexpr Register
MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::FlagsRegister() {
  return x8;
}
// static
// arm64 register carrying the kFeedbackVector (tagged pointer) parameter of
// the MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin.
constexpr Register MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::
    FeedbackVectorRegister() {
  return x9;
}

// static
constexpr auto TypeofDescriptor::registers() { return RegisterArray(x0); }

Expand Down
17 changes: 13 additions & 4 deletions src/codegen/arm64/macro-assembler-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1501,9 +1501,8 @@ void MacroAssembler::GenerateTailCallToReturnedCode(

// Read off the flags in the feedback vector and check if there
// is optimized code or a tiering state that needs to be processed.
void MacroAssembler::LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind,
Label* flags_need_processing) {
Condition MacroAssembler::LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind) {
ASM_CODE_COMMENT(this);
DCHECK(!AreAliased(flags, feedback_vector));
DCHECK(CodeKindCanTierUp(current_code_kind));
Expand All @@ -1514,7 +1513,17 @@ void MacroAssembler::LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
if (current_code_kind != CodeKind::MAGLEV) {
kFlagsMask |= FeedbackVector::kFlagsMaybeHasMaglevCode;
}
TestAndBranchIfAnySet(flags, kFlagsMask, flags_need_processing);
Tst(flags, kFlagsMask);
return ne;
}

// Loads the feedback-vector flags and branches to |flags_need_processing|
// when any tiering / optimized-code flag relevant to |current_code_kind| is
// set. Thin wrapper around the condition-returning variant, which performs
// the Tst; this emits the conditional branch.
void MacroAssembler::LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
    Register flags, Register feedback_vector, CodeKind current_code_kind,
    Label* flags_need_processing) {
  ASM_CODE_COMMENT(this);
  B(LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(flags, feedback_vector,
                                                     current_code_kind),
    flags_need_processing);
}

void MacroAssembler::OptimizeCodeOrTailCallOptimizedCodeSlot(
Expand Down
2 changes: 2 additions & 0 deletions src/codegen/arm64/macro-assembler-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,8 @@ class V8_EXPORT_PRIVATE MacroAssembler : public MacroAssemblerBase {
void ReplaceClosureCodeWithOptimizedCode(Register optimized_code,
Register closure);
void GenerateTailCallToReturnedCode(Runtime::FunctionId function_id);
Condition LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind);
void LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
Register flags, Register feedback_vector, CodeKind current_code_kind,
Label* flags_need_processing);
Expand Down
10 changes: 10 additions & 0 deletions src/codegen/interface-descriptors-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,16 @@ constexpr auto OnStackReplacementDescriptor::registers() {
#endif
}

// static
// Register assignment for the descriptor: the per-architecture
// (flags, feedback_vector) pair when Maglev is compiled in; otherwise the
// default register array (the builtin is a trap-only stub then, so the
// concrete registers do not matter).
constexpr auto
MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::registers() {
#ifdef V8_ENABLE_MAGLEV
  return RegisterArray(FlagsRegister(), FeedbackVectorRegister());
#else
  return DefaultRegisterArray();
#endif
}

// static
constexpr Register OnStackReplacementDescriptor::MaybeTargetCodeRegister() {
// Picking the first register on purpose because it's convenient that this
Expand Down
16 changes: 16 additions & 0 deletions src/codegen/interface-descriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ namespace internal {
V(LookupWithVector) \
V(LookupTrampoline) \
V(LookupBaseline) \
V(MaglevOptimizeCodeOrTailCallOptimizedCodeSlot) \
V(NewHeapNumber) \
V(NoContext) \
V(OnStackReplacement) \
Expand Down Expand Up @@ -942,6 +943,21 @@ class LookupBaselineDescriptor
DECLARE_DESCRIPTOR(LookupBaselineDescriptor)
};

// Call interface descriptor for the
// MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin. Two parameters, no
// context: the raw feedback-vector flags word and the feedback vector
// itself.
class MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor
    : public StaticCallInterfaceDescriptor<
          MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor> {
 public:
  DEFINE_PARAMETERS_NO_CONTEXT(kFlags, kFeedbackVector)
  DEFINE_PARAMETER_TYPES(MachineType::Int32(),          // kFlags
                         MachineType::TaggedPointer())  // kFeedbackVector
  DECLARE_DESCRIPTOR(MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor)

  // Per-architecture register assignments; defined in the corresponding
  // interface-descriptors-*-inl.h headers.
  static constexpr inline Register FlagsRegister();
  static constexpr inline Register FeedbackVectorRegister();

  static constexpr inline auto registers();
};

class StoreDescriptor : public StaticCallInterfaceDescriptor<StoreDescriptor> {
public:
DEFINE_PARAMETERS(kReceiver, kName, kValue, kSlot)
Expand Down
11 changes: 11 additions & 0 deletions src/codegen/x64/interface-descriptors-x64-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,17 @@ constexpr Register BaselineLeaveFrameDescriptor::WeightRegister() {
return rcx;
}

// static
// x64 register carrying the kFlags (Int32 feedback-vector flags) parameter
// of the MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin.
constexpr Register
MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::FlagsRegister() {
  return r8;
}
// static
// x64 register carrying the kFeedbackVector (tagged pointer) parameter of
// the MaglevOptimizeCodeOrTailCallOptimizedCodeSlot builtin.
constexpr Register MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor::
    FeedbackVectorRegister() {
  return r9;
}

// static
constexpr Register TypeConversionDescriptor::ArgumentRegister() { return rax; }

Expand Down
15 changes: 11 additions & 4 deletions src/codegen/x64/macro-assembler-x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -909,9 +909,8 @@ void MacroAssembler::ReplaceClosureCodeWithOptimizedCode(

// Read off the flags in the feedback vector and check if there
// is optimized code or a tiering state that needs to be processed.
void MacroAssembler::CheckFeedbackVectorFlagsAndJumpIfNeedsProcessing(
Register feedback_vector, CodeKind current_code_kind,
Label* flags_need_processing) {
Condition MacroAssembler::CheckFeedbackVectorFlagsNeedsProcessing(
Register feedback_vector, CodeKind current_code_kind) {
ASM_CODE_COMMENT(this);
DCHECK(CodeKindCanTierUp(current_code_kind));
uint32_t kFlagsMask = FeedbackVector::kFlagsTieringStateIsAnyRequested |
Expand All @@ -922,7 +921,15 @@ void MacroAssembler::CheckFeedbackVectorFlagsAndJumpIfNeedsProcessing(
}
testw(FieldOperand(feedback_vector, FeedbackVector::kFlagsOffset),
Immediate(kFlagsMask));
j(not_zero, flags_need_processing);
return not_zero;
}

// Tests the feedback-vector flags and jumps to |flags_need_processing| when
// any tiering / optimized-code flag relevant to |current_code_kind| is set.
// Thin wrapper: the condition-returning variant performs the testw against
// the flags field; this emits the conditional jump. Note x64 tests the field
// in memory directly, so no scratch |flags| register is needed.
void MacroAssembler::CheckFeedbackVectorFlagsAndJumpIfNeedsProcessing(
    Register feedback_vector, CodeKind current_code_kind,
    Label* flags_need_processing) {
  ASM_CODE_COMMENT(this);
  j(CheckFeedbackVectorFlagsNeedsProcessing(feedback_vector, current_code_kind),
    flags_need_processing);
}

void MacroAssembler::OptimizeCodeOrTailCallOptimizedCodeSlot(
Expand Down
14 changes: 11 additions & 3 deletions src/codegen/x64/macro-assembler-x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -871,12 +871,20 @@ class V8_EXPORT_PRIVATE MacroAssembler
Register slot_address);
void GenerateTailCallToReturnedCode(Runtime::FunctionId function_id,
JumpMode jump_mode = JumpMode::kJump);
Condition CheckFeedbackVectorFlagsNeedsProcessing(Register feedback_vector,
CodeKind current_code_kind);
void CheckFeedbackVectorFlagsAndJumpIfNeedsProcessing(
Register feedback_vector, CodeKind current_code_kind,
Label* flags_need_processing);
void OptimizeCodeOrTailCallOptimizedCodeSlot(
Register feedback_vector, Register closure,
JumpMode jump_mode = JumpMode::kJump);
void OptimizeCodeOrTailCallOptimizedCodeSlot(Register feedback_vector,
Register closure,
JumpMode jump_mode);
  // For compatibility with other archs.
  // The |flags| register is accepted but unused — on x64 the flags are
  // re-read from the feedback vector's field — and the closure is implicitly
  // kJSFunctionRegister.
  void OptimizeCodeOrTailCallOptimizedCodeSlot(Register flags,
                                               Register feedback_vector) {
    OptimizeCodeOrTailCallOptimizedCodeSlot(
        feedback_vector, kJSFunctionRegister, JumpMode::kJump);
  }

// Abort execution if argument is not a Constructor, enabled via --debug-code.
void AssertConstructor(Register object) NOOP_UNLESS_DEBUG_CODE;
Expand Down
35 changes: 17 additions & 18 deletions src/maglev/arm/maglev-assembler-arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,27 +135,26 @@ void MaglevAssembler::Prologue(Graph* graph) {
}

// Tiering support.
// TODO(jgruber): Extract to a builtin.
if (v8_flags.turbofan) {
ScratchRegisterScope temps(this);
Register flags = temps.Acquire();
Register feedback_vector = temps.Acquire();

Label* deferred_flags_need_processing = MakeDeferredCode(
[](MaglevAssembler* masm, Register flags, Register feedback_vector) {
ASM_CODE_COMMENT_STRING(masm, "Optimized marker check");
// TODO(leszeks): This could definitely be a builtin that we
// tail-call.
__ OptimizeCodeOrTailCallOptimizedCodeSlot(flags, feedback_vector);
__ Trap();
},
flags, feedback_vector);

using D = MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor;
Register flags = D::GetRegisterParameter(D::kFlags);
Register feedback_vector = D::GetRegisterParameter(D::kFeedbackVector);
DCHECK(!AreAliased(flags, feedback_vector, kJavaScriptCallArgCountRegister,
kJSFunctionRegister, kContextRegister,
kJavaScriptCallNewTargetRegister));
DCHECK(!temps.Available().has(flags));
DCHECK(!temps.Available().has(feedback_vector));
Move(feedback_vector,
compilation_info()->toplevel_compilation_unit()->feedback().object());
LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
flags, feedback_vector, CodeKind::MAGLEV,
deferred_flags_need_processing);
Condition needs_processing =
LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(flags, feedback_vector,
CodeKind::MAGLEV);
// Tail call on Arm produces 3 instructions, so we emit that in deferred
// code.
JumpToDeferredIf(needs_processing, [](MaglevAssembler* masm) {
__ TailCallBuiltin(
Builtin::kMaglevOptimizeCodeOrTailCallOptimizedCodeSlot);
});
}

EnterFrame(StackFrame::MAGLEV);
Expand Down
31 changes: 13 additions & 18 deletions src/maglev/arm64/maglev-assembler-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,27 +144,22 @@ void MaglevAssembler::Prologue(Graph* graph) {
}

// Tiering support.
// TODO(jgruber): Extract to a builtin.
if (v8_flags.turbofan) {
ScratchRegisterScope temps(this);
Register flags = temps.Acquire();
Register feedback_vector = temps.Acquire();

Label* deferred_flags_need_processing = MakeDeferredCode(
[](MaglevAssembler* masm, Register flags, Register feedback_vector) {
ASM_CODE_COMMENT_STRING(masm, "Optimized marker check");
// TODO(leszeks): This could definitely be a builtin that we
// tail-call.
__ OptimizeCodeOrTailCallOptimizedCodeSlot(flags, feedback_vector);
__ Trap();
},
flags, feedback_vector);

using D = MaglevOptimizeCodeOrTailCallOptimizedCodeSlotDescriptor;
Register flags = D::GetRegisterParameter(D::kFlags);
Register feedback_vector = D::GetRegisterParameter(D::kFeedbackVector);
DCHECK(!AreAliased(flags, feedback_vector, kJavaScriptCallArgCountRegister,
kJSFunctionRegister, kContextRegister,
kJavaScriptCallNewTargetRegister));
DCHECK(!temps.Available().has(flags));
DCHECK(!temps.Available().has(feedback_vector));
Move(feedback_vector,
compilation_info()->toplevel_compilation_unit()->feedback().object());
LoadFeedbackVectorFlagsAndJumpIfNeedsProcessing(
flags, feedback_vector, CodeKind::MAGLEV,
deferred_flags_need_processing);
Condition needs_processing =
LoadFeedbackVectorFlagsAndCheckIfNeedsProcessing(flags, feedback_vector,
CodeKind::MAGLEV);
TailCallBuiltin(Builtin::kMaglevOptimizeCodeOrTailCallOptimizedCodeSlot,
needs_processing);
}

EnterFrame(StackFrame::MAGLEV);
Expand Down
Loading

0 comments on commit fec1b61

Please sign in to comment.