From 42f54145c38f1b357fc3d12a0f71ed37552edb11 Mon Sep 17 00:00:00 2001 From: Daryl Maier Date: Thu, 16 May 2024 14:13:57 -0400 Subject: [PATCH] x86 zero initialization improvements for arrays Avoid REP STOS zero initialization for arrays whose length is below a prescribed threshold checked at runtime. Use faster GPR stores instead. Move REP STOS initialization out of line. Signed-off-by: Daryl Maier --- .../compiler/x/codegen/J9TreeEvaluator.cpp | 91 ++++++++++++++++--- 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp index 07ac04fda80..db59e85e82b 100644 --- a/runtime/compiler/x/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/x/codegen/J9TreeEvaluator.cpp @@ -6918,27 +6918,94 @@ static bool genZeroInitForEntireObjectOrHybridArraylet( // generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg, generateX86MemoryReference(newObjectAddressReg, headerSizeInBytes, cg), cg); - TR::Register *scratchReg = NULL; + TR::Register *zeroInitScratchReg = NULL; if (comp->target().is64Bit()) { - scratchReg = srm->findOrCreateScratchRegister(); - generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg, newObjectAddressReg, cg); + zeroInitScratchReg = srm->findOrCreateScratchRegister(); } - else + + // If the compile-time size is unknown, generate a runtime length check to + // determine if REP STOS initialization is more appropriate. + // + // On 32-bit, always do REP STOS initialization inline. + // + + static const char *p = feGetEnv("TR_repStosZeroInitThresholdBytes"); + static int32_t repStosZeroInitThresholdBytes = p ? atoi(p) : 64; + static bool doInlineRepStosZeroInit = feGetEnv("TR_dontInlineRepStosZeroInit") ? false : true; + +#ifdef TR_TARGET_64BIT + if (sizeReg && doInlineRepStosZeroInit) { - generateRegInstruction(TR::InstOpCode::PUSHReg, node, newObjectAddressReg, cg); + TR::LabelSymbol *repStosInitLabelSym = generateLabelSymbol(cg); + TR::LabelSymbol *mergeInitLabelSym = generateLabelSymbol(cg); + + generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, numBytesToZeroInitReg, repStosZeroInitThresholdBytes, cg); + generateLabelInstruction(TR::InstOpCode::JG4, node, repStosInitLabelSym, cg); + + generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, zeroInitScratchReg, zeroInitScratchReg, cg); + + // Generate mainline zero initialization with stores + // + TR::LabelSymbol *zeroInitLoopLabelSym = generateLabelSymbol(cg); + generateLabelInstruction(TR::InstOpCode::label, node, zeroInitLoopLabelSym, cg); + generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, + generateX86MemoryReference(segmentReg, 0, cg), + zeroInitScratchReg, cg); + generateRegImmInstruction(TR::InstOpCode::ADD8RegImms, node, segmentReg, 8, cg); + generateRegImmInstruction(TR::InstOpCode::SUB8RegImms, node, numBytesToZeroInitReg, 8, cg); + generateRegImmInstruction(TR::InstOpCode::CMP8RegImms, node, numBytesToZeroInitReg, 0, cg); + generateLabelInstruction(TR::InstOpCode::JG4, node, zeroInitLoopLabelSym, cg); + + { + // Generate out-of-line REP STOS initialization + // + TR_OutlinedInstructionsGenerator og(repStosInitLabelSym, node, cg); + + // newObjectAddressReg must be in rax + // segmentReg must be in rdi + // numBytesToZeroInitReg must be in rcx + // + generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, zeroInitScratchReg, newObjectAddressReg, cg); + generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, newObjectAddressReg, newObjectAddressReg, cg); + generateInstruction(TR::InstOpCode::REPSTOSB, node, cg); + generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, newObjectAddressReg, zeroInitScratchReg, cg); + generateLabelInstruction(TR::InstOpCode::JMP4, node, mergeInitLabelSym, cg); + og.endOutlinedInstructionSequence(); } - generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, newObjectAddressReg, newObjectAddressReg, cg); - generateInstruction(TR::InstOpCode::REPSTOSB, node, cg); - if (comp->target().is64Bit()) - { - generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, newObjectAddressReg, scratchReg, cg); - srm->reclaimScratchRegister(scratchReg); + + srm->reclaimScratchRegister(zeroInitScratchReg); + + // Merge + // + generateLabelInstruction(TR::InstOpCode::label, node, mergeInitLabelSym, cg); } else { - generateRegInstruction(TR::InstOpCode::POPReg, node, newObjectAddressReg, cg); +#endif + + if (comp->target().is64Bit()) + { + generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, zeroInitScratchReg, newObjectAddressReg, cg); + } + else + { + generateRegInstruction(TR::InstOpCode::PUSHReg, node, newObjectAddressReg, cg); + } + generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, newObjectAddressReg, newObjectAddressReg, cg); + generateInstruction(TR::InstOpCode::REPSTOSB, node, cg); + if (comp->target().is64Bit()) + { + generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, newObjectAddressReg, zeroInitScratchReg, cg); + srm->reclaimScratchRegister(zeroInitScratchReg); + } + else + { + generateRegInstruction(TR::InstOpCode::POPReg, node, newObjectAddressReg, cg); + } +#ifdef TR_TARGET_64BIT } +#endif return true; }