Skip to content

Commit 0cf5b36

Browse files
fix: zero-initialize chunks from pool in allocateGlobalsSurface
When reusing memory chunks from the USM allocation pool, they may contain stale data from previous executions. This commit ensures that pooled allocations are properly zero-initialized before use by:
- Transferring init data using transferMemoryToAllocation (unchanged)
- Zero-initializing the BSS section via memsetAllocation when present
- Zero-initializing the entire totalSize for BSS-only allocations

This prevents stale data from affecting program execution when chunks are reused from the pool.

HSD-13013893112, HSD-18043476772, HSD-18043481899, HSD-18043487849, HSD-18043489182
Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
1 parent 70fe9c5 commit 0cf5b36

File tree

2 files changed

+152
-0
lines changed

2 files changed

+152
-0
lines changed

shared/source/program/program_initialization.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
120120
device, gpuAllocation, allocationOffset, initData, initSize);
121121
UNRECOVERABLE_IF(!success);
122122

123+
if (isAllocatedFromPool && zeroInitSize > 0) {
124+
auto success = MemoryTransferHelper::memsetAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation),
125+
device, gpuAllocation, allocationOffset + initSize, 0, zeroInitSize);
126+
UNRECOVERABLE_IF(!success);
127+
}
128+
123129
if (auto csr = device.getDefaultEngine().commandStreamReceiver;
124130
isAllocatedFromPool && csr->getType() != NEO::CommandStreamReceiverType::hardware) {
125131
auto writeMemoryOperation = [&]() {
@@ -141,6 +147,10 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
141147
writeMemoryOperation();
142148
}
143149
}
150+
} else if (isAllocatedFromPool) {
151+
auto success = MemoryTransferHelper::memsetAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation),
152+
device, gpuAllocation, allocationOffset, 0, totalSize);
153+
UNRECOVERABLE_IF(!success);
144154
}
145155
return new SharedPoolAllocation(gpuAllocation, allocationOffset, allocatedSize, nullptr);
146156
}

shared/test/unit_test/program/program_initialization_tests.cpp

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,148 @@ TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, GivenUsmAllocPoolAnd2MBLocalMemAlig
423423
}
424424
}
425425

426+
// Allocates a globals surface with init data followed by a zero-initialized tail, dirties the
// chunk, returns it to the USM global-surface pool, allocates again and checks that the reused
// chunk holds the init data followed by zeros (no bytes of the dirty pattern survive).
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenReusedChunkThenDataIsProperlyInitializedAndRestIsZeroed) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;

    constexpr size_t initSize = 32u;
    constexpr size_t zeroInitSize = 32u;
    constexpr size_t totalSize = initSize + zeroInitSize;
    constexpr uint8_t initValue = 7u;
    constexpr uint8_t dirtyValue = 9u;

    std::vector<uint8_t> initData(initSize, initValue);

    // Non-fatal property checks shared by both allocations. The null check is deliberately NOT
    // done here: a fatal assertion inside a lambda only returns from the lambda, so the test
    // body would carry on and dereference the null pointer. Callers assert non-null first.
    auto verifyAllocation = [&](SharedPoolAllocation *allocation) {
        EXPECT_TRUE(device.getUsmGlobalSurfaceAllocPool()->isInPool(
            reinterpret_cast<void *>(allocation->getGpuAddress())));
        EXPECT_NE(allocation->getGraphicsAllocation()->getUnderlyingBufferSize(),
                  allocation->getSize());
        EXPECT_TRUE(allocation->getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags());
        EXPECT_EQ(AllocationType::globalSurface,
                  allocation->getGraphicsAllocation()->getAllocationType());
    };

    std::unique_ptr<SharedPoolAllocation> globalSurface1;
    std::unique_ptr<SharedPoolAllocation> globalSurface2;

    // First allocation - new chunk from pool
    globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data()));
    ASSERT_NE(nullptr, globalSurface1.get());
    verifyAllocation(globalSurface1.get());
    EXPECT_EQ(0, memcmp(globalSurface1->getUnderlyingBuffer(), initData.data(), initSize));

    // Dirty the chunk before returning to pool
    std::memset(globalSurface1->getUnderlyingBuffer(), dirtyValue, globalSurface1->getSize());
    device.getUsmGlobalSurfaceAllocPool()->freeSVMAlloc(reinterpret_cast<void *>(globalSurface1->getGpuAddress()), false);

    // Second allocation - should reuse the same chunk
    globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data()));
    ASSERT_NE(nullptr, globalSurface2.get());
    verifyAllocation(globalSurface2.get());

    // Verify it's the same chunk
    EXPECT_EQ(globalSurface1->getGraphicsAllocation(), globalSurface2->getGraphicsAllocation());
    EXPECT_EQ(globalSurface1->getGpuAddress(), globalSurface2->getGpuAddress());
    EXPECT_EQ(globalSurface1->getOffset(), globalSurface2->getOffset());
    EXPECT_EQ(globalSurface1->getSize(), globalSurface2->getSize());

    // Verify proper initialization: initData followed by zeros for entire chunk
    std::vector<uint8_t> expectedData(globalSurface2->getSize(), 0);
    std::memcpy(expectedData.data(), initData.data(), initSize);

    EXPECT_EQ(0, memcmp(globalSurface2->getUnderlyingBuffer(), expectedData.data(), expectedData.size()));
}
477+
478+
// BSS-only variant (zeroInitSize == totalSize, no init data pointer): allocates a globals
// surface, dirties the chunk, returns it to the USM global-surface pool, allocates again and
// checks that the reused chunk reads back as all zeros.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenReusedChunkWithBssOnlyDataThenEntireChunkIsZeroed) {
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;

    constexpr size_t totalSize = 64u;
    constexpr size_t zeroInitSize = totalSize; // BSS only - no init data
    constexpr uint8_t dirtyValue = 9u;

    // Non-fatal property checks shared by both allocations. The null check is deliberately NOT
    // done here: a fatal assertion inside a lambda only returns from the lambda, so the test
    // body would carry on and dereference the null pointer. Callers assert non-null first.
    auto verifyAllocation = [&](SharedPoolAllocation *allocation) {
        EXPECT_TRUE(device.getUsmGlobalSurfaceAllocPool()->isInPool(
            reinterpret_cast<void *>(allocation->getGpuAddress())));
        EXPECT_NE(allocation->getGraphicsAllocation()->getUnderlyingBufferSize(),
                  allocation->getSize());
        EXPECT_TRUE(allocation->getGraphicsAllocation()->isMemObjectsAllocationWithWritableFlags());
        EXPECT_EQ(AllocationType::globalSurface,
                  allocation->getGraphicsAllocation()->getAllocationType());
    };

    std::unique_ptr<SharedPoolAllocation> globalSurface1;
    std::unique_ptr<SharedPoolAllocation> globalSurface2;

    // First allocation - BSS only (no init data)
    globalSurface1.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, nullptr));
    ASSERT_NE(nullptr, globalSurface1.get());
    verifyAllocation(globalSurface1.get());

    // Verify initial allocation is zeroed
    std::vector<uint8_t> expectedZeros(globalSurface1->getSize(), 0);
    EXPECT_EQ(0, memcmp(globalSurface1->getUnderlyingBuffer(), expectedZeros.data(), expectedZeros.size()));

    // Dirty the chunk before returning to pool
    std::memset(globalSurface1->getUnderlyingBuffer(), dirtyValue, globalSurface1->getSize());
    device.getUsmGlobalSurfaceAllocPool()->freeSVMAlloc(reinterpret_cast<void *>(globalSurface1->getGpuAddress()), false);

    // Second allocation - should reuse the same chunk
    globalSurface2.reset(allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, nullptr));
    ASSERT_NE(nullptr, globalSurface2.get());
    verifyAllocation(globalSurface2.get());

    // Verify it's the same chunk
    EXPECT_EQ(globalSurface1->getGraphicsAllocation(), globalSurface2->getGraphicsAllocation());
    EXPECT_EQ(globalSurface1->getGpuAddress(), globalSurface2->getGpuAddress());
    EXPECT_EQ(globalSurface1->getOffset(), globalSurface2->getOffset());
    EXPECT_EQ(globalSurface1->getSize(), globalSurface2->getSize());

    // Verify entire chunk is zeroed (no dirty data from previous use)
    EXPECT_EQ(0, memcmp(globalSurface2->getUnderlyingBuffer(), expectedZeros.data(), expectedZeros.size()));
}
525+
526+
// Init data covers the whole surface (zeroInitSize == 0), so the mock memory manager's
// memsetAllocation counter is expected to stay at zero after the allocation.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenOnlyInitDataWithoutBssSectionThenMemsetAllocationIsNotCalled) {
    constexpr uint8_t initValue = 7u;
    constexpr size_t zeroInitSize = 0u;
    constexpr size_t initSize = 64u;
    constexpr size_t totalSize = initSize + zeroInitSize;

    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    // Product helper reports that no blit copy is required for local memory.
    mockProductHelper->isBlitCopyRequiredForLocalMemoryResult = false;

    std::vector<uint8_t> initData(initSize, initValue);

    // Reset the call counter so that only calls made by the allocation below are counted.
    auto memoryManager = static_cast<MockMemoryManager *>(device.getMemoryManager());
    memoryManager->memsetAllocationCalled = 0;

    std::unique_ptr<SharedPoolAllocation> surface(
        allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data()));

    ASSERT_NE(nullptr, surface);
    EXPECT_EQ(0u, memoryManager->memsetAllocationCalled);
}
546+
547+
// The surface has both init data and a zero-initialized tail (zeroInitSize > 0), so the mock
// memory manager's memsetAllocation counter is expected to be exactly one after the allocation.
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, givenPooledUSMAllocationWhenInitDataAndBssSectionThenMemsetAllocationIsCalledOnceForBssSection) {
    constexpr uint8_t initValue = 7u;
    constexpr size_t zeroInitSize = 32u;
    constexpr size_t initSize = 32u;
    constexpr size_t totalSize = initSize + zeroInitSize;

    linkerInputExportGlobalVariables.traits.exportsGlobalVariables = true;
    mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
    // Product helper reports that no blit copy is required for local memory.
    mockProductHelper->isBlitCopyRequiredForLocalMemoryResult = false;

    std::vector<uint8_t> initData(initSize, initValue);

    // Reset the call counter so that only calls made by the allocation below are counted.
    auto memoryManager = static_cast<MockMemoryManager *>(device.getMemoryManager());
    memoryManager->memsetAllocationCalled = 0;

    std::unique_ptr<SharedPoolAllocation> surface(
        allocateGlobalsSurface(svmAllocsManager.get(), device, totalSize, zeroInitSize, false, &linkerInputExportGlobalVariables, initData.data()));

    ASSERT_NE(nullptr, surface);
    EXPECT_EQ(1u, memoryManager->memsetAllocationCalled);
}
567+
426568
TEST_F(AllocateGlobalSurfaceWithUsmPoolTest, Given2MBLocalMemAlignmentEnabledButUsmPoolInitializeFailsThenDoNotUseUsmPool) {
427569
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
428570

0 commit comments

Comments
 (0)