@@ -350,6 +350,106 @@ std::vector<const TColumnRecord*> TPortionDataAccessor::GetColumnChunksPointers(
350350 return result;
351351}
352352
353+ std::vector<TPortionDataAccessor::TReadPage> TPortionDataAccessor::BuildReadPages (const ui64 memoryLimit, const std::set<ui32>& entityIds) const {
354+ class TEntityDelimiter {
355+ private:
356+ YDB_READONLY (ui32, IndexStart, 0 );
357+ YDB_READONLY (ui32, EntityId, 0 );
358+ YDB_READONLY (ui32, ChunkIdx, 0 );
359+ YDB_READONLY (ui64, MemoryStartChunk, 0 );
360+ YDB_READONLY (ui64, MemoryFinishChunk, 0 );
361+
362+ public:
363+ TEntityDelimiter (const ui32 indexStart, const ui32 entityId, const ui32 chunkIdx, const ui64 memStartChunk, const ui64 memFinishChunk)
364+ : IndexStart(indexStart)
365+ , EntityId(entityId)
366+ , ChunkIdx(chunkIdx)
367+ , MemoryStartChunk(memStartChunk)
368+ , MemoryFinishChunk(memFinishChunk) {
369+ }
370+
371+ bool operator <(const TEntityDelimiter& item) const {
372+ return std::tie (IndexStart, EntityId, ChunkIdx) < std::tie (item.IndexStart , item.EntityId , item.ChunkIdx );
373+ }
374+ };
375+
376+ class TGlobalDelimiter {
377+ private:
378+ YDB_READONLY (ui32, IndexStart, 0 );
379+ YDB_ACCESSOR (ui64, UsedMemory, 0 );
380+ YDB_ACCESSOR (ui64, WholeChunksMemory, 0 );
381+
382+ public:
383+ TGlobalDelimiter (const ui32 indexStart)
384+ : IndexStart(indexStart) {
385+ }
386+ };
387+
388+ std::vector<TEntityDelimiter> delimiters;
389+
390+ ui32 lastAppliedId = 0 ;
391+ ui32 currentRecordIdx = 0 ;
392+ bool needOne = false ;
393+ const TColumnRecord* lastRecord = nullptr ;
394+ for (auto && i : GetRecordsVerified ()) {
395+ if (lastAppliedId != i.GetEntityId ()) {
396+ if (delimiters.size ()) {
397+ AFL_VERIFY (delimiters.back ().GetIndexStart () == PortionInfo->GetRecordsCount ());
398+ }
399+ needOne = entityIds.contains (i.GetEntityId ());
400+ currentRecordIdx = 0 ;
401+ lastAppliedId = i.GetEntityId ();
402+ lastRecord = nullptr ;
403+ }
404+ if (!needOne) {
405+ continue ;
406+ }
407+ delimiters.emplace_back (
408+ currentRecordIdx, i.GetEntityId (), i.GetChunkIdx (), i.GetMeta ().GetRawBytes (), lastRecord ? lastRecord->GetMeta ().GetRawBytes () : 0 );
409+ currentRecordIdx += i.GetMeta ().GetRecordsCount ();
410+ if (currentRecordIdx == PortionInfo->GetRecordsCount ()) {
411+ delimiters.emplace_back (currentRecordIdx, i.GetEntityId (), i.GetChunkIdx () + 1 , 0 , i.GetMeta ().GetRawBytes ());
412+ }
413+ lastRecord = &i;
414+ }
415+ std::sort (delimiters.begin (), delimiters.end ());
416+ std::vector<TGlobalDelimiter> sumDelimiters;
417+ for (auto && i : delimiters) {
418+ if (sumDelimiters.empty ()) {
419+ sumDelimiters.emplace_back (i.GetIndexStart ());
420+ } else if (sumDelimiters.back ().GetIndexStart () != i.GetIndexStart ()) {
421+ AFL_VERIFY (sumDelimiters.back ().GetIndexStart () < i.GetIndexStart ());
422+ TGlobalDelimiter backDelimiter (i.GetIndexStart ());
423+ backDelimiter.MutableWholeChunksMemory () = sumDelimiters.back ().GetWholeChunksMemory ();
424+ backDelimiter.MutableUsedMemory () = sumDelimiters.back ().GetUsedMemory ();
425+ sumDelimiters.emplace_back (std::move (backDelimiter));
426+ }
427+ sumDelimiters.back ().MutableWholeChunksMemory () += i.GetMemoryFinishChunk ();
428+ sumDelimiters.back ().MutableUsedMemory () += i.GetMemoryStartChunk ();
429+ }
430+ std::vector<ui32> recordIdx = { 0 };
431+ std::vector<ui64> packMemorySize;
432+ const TGlobalDelimiter* lastBorder = &sumDelimiters.front ();
433+ for (auto && i : sumDelimiters) {
434+ const i64 sumMemory = (i64 )i.GetUsedMemory () - (i64 )lastBorder->GetWholeChunksMemory ();
435+ AFL_VERIFY (sumMemory > 0 );
436+ if (((ui64)sumMemory >= memoryLimit || i.GetIndexStart () == PortionInfo->GetRecordsCount ()) && i.GetIndexStart ()) {
437+ AFL_VERIFY (lastBorder->GetIndexStart () < i.GetIndexStart ());
438+ recordIdx.emplace_back (i.GetIndexStart ());
439+ packMemorySize.emplace_back (sumMemory);
440+ lastBorder = &i;
441+ }
442+ }
443+ AFL_VERIFY (recordIdx.front () == 0 );
444+ AFL_VERIFY (recordIdx.back () == PortionInfo->GetRecordsCount ());
445+ AFL_VERIFY (recordIdx.size () == packMemorySize.size () + 1 );
446+ std::vector<TReadPage> pages;
447+ for (ui32 i = 0 ; i < packMemorySize.size (); ++i) {
448+ pages.emplace_back (recordIdx[i], recordIdx[i + 1 ] - recordIdx[i], packMemorySize[i]);
449+ }
450+ return pages;
451+ }
452+
353453std::vector<TPortionDataAccessor::TPage> TPortionDataAccessor::BuildPages () const {
354454 std::vector<TPage> pages;
355455 struct TPart {
0 commit comments