@@ -63,64 +63,76 @@ TSparsedMerger::TWriter::TWriter(const TColumnMergeContext& context)
63
63
IndexBuilderImpl = (arrow::UInt32Builder*)(IndexBuilder.get ());
64
64
}
65
65
66
- bool TSparsedMerger::TCursor::AddIndexTo (const ui32 index, TWriter& writer) {
66
+ bool TSparsedMerger::TPlainChunkCursor::AddIndexTo (const ui32 index, TWriter& writer, const TColumnMergeContext& context) {
67
+ if (ChunkFinishPosition <= index ) {
68
+ InitArrays (index );
69
+ }
70
+ AFL_VERIFY (ChunkStartPosition <= index );
71
+ if (NArrow::ColumnEqualsScalar (ChunkAddress->GetArray (), index - ChunkStartPosition, context.GetLoader ()->GetDefaultValue ())) {
72
+ return false ;
73
+ } else {
74
+ writer.AddRealData (ChunkAddress->GetArray (), index - ChunkStartPosition);
75
+ return true ;
76
+ }
77
+ }
78
+
79
+ bool TSparsedMerger::TSparsedChunkCursor::AddIndexTo (const ui32 index, TWriter& writer, const TColumnMergeContext& /* context*/ ) {
80
+ AFL_VERIFY (ChunkStartGlobalPosition <= index );
67
81
if (index < NextGlobalPosition) {
68
82
return false ;
69
- } else if ( index == NextGlobalPosition) {
70
- if (index == CommonShift + Chunk-> GetRecordsCount () ) {
83
+ } else {
84
+ if (FinishGlobalPosition <= index ) {
71
85
InitArrays (index );
72
- if (index != NextGlobalPosition) {
86
+ }
87
+ if (index == NextGlobalPosition) {
88
+ writer.AddRealData (Chunk->GetColValue (), NextLocalPosition);
89
+ if (++NextLocalPosition < Chunk->GetNotDefaultRecordsCount ()) {
90
+ NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetIndexUnsafeFast (NextLocalPosition);
91
+ return true ;
92
+ } else {
93
+ NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetRecordsCount ();
73
94
return false ;
74
95
}
75
- }
76
- writer.AddRealData (Chunk->GetColValue (), NextLocalPosition);
77
- if (++NextLocalPosition < Chunk->GetNotDefaultRecordsCount ()) {
78
- NextGlobalPosition = CommonShift + Chunk->GetIndexUnsafeFast (NextLocalPosition);
79
- return true ;
80
96
} else {
81
- NextGlobalPosition = CommonShift + Chunk->GetRecordsCount ();
97
+ bool found = false ;
98
+ for (; NextLocalPosition < Chunk->GetNotDefaultRecordsCount (); ++NextLocalPosition) {
99
+ NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetIndexUnsafeFast (NextLocalPosition);
100
+ if (NextGlobalPosition == index ) {
101
+ writer.AddRealData (Chunk->GetColValue (), NextLocalPosition);
102
+ found = true ;
103
+ } else if (index < NextGlobalPosition) {
104
+ return found;
105
+ }
106
+ }
107
+ NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetRecordsCount ();
82
108
return false ;
83
109
}
84
110
}
85
- AFL_VERIFY (Chunk->GetStartPosition () <= index );
86
- if (CommonShift + Chunk->GetRecordsCount () <= index ) {
111
+ }
112
+
113
+ bool TSparsedMerger::TCursor::AddIndexTo (const ui32 index, TWriter& writer) {
114
+ if (FinishGlobalPosition <= index ) {
87
115
InitArrays (index );
88
116
}
89
- bool found = false ;
90
- for (; NextLocalPosition < Chunk->GetNotDefaultRecordsCount (); ++NextLocalPosition) {
91
- NextGlobalPosition = CommonShift + Chunk->GetIndexUnsafeFast (NextLocalPosition);
92
- if (NextGlobalPosition == index ) {
93
- writer.AddRealData (Chunk->GetColValue (), NextLocalPosition);
94
- found = true ;
95
- } else if (index < NextGlobalPosition) {
96
- return found;
97
- }
117
+ if (SparsedCursor) {
118
+ return SparsedCursor->AddIndexTo (index , writer, Context);
119
+ } else {
120
+ return PlainCursor->AddIndexTo (index , writer, Context);
98
121
}
99
- NextGlobalPosition = CommonShift + Chunk->GetRecordsCount ();
100
- return false ;
101
122
}
102
123
103
124
void TSparsedMerger::TCursor::InitArrays (const ui32 position) {
104
- if (!CurrentOwnedArray || !CurrentOwnedArray->GetAddress ().Contains (position)) {
105
- CurrentOwnedArray = Array->GetArray (CurrentOwnedArray, position, Array);
106
- if (CurrentOwnedArray->GetArray ()->GetType () == NArrow::NAccessor::IChunkedArray::EType::SparsedArray) {
107
- CurrentSparsedArray = static_pointer_cast<NArrow::NAccessor::TSparsedArray>(CurrentOwnedArray->GetArray ());
108
- } else {
109
- CurrentSparsedArray = make_shared<NArrow::NAccessor::TSparsedArray>(*CurrentOwnedArray->GetArray (), Context.GetDefaultValue ());
110
- }
111
- Chunk.reset ();
112
- }
113
- if (!Chunk || Chunk->GetFinishPosition () <= position) {
114
- Chunk = CurrentSparsedArray->GetSparsedChunk (CurrentOwnedArray->GetAddress ().GetLocalIndex (position));
115
- AFL_VERIFY (Chunk->GetRecordsCount ());
116
- AFL_VERIFY (CurrentOwnedArray->GetAddress ().GetGlobalStartPosition () + Chunk->GetStartPosition () <= position &&
117
- position < CurrentOwnedArray->GetAddress ().GetGlobalStartPosition () + Chunk->GetFinishPosition ())
118
- (" pos" , position)(" start" , Chunk->GetStartPosition ())(" finish" , Chunk->GetFinishPosition ())(
119
- " shift" , CurrentOwnedArray->GetAddress ().GetGlobalStartPosition ());
125
+ AFL_VERIFY (!CurrentOwnedArray || !CurrentOwnedArray->GetAddress ().Contains (position));
126
+ CurrentOwnedArray = Array->GetArray (CurrentOwnedArray, position, Array);
127
+ if (CurrentOwnedArray->GetArray ()->GetType () == NArrow::NAccessor::IChunkedArray::EType::SparsedArray) {
128
+ auto sparsedArray = static_pointer_cast<NArrow::NAccessor::TSparsedArray>(CurrentOwnedArray->GetArray ());
129
+ SparsedCursor = std::make_shared<TSparsedChunkCursor>(sparsedArray, &*CurrentOwnedArray);
130
+ PlainCursor = nullptr ;
131
+ } else {
132
+ PlainCursor = make_shared<TPlainChunkCursor>(CurrentOwnedArray->GetArray (), &*CurrentOwnedArray);
133
+ SparsedCursor = nullptr ;
120
134
}
121
- CommonShift = CurrentOwnedArray->GetAddress ().GetGlobalStartPosition () + Chunk->GetStartPosition ();
122
- NextGlobalPosition = CurrentOwnedArray->GetAddress ().GetGlobalStartPosition () + Chunk->GetFirstIndexNotDefault ();
123
- NextLocalPosition = 0 ;
135
+ FinishGlobalPosition = CurrentOwnedArray->GetAddress ().GetGlobalStartPosition () + CurrentOwnedArray->GetArray ()->GetRecordsCount ();
124
136
}
125
137
126
138
} // namespace NKikimr::NOlap::NCompaction
0 commit comments