11#include " columnshard.h"
22#include " columnshard_impl.h"
3+
34#include " ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.h"
45
56#include < ydb/core/protos/kqp.pb.h>
7+ #include < ydb/core/tx/columnshard/blobs_reader/actor.h>
68#include < ydb/core/tx/columnshard/engines/column_engine_logs.h>
79
810#include < yql/essentials/core/minsketch/count_min_sketch.h>
911
10-
1112namespace NKikimr ::NColumnShard {
1213
1314void TColumnShard::Handle (NStat::TEvStatistics::TEvAnalyzeTable::TPtr& ev, const TActorContext&) {
1415 auto & requestRecord = ev->Get ()->Record ;
1516 // TODO Start a potentially long analysis process.
1617 // ...
1718
18-
19-
2019 // Return the response when the analysis is completed
2120 auto response = std::make_unique<NStat::TEvStatistics::TEvAnalyzeTableResponse>();
2221 auto & responseRecord = response->Record ;
@@ -64,8 +63,7 @@ class TResultAccumulator {
6463 std::unique_ptr<NStat::TEvStatistics::TEvStatisticsResponse>&& response)
6564 : RequestSenderActorId(requestSenderActorId)
6665 , Cookie(cookie)
67- , Response(std::move(response))
68- {
66+ , Response(std::move(response)) {
6967 for (auto && i : tags) {
7068 AFL_VERIFY (Calculated.emplace (i, nullptr ).second );
7169 }
@@ -104,11 +102,11 @@ class TResultAccumulator {
104102 OnResultReady ();
105103 }
106104 }
107-
108105};
109106
110107class TColumnPortionsAccumulator {
111108private:
109+ const std::shared_ptr<NOlap::IStoragesManager> StoragesManager;
112110 const std::set<ui32> ColumnTagsRequested;
113111 std::vector<NOlap::TPortionInfo::TConstPtr> Portions;
114112 const ui32 PortionsCountLimit = 10000 ;
@@ -117,19 +115,66 @@ class TColumnPortionsAccumulator {
117115 const std::shared_ptr<NOlap::TVersionedIndex> VersionedIndex;
118116
119117public:
120- TColumnPortionsAccumulator (const std::shared_ptr<TResultAccumulator>& result, const ui32 portionsCountLimit,
121- const std::set<ui32>& originalColumnTags, const std::shared_ptr<NOlap::TVersionedIndex>& vIndex,
118+ TColumnPortionsAccumulator (const std::shared_ptr<NOlap::IStoragesManager>& storagesManager,
119+ const std::shared_ptr<TResultAccumulator>& result, const ui32 portionsCountLimit, const std::set<ui32>& originalColumnTags,
120+ const std::shared_ptr<NOlap::TVersionedIndex>& vIndex,
122121 const std::shared_ptr<NOlap::NDataAccessorControl::IDataAccessorsManager>& dataAccessorsManager)
123- : ColumnTagsRequested(originalColumnTags)
122+ : StoragesManager(storagesManager)
123+ , ColumnTagsRequested(originalColumnTags)
124124 , PortionsCountLimit(portionsCountLimit)
125125 , DataAccessors(dataAccessorsManager)
126126 , Result(result)
127- , VersionedIndex(vIndex)
128- {
127+ , VersionedIndex(vIndex) {
129128 }
130129
130+ class TIndexReadTask : public NOlap ::NBlobOperations::NRead::ITask {
131+ private:
132+ using TBase = NOlap::NBlobOperations::NRead::ITask;
133+ const std::shared_ptr<TResultAccumulator> Result;
134+ THashMap<ui32, THashMap<TString, THashSet<NOlap::TBlobRange>>> RangesByColumn;
135+ THashMap<ui32, std::unique_ptr<TCountMinSketch>> SketchesByColumns;
136+
137+ protected:
138+ virtual void DoOnDataReady (const std::shared_ptr<NOlap::NResourceBroker::NSubscribe::TResourcesGuard>& /* resourcesGuard*/ ) override {
139+ NOlap::NBlobOperations::NRead::TCompositeReadBlobs blobsData = ExtractBlobsData ();
140+ for (auto && [columnId, data] : RangesByColumn) {
141+ for (auto && [storageId, blobs] : data) {
142+ for (auto && b : blobs) {
143+ const TString blob = blobsData.Extract (storageId, b);
144+ auto sketch = std::unique_ptr<TCountMinSketch>(TCountMinSketch::FromString (blob.data (), blob.size ()));
145+ *SketchesByColumns[columnId] += *sketch;
146+ }
147+ }
148+ }
149+ Result->AddResult (std::move (SketchesByColumns));
150+ }
151+
152+ virtual bool DoOnError (
153+ const TString& storageId, const NOlap::TBlobRange& range, const NOlap::IBlobsReadingAction::TErrorStatus& status) override {
154+ AFL_ERROR (NKikimrServices::TX_COLUMNSHARD)(" event" , " DoOnError" )(" storage_id" , storageId)(" blob_id" , range)(
155+ " status" , status.GetErrorMessage ())(" status_code" , status.GetStatus ());
156+ AFL_VERIFY (status.GetStatus () != NKikimrProto::EReplyStatus::NODATA)(" blob_id" , range)(" status" , status.GetStatus ())(
157+ " error" , status.GetErrorMessage ())(" type" , " STATISTICS" );
158+ return false ;
159+ }
160+
161+ public:
162+ TIndexReadTask (const std::shared_ptr<TResultAccumulator>& result,
163+ std::vector<std::shared_ptr<NOlap::IBlobsReadingAction>>&& readingActions,
164+ THashMap<ui32, THashMap<TString, THashSet<NOlap::TBlobRange>>>&& rangesByColumn,
165+ THashMap<ui32, std::unique_ptr<TCountMinSketch>>&& readySketches)
166+ : TBase(std::move(readingActions), " STATISTICS" , " STATISTICS" )
167+ , Result(result)
168+ , RangesByColumn(std::move(rangesByColumn))
169+ , SketchesByColumns(std::move(readySketches)) {
170+ AFL_VERIFY (!!Result);
171+ AFL_VERIFY (RangesByColumn.size ());
172+ }
173+ };
174+
131175 class TMetadataSubscriber : public NOlap ::IDataAccessorRequestsSubscriber {
132176 private:
177+ const std::shared_ptr<NOlap::IStoragesManager> StoragesManager;
133178 const std::shared_ptr<TResultAccumulator> Result;
134179 std::shared_ptr<NOlap::TVersionedIndex> VersionedIndex;
135180 const std::set<ui32> ColumnTagsRequested;
@@ -143,6 +188,8 @@ class TColumnPortionsAccumulator {
143188 sketchesByColumns.emplace (id, TCountMinSketch::Create ());
144189 }
145190
191+ THashMap<ui32, THashMap<TString, THashSet<NOlap::TBlobRange>>> rangesByColumn;
192+
146193 for (const auto & [id, portionInfo] : result.GetPortions ()) {
147194 std::shared_ptr<NOlap::ISnapshotSchema> portionSchema = portionInfo.GetPortionInfo ().GetSchema (*VersionedIndex);
148195 for (const ui32 columnId : ColumnTagsRequested) {
@@ -154,26 +201,43 @@ class TColumnPortionsAccumulator {
154201 }
155202 AFL_VERIFY (indexMeta->GetColumnIds ().size () == 1 );
156203
157- const std::vector<TString> data = portionInfo.GetIndexInplaceDataVerified (indexMeta->GetIndexId ());
204+ if (!indexMeta->IsInplaceData ()) {
205+ portionInfo.FillBlobRangesByStorage (rangesByColumn, portionSchema->GetIndexInfo (), { indexMeta->GetIndexId () });
206+ } else {
207+ const std::vector<TString> data = portionInfo.GetIndexInplaceDataVerified (indexMeta->GetIndexId ());
158208
159- for (const auto & sketchAsString : data) {
160- auto sketch =
161- std::unique_ptr<TCountMinSketch>(TCountMinSketch::FromString (sketchAsString.data (), sketchAsString.size ()));
162- *sketchesByColumns[columnId] += *sketch;
209+ for (const auto & sketchAsString : data) {
210+ auto sketch =
211+ std::unique_ptr<TCountMinSketch>(TCountMinSketch::FromString (sketchAsString.data (), sketchAsString.size ()));
212+ *sketchesByColumns[columnId] += *sketch;
213+ }
163214 }
164215 }
165216 }
166- Result->AddResult (std::move (sketchesByColumns));
217+ if (rangesByColumn.size ()) {
218+ NOlap::TBlobsAction blobsAction (StoragesManager, NOlap::NBlobOperations::EConsumer::STATISTICS);
219+ for (auto && i : rangesByColumn) {
220+ for (auto && [storageId, ranges] : i.second ) {
221+ auto reader = blobsAction.GetReading (storageId);
222+ for (auto && i : ranges) {
223+ reader->AddRange (i);
224+ }
225+ }
226+ }
227+ TActorContext::AsActorContext ().Register (new NOlap::NBlobOperations::NRead::TActor (
228+ std::make_shared<TIndexReadTask>(Result, blobsAction.GetReadingActions (), std::move (rangesByColumn), std::move (sketchesByColumns))));
229+ } else {
230+ Result->AddResult (std::move (sketchesByColumns));
231+ }
167232 }
168233
169234 public:
170- TMetadataSubscriber (
171- const std::shared_ptr<TResultAccumulator>& result, const std::shared_ptr<NOlap::TVersionedIndex>& vIndex, const std::set<ui32>& tags)
172- : Result(result)
235+ TMetadataSubscriber (const std::shared_ptr<NOlap::IStoragesManager>& storagesManager, const std::shared_ptr<TResultAccumulator>& result,
236+ const std::shared_ptr<NOlap::TVersionedIndex>& vIndex, const std::set<ui32>& tags)
237+ : StoragesManager(storagesManager)
238+ , Result(result)
173239 , VersionedIndex(vIndex)
174- , ColumnTagsRequested(tags)
175- {
176-
240+ , ColumnTagsRequested(tags) {
177241 }
178242 };
179243
@@ -186,7 +250,7 @@ class TColumnPortionsAccumulator {
186250 for (auto && i : Portions) {
187251 request->AddPortion (i);
188252 }
189- request->RegisterSubscriber (std::make_shared<TMetadataSubscriber>(Result, VersionedIndex, ColumnTagsRequested));
253+ request->RegisterSubscriber (std::make_shared<TMetadataSubscriber>(StoragesManager, Result, VersionedIndex, ColumnTagsRequested));
190254 Portions.clear ();
191255 DataAccessors->AskData (request);
192256 }
@@ -234,7 +298,8 @@ void TColumnShard::Handle(NStat::TEvStatistics::TEvStatisticsRequest::TPtr& ev,
234298 std::shared_ptr<TResultAccumulator> resultAccumulator =
235299 std::make_shared<TResultAccumulator>(columnTagsRequested, ev->Sender , ev->Cookie , std::move (response));
236300 auto versionedIndex = std::make_shared<NOlap::TVersionedIndex>(index.GetVersionedIndex ());
237- TColumnPortionsAccumulator portionsPack (resultAccumulator, 1000 , columnTagsRequested, versionedIndex, DataAccessorsManager.GetObjectPtrVerified ());
301+ TColumnPortionsAccumulator portionsPack (
302+ StoragesManager, resultAccumulator, 1000 , columnTagsRequested, versionedIndex, DataAccessorsManager.GetObjectPtrVerified ());
238303
239304 for (const auto & [_, portionInfo] : spg->GetPortions ()) {
240305 if (!portionInfo->IsVisible (GetMaxReadVersion ())) {
@@ -246,4 +311,4 @@ void TColumnShard::Handle(NStat::TEvStatistics::TEvStatisticsRequest::TPtr& ev,
246311 resultAccumulator->Start ();
247312}
248313
249- }
314+ } // namespace NKikimr::NColumnShard
0 commit comments