Skip to content

improve performace for stat requests (sysview) #7211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 50 additions & 11 deletions ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,19 @@ namespace NKikimr::NOlap::NReader::NSysView::NChunks {

void TStatsIterator::AppendStats(const std::vector<std::unique_ptr<arrow::ArrayBuilder>>& builders, const TPortionInfo& portion) const {
auto portionSchema = ReadMetadata->GetLoadSchemaVerified(portion);
const std::string prod = ::ToString(portion.GetMeta().Produced);
auto it = PortionType.find(portion.GetMeta().Produced);
if (it == PortionType.end()) {
it = PortionType.emplace(portion.GetMeta().Produced, ::ToString(portion.GetMeta().Produced)).first;
}
const arrow::util::string_view prodView = it->second.GetView();
const bool activity = !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot());
static const TString ConstantEntityIsColumn = "COL";
static const arrow::util::string_view ConstantEntityIsColumnView =
arrow::util::string_view(ConstantEntityIsColumn.data(), ConstantEntityIsColumn.size());
static const TString ConstantEntityIsIndex = "IDX";
static const arrow::util::string_view ConstantEntityIsIndexView =
arrow::util::string_view(ConstantEntityIsIndex.data(), ConstantEntityIsIndex.size());
auto& entityStorages = EntityStorageNames[portion.GetMeta().GetTierName()];
{
std::vector<const TColumnRecord*> records;
for (auto&& r : portion.Records) {
Expand All @@ -16,26 +27,54 @@ void TStatsIterator::AppendStats(const std::vector<std::unique_ptr<arrow::ArrayB
if (Reverse) {
std::reverse(records.begin(), records.end());
}
THashMap<ui32, TString> blobsIds;
std::optional<ui32> lastColumnId;
arrow::util::string_view lastColumnName;
arrow::util::string_view lastTierName;
for (auto&& r : records) {
NArrow::Append<arrow::UInt64Type>(*builders[0], portion.GetPathId());
NArrow::Append<arrow::StringType>(*builders[1], prod);
NArrow::Append<arrow::StringType>(*builders[1], prodView);
NArrow::Append<arrow::UInt64Type>(*builders[2], ReadMetadata->TabletId);
NArrow::Append<arrow::UInt64Type>(*builders[3], r->GetMeta().GetNumRows());
NArrow::Append<arrow::UInt64Type>(*builders[4], r->GetMeta().GetRawBytes());
NArrow::Append<arrow::UInt64Type>(*builders[5], portion.GetPortionId());
NArrow::Append<arrow::UInt64Type>(*builders[6], r->GetChunkIdx());
NArrow::Append<arrow::StringType>(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined"));
if (!lastColumnId || *lastColumnId != r->GetColumnId()) {
{
auto it = ColumnNamesById.find(r->GetColumnId());
if (it == ColumnNamesById.end()) {
it =
ColumnNamesById.emplace(r->GetColumnId(), portionSchema->GetFieldByColumnIdVerified(r->GetColumnId())->name()).first;
}
lastColumnName = it->second.GetView();
}
{
auto it = entityStorages.find(r->GetColumnId());
if (it == entityStorages.end()) {
it = entityStorages.emplace(r->GetColumnId(),
portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName())).first;
}
lastTierName = it->second.GetView();
}
lastColumnId = r->GetColumnId();
}
NArrow::Append<arrow::StringType>(*builders[7], lastColumnName);
NArrow::Append<arrow::UInt32Type>(*builders[8], r->GetColumnId());
std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy();
NArrow::Append<arrow::StringType>(*builders[9], blobIdString);
{
auto itBlobIdString = blobsIds.find(r->GetBlobRange().GetBlobIdxVerified());
if (itBlobIdString == blobsIds.end()) {
itBlobIdString = blobsIds.emplace(
r->GetBlobRange().GetBlobIdxVerified(), portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy()).first;
}
NArrow::Append<arrow::StringType>(
*builders[9], arrow::util::string_view(itBlobIdString->second.data(), itBlobIdString->second.size()));
}
NArrow::Append<arrow::UInt64Type>(*builders[10], r->BlobRange.Offset);
NArrow::Append<arrow::UInt64Type>(*builders[11], r->BlobRange.Size);
NArrow::Append<arrow::BooleanType>(*builders[12], activity);

const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName());
std::string strTierName(tierName.data(), tierName.size());
NArrow::Append<arrow::StringType>(*builders[13], strTierName);
NArrow::Append<arrow::StringType>(*builders[14], "COL");
NArrow::Append<arrow::StringType>(*builders[13], arrow::util::string_view(lastTierName.data(), lastTierName.size()));
NArrow::Append<arrow::StringType>(*builders[14], ConstantEntityIsColumnView);
}
}
{
Expand All @@ -48,7 +87,7 @@ void TStatsIterator::AppendStats(const std::vector<std::unique_ptr<arrow::ArrayB
}
for (auto&& r : indexes) {
NArrow::Append<arrow::UInt64Type>(*builders[0], portion.GetPathId());
NArrow::Append<arrow::StringType>(*builders[1], prod);
NArrow::Append<arrow::StringType>(*builders[1], prodView);
NArrow::Append<arrow::UInt64Type>(*builders[2], ReadMetadata->TabletId);
NArrow::Append<arrow::UInt64Type>(*builders[3], r->GetRecordsCount());
NArrow::Append<arrow::UInt64Type>(*builders[4], r->GetRawBytes());
Expand All @@ -70,7 +109,7 @@ void TStatsIterator::AppendStats(const std::vector<std::unique_ptr<arrow::ArrayB
const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName());
std::string strTierName(tierName.data(), tierName.size());
NArrow::Append<arrow::StringType>(*builders[13], strTierName);
NArrow::Append<arrow::StringType>(*builders[14], "IDX");
NArrow::Append<arrow::StringType>(*builders[14], ConstantEntityIsIndexView);
}
}
}
Expand Down
26 changes: 26 additions & 0 deletions ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,32 @@ class TReadStatsMetadata: public NAbstract::TReadStatsMetadata, std::enable_shar

class TStatsIterator: public NAbstract::TStatsIterator<NKikimr::NSysView::Schema::PrimaryIndexStats> {
private:
class TViewContainer {
private:
TString Data;
std::string STLData;
arrow::util::string_view View;

public:
const arrow::util::string_view& GetView() const {
return View;
}

TViewContainer(const TString& data)
: Data(data)
, View(arrow::util::string_view(Data.data(), Data.size())) {
}

TViewContainer(const std::string& data)
: STLData(data)
, View(arrow::util::string_view(STLData.data(), STLData.size())) {
}
};

mutable THashMap<ui32, TViewContainer> ColumnNamesById;
mutable THashMap<NPortion::EProduced, TViewContainer> PortionType;
mutable THashMap<TString, THashMap<ui32, TViewContainer>> EntityStorageNames;

using TBase = NAbstract::TStatsIterator<NKikimr::NSysView::Schema::PrimaryIndexStats>;
virtual bool AppendStats(const std::vector<std::unique_ptr<arrow::ArrayBuilder>>& builders, NAbstract::TGranuleMetaView& granule) const override;
virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const override;
Expand Down
Loading