Skip to content

Commit

Permalink
tp: Cleanup table filtering
Browse files Browse the repository at this point in the history
Change-Id: I19def43301022b2fcab074f65478a95065b659fd
  • Loading branch information
aMayzner committed Jul 29, 2024
1 parent 021422b commit c2a9824
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 136 deletions.
16 changes: 16 additions & 0 deletions src/trace_processor/db/column/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,22 @@ struct Query {

// OFFSET value. Can be "!= 0" only if `limit` has value.
uint32_t offset = 0;

// Returns true if query should be used for fetching minimum or maximum value
// of singular column.
inline bool IsMinMaxQuery() const {
// Order needs to specify the sorting.
return order_type == Query::OrderType::kSort
// There can be only one column for sorting.
&& orders.size() == 1
// Limit has value 1
&& limit.has_value() && *limit == 1;
}

// Returns true if the rows of this query have to be sorted: at least one
// order is specified and the order type is anything other than kDistinct.
inline bool RequireSort() const {
  if (orders.empty()) {
    return false;
  }
  return order_type != Query::OrderType::kDistinct;
}
};

// The enum type of the column.
Expand Down
100 changes: 3 additions & 97 deletions src/trace_processor/db/query_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,33 +39,11 @@ using Range = RowMap::Range;
using Indices = column::DataLayerChain::Indices;
using OrderedIndices = column::DataLayerChain::OrderedIndices;

static constexpr uint32_t kIndexVectorThreshold = 1024;

// Tells whether |op| is an operation that can take advantage of the data
// being sorted.
bool IsSortingOp(FilterOp op) {
  switch (op) {
    // Operations reported as unable to use sortedness.
    case FilterOp::kNe:
    case FilterOp::kGlob:
    case FilterOp::kRegex:
      return false;
    // Equality, ordering comparisons and null checks are reported as able to
    // use sortedness.
    case FilterOp::kEq:
    case FilterOp::kLt:
    case FilterOp::kLe:
    case FilterOp::kGt:
    case FilterOp::kGe:
    case FilterOp::kIsNull:
    case FilterOp::kIsNotNull:
      return true;
  }
  // Reached only if |op| matches none of the cases above; the message suggests
  // this exists to satisfy GCC's missing-return diagnostics.
  PERFETTO_FATAL("For GCC");
}

} // namespace

void QueryExecutor::FilterColumn(const Constraint& c,
const column::DataLayerChain& chain,
RowMap* rm) {
void QueryExecutor::ApplyConstraint(const Constraint& c,
const column::DataLayerChain& chain,
RowMap* rm) {
// Shortcut of empty row map.
uint32_t rm_size = rm->size();
if (rm_size == 0)
Expand Down Expand Up @@ -157,78 +135,6 @@ void QueryExecutor::IndexSearch(const Constraint& c,
*rm = RowMap(std::move(table_indices));
}

// Filters the rows of |table| by the constraints in |c_vec| and returns the
// resulting RowMap.
//
// A leading run of constraints is served from a table index when one exists
// (looked up with Table::GetIndex); every remaining constraint is applied with
// FilterColumn. When no index is usable, all constraints go through
// FilterColumn.
RowMap QueryExecutor::FilterLegacy(const Table* table,
const std::vector<Constraint>& c_vec) {
// Start from a RowMap covering [0, row_count).
RowMap rm(0, table->row_count());

// Prework - use indexes if possible and decide which one.
// |maybe_idx_cols| collects the column indices of the leading constraints, so
// position i in this vector corresponds to c_vec[i].
std::vector<uint32_t> maybe_idx_cols;

for (uint32_t i = 0; i < c_vec.size(); i++) {
const Constraint& c = c_vec[i];
// Id columns shouldn't use index.
if (table->columns()[c.col_idx].IsId()) {
break;
}

// The operation has to support sorting.
if (!IsSortingOp(c.op)) {
break;
}

maybe_idx_cols.push_back(c.col_idx);

// For the next col to be able to use index, all previous constraints have
// to be equality.
if (c.op != FilterOp::kEq) {
break;
}
}

// Try the longest column prefix first and shrink it from the back until an
// index is found or no candidate columns remain.
OrderedIndices o_idxs;
while (!maybe_idx_cols.empty()) {
if (auto maybe_idx = table->GetIndex(maybe_idx_cols)) {
o_idxs = std::move(*maybe_idx);
break;
}
maybe_idx_cols.pop_back();
}

// If we can't use the index just filter in a standard way.
if (maybe_idx_cols.empty()) {
for (const auto& c : c_vec) {
FilterColumn(c, table->ChainForColumn(c.col_idx), &rm);
}
return rm;
}

// Narrow the [data, data + size) window of |o_idxs| once per indexed
// constraint: each search returns a Range relative to the current window.
for (uint32_t i = 0; i < maybe_idx_cols.size(); i++) {
const Constraint& c = c_vec[i];

Range r = table->ChainForColumn(c.col_idx).OrderedIndexSearch(c.op, c.value,
o_idxs);
o_idxs.data += r.start;
o_idxs.size = r.size();
}

// Copy the surviving window out. Results below kIndexVectorThreshold are
// sorted and stored as an index vector; larger ones are converted to a
// BitVector built from the unsorted indices.
std::vector<uint32_t> res_vec(o_idxs.data, o_idxs.data + o_idxs.size);
if (res_vec.size() < kIndexVectorThreshold) {
std::sort(res_vec.begin(), res_vec.end());
rm = RowMap(std::move(res_vec));
} else {
rm = RowMap(BitVector::FromUnsortedIndexVector(std::move(res_vec)));
}

// Filter the rest of constraints in a standard way.
// The first maybe_idx_cols.size() constraints were already handled above.
for (uint32_t i = static_cast<uint32_t>(maybe_idx_cols.size());
i < c_vec.size(); i++) {
const Constraint& c = c_vec[i];
FilterColumn(c, table->ChainForColumn(c.col_idx), &rm);
}

return rm;
}

void QueryExecutor::SortLegacy(const Table* table,
const std::vector<Order>& ob,
std::vector<uint32_t>& out) {
Expand Down
13 changes: 5 additions & 8 deletions src/trace_processor/db/query_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,11 @@ class QueryExecutor {
// Applies the constraints in |cs| one by one, in order, to a RowMap that
// initially covers [0, row_count_), and returns the narrowed RowMap. Each
// constraint is evaluated against the column chain selected by its |col_idx|.
//
// NOTE(review): both a FilterColumn and an ApplyConstraint call appear in the
// loop below; this looks like the old and new line of a rename shown side by
// side - confirm that only one of them is meant to remain.
RowMap Filter(const std::vector<Constraint>& cs) {
RowMap rm(0, row_count_);
for (const auto& c : cs) {
FilterColumn(c, *columns_[c.col_idx], &rm);
ApplyConstraint(c, *columns_[c.col_idx], &rm);
}
return rm;
}

// Enables QueryExecutor::Filter on Table columns.
static RowMap FilterLegacy(const Table*, const std::vector<Constraint>&);

// Enables QueryExecutor::Sort on Table columns.
static void SortLegacy(const Table*,
const std::vector<Order>&,
Expand All @@ -65,12 +62,12 @@ class QueryExecutor {
const column::DataLayerChain&,
RowMap*);

private:
// Updates RowMap with result of filtering single column using the Constraint.
static void FilterColumn(const Constraint&,
const column::DataLayerChain&,
RowMap*);
static void ApplyConstraint(const Constraint&,
const column::DataLayerChain&,
RowMap*);

private:
// Filters the column using Range algorithm - tries to find the smallest Range
// to filter the storage with.
static void LinearSearch(const Constraint&,
Expand Down
150 changes: 119 additions & 31 deletions src/trace_processor/db/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,51 @@ namespace perfetto::trace_processor {

namespace {
using Indices = column::DataLayerChain::Indices;

static constexpr uint32_t kIndexVectorThreshold = 1024;

// Tells whether |op| is an operation that can take advantage of the data
// being sorted.
bool IsSortingOp(FilterOp op) {
  switch (op) {
    // Operations reported as unable to use sortedness.
    case FilterOp::kNe:
    case FilterOp::kGlob:
    case FilterOp::kRegex:
      return false;
    // Equality, ordering comparisons and null checks are reported as able to
    // use sortedness.
    case FilterOp::kEq:
    case FilterOp::kLt:
    case FilterOp::kLe:
    case FilterOp::kGt:
    case FilterOp::kGe:
    case FilterOp::kIsNull:
    case FilterOp::kIsNotNull:
      return true;
  }
  // Reached only if |op| matches none of the cases above; the message suggests
  // this exists to satisfy GCC's missing-return diagnostics.
  PERFETTO_FATAL("For GCC");
}

// Replaces |rm| with a RowMap holding at most one row: the row for which
// |chain| reports the maximum element (when |o.desc| is set) or the minimum
// element (otherwise) among the rows currently in |rm|. If the chain reports
// no element, |rm| becomes an empty RowMap.
void ApplyMinMaxQuery(RowMap& rm,
                      Order o,
                      const column::DataLayerChain& chain) {
  // The row map is consumed; its rows are handed to the chain flagged as
  // monotonic.
  std::vector<uint32_t> rows = std::move(rm).TakeAsIndexVector();
  auto candidates = Indices::Create(rows, Indices::State::kMonotonic);

  std::optional<Token> best;
  if (o.desc) {
    best = chain.MaxElement(candidates);
  } else {
    best = chain.MinElement(candidates);
  }

  if (!best.has_value()) {
    rm = RowMap();
    return;
  }
  rm = RowMap(std::vector<uint32_t>{best->payload});
}

// Restricts |rm| to the window of rows selected by the query's offset and
// (optional) limit: rows [start, end) of the current row map, where |start| is
// the offset clamped to the row map size and |end| is at most |limit| rows
// further, clamped to the row map size.
void ApplyLimitAndOffset(RowMap& rm, const Query& q) {
  const uint32_t size = rm.size();
  const uint32_t start = std::min(q.offset, size);
  uint32_t end = size;
  if (q.limit) {
    // Clamp the limit to the rows remaining after |start| before adding it.
    // Computing `limit + offset` directly is 32-bit unsigned arithmetic and
    // wraps around for large values, which would yield end < start.
    end = start + std::min<uint32_t>(*q.limit, size - start);
  }
  rm = rm.SelectRows(RowMap(start, end));
}

} // namespace

Table::Table(StringPool* pool,
uint32_t row_count,
std::vector<ColumnLegacy> columns,
Expand Down Expand Up @@ -92,6 +135,67 @@ Table Table::CopyExceptOverlays() const {
return {string_pool_, row_count_, std::move(cols), {}};
}

// Attempts to serve a leading run of the constraints in |c_vec| from a table
// index.
//
// On success, returns the RowMap of rows matched through the index and erases
// the constraints that were consumed from the front of |c_vec|. If no index
// can be used, returns a RowMap covering [0, row_count()) and leaves |c_vec|
// untouched, so the caller can apply every constraint in the standard way.
RowMap Table::TryApplyIndex(std::vector<Constraint>& c_vec) const {
  // Prework: gather the columns of the leading constraints that are eligible
  // for an index. Entry i corresponds to c_vec[i].
  std::vector<uint32_t> candidate_cols;
  for (const Constraint& c : c_vec) {
    // Id columns shouldn't use an index, and the operation has to support
    // sorting.
    if (columns()[c.col_idx].IsId() || !IsSortingOp(c.op)) {
      break;
    }
    candidate_cols.push_back(c.col_idx);

    // For the next column to be able to use the index, all previous
    // constraints have to be equality.
    if (c.op != FilterOp::kEq) {
      break;
    }
  }

  // Try the longest column prefix first, dropping the last column each time
  // no index is found for the current prefix.
  OrderedIndices o_idxs;
  for (; !candidate_cols.empty(); candidate_cols.pop_back()) {
    if (auto found = GetIndex(candidate_cols)) {
      o_idxs = std::move(*found);
      break;
    }
  }

  // No usable index: hand back all rows and keep every constraint.
  if (candidate_cols.empty()) {
    return RowMap(0, row_count());
  }

  // Narrow the [data, data + size) window of the index once per consumed
  // constraint; each search returns a Range relative to the current window.
  const auto used = candidate_cols.size();
  for (uint32_t i = 0; i < used; i++) {
    const Constraint& c = c_vec[i];
    Range hit =
        ChainForColumn(c.col_idx).OrderedIndexSearch(c.op, c.value, o_idxs);
    o_idxs.data += hit.start;
    o_idxs.size = hit.size();
  }

  std::vector<uint32_t> rows(o_idxs.data, o_idxs.data + o_idxs.size);

  // The constraints answered by the index must not be applied again.
  c_vec.erase(c_vec.begin(), c_vec.begin() + static_cast<uint32_t>(used));

  // Large results are converted to a BitVector built from the unsorted
  // indices; small ones are sorted and kept as an index vector.
  if (rows.size() >= kIndexVectorThreshold) {
    return RowMap(BitVector::FromUnsortedIndexVector(std::move(rows)));
  }
  std::sort(rows.begin(), rows.end());
  return RowMap(std::move(rows));
}

RowMap Table::QueryToRowMap(const Query& q) const {
// We need to delay creation of the chains to this point because of Chrome
// does not want the binary size overhead of including the chain
Expand All @@ -105,51 +209,35 @@ RowMap Table::QueryToRowMap(const Query& q) const {
CreateChains();
}

// Apply the query constraints.
RowMap rm = QueryExecutor::FilterLegacy(this, q.constraints);
auto cs_copy = q.constraints;
RowMap rm = TryApplyIndex(cs_copy);

// Apply the remaining constraints that were not consumed by the index.
for (const auto& c : cs_copy) {
QueryExecutor::ApplyConstraint(c, ChainForColumn(c.col_idx), &rm);
}

if (q.order_type != Query::OrderType::kSort) {
ApplyDistinct(q, &rm);
}

// Fastpath for one sort, no distinct and limit 1. This type of query means we
// need to run Max/Min on orderby column and there is no need for sorting.
if (q.order_type == Query::OrderType::kSort && q.orders.size() == 1 &&
q.limit.has_value() && *q.limit == 1) {
Order o = q.orders.front();
std::vector<uint32_t> table_indices = std::move(rm).TakeAsIndexVector();
auto indices = Indices::Create(table_indices, Indices::State::kMonotonic);
std::optional<Token> ret_tok;

if (o.desc) {
ret_tok = ChainForColumn(o.col_idx).MaxElement(indices);
} else {
ret_tok = ChainForColumn(o.col_idx).MinElement(indices);
}

if (!ret_tok.has_value()) {
return RowMap();
}

return RowMap(std::vector<uint32_t>{ret_tok->payload});
if (q.IsMinMaxQuery()) {
ApplyMinMaxQuery(rm, q.orders.front(),
ChainForColumn(q.orders.front().col_idx));
return rm;
}

if (q.order_type != Query::OrderType::kDistinct && !q.orders.empty()) {
if (q.RequireSort()) {
ApplySort(q, &rm);
}

if (!q.limit.has_value() && q.offset == 0) {
return rm;
}

uint32_t end = rm.size();
uint32_t start = std::min(q.offset, end);

if (q.limit) {
end = std::min(end, *q.limit + q.offset);
if (q.limit.has_value() || q.offset != 0) {
ApplyLimitAndOffset(rm, q);
}

return rm.SelectRows(RowMap(start, end));
return rm;
}

Table Table::Sort(const std::vector<Order>& ob) const {
Expand Down
2 changes: 2 additions & 0 deletions src/trace_processor/db/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ class Table {
void ApplyDistinct(const Query&, RowMap*) const;
void ApplySort(const Query&, RowMap*) const;

RowMap TryApplyIndex(std::vector<Constraint>&) const;

StringPool* string_pool_ = nullptr;
uint32_t row_count_ = 0;
std::vector<ColumnStorageOverlay> overlays_;
Expand Down

0 comments on commit c2a9824

Please sign in to comment.