Skip to content

Commit

Permalink
[Improvement](join) remove unnecessary state for join (apache#13472)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabriel39 authored Oct 21, 2022
1 parent 1f7829e commit d3f65aa
Show file tree
Hide file tree
Showing 5 changed files with 858 additions and 613 deletions.
243 changes: 172 additions & 71 deletions be/src/vec/exec/join/join_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,119 +22,220 @@
#include "vec/core/block.h"

namespace doris::vectorized {
/// Reference to the row in block.
/**
* Now we have different kinds of RowRef for join operation. Overall, RowRef is the base class and
* the class inheritance is below:
* RowRef
* |
* ---------------------------------------------------------
* | | |
* RowRefListWithFlag RowRefList RowRefWithFlag
* |
* RowRefListWithFlags
*
* RowRef is a basic representation for a row which contains only row_num and block_offset.
*
* RowRefList is a list of many RowRefs. It used for join operations which doesn't need any flags to represent whether a row has already been visited.
*
* RowRefListWithFlag is a list of many RowRefs and an extra visited flag. It used for join operations which all RowRefs in a list has the same visited flag.
*
* RowRefWithFlag is a basic representation for a row with an extra visited flag.
*
* RowRefListWithFlags is a list of many RowRefWithFlags. This means each row will have different visited flags. It's used for join operation which has `other_conjuncts`.
*/
struct RowRef {
using SizeT = uint32_t; /// Do not use size_t cause of memory economy

SizeT row_num = 0;
uint8_t block_offset;
// Use in right join to mark row is visited
// TODO: opt the variable to use it only need
bool visited = false;

RowRef() {}
RowRef(size_t row_num_count, uint8_t block_offset_, bool is_visited = false)
: row_num(row_num_count), block_offset(block_offset_), visited(is_visited) {}
RowRef() = default;
RowRef(size_t row_num_count, uint8_t block_offset_)
: row_num(row_num_count), block_offset(block_offset_) {}
};

/// Single linked list of references to rows. Used for ALL JOINs (non-unique JOINs)
struct RowRefList : RowRef {
/// Portion of RowRefs, 16 * (MAX_SIZE + 1) bytes sized.
struct Batch {
static constexpr size_t MAX_SIZE = 7; /// Adequate values are 3, 7, 15, 31.
struct RowRefWithFlag : public RowRef {
bool visited;

SizeT size = 0; /// It's smaller than size_t but keeps align in Arena.
Batch* next;
RowRef row_refs[MAX_SIZE];
RowRefWithFlag() = default;
RowRefWithFlag(size_t row_num_count, uint8_t block_offset_, bool is_visited = false)
: RowRef(row_num_count, block_offset_), visited(is_visited) {}
};

Batch(Batch* parent) : next(parent) {}
/// Portion of RowRefs, 16 * (MAX_SIZE + 1) bytes sized.
template <typename RowRefType>
struct Batch {
static constexpr size_t MAX_SIZE = 7; /// Adequate values are 3, 7, 15, 31.

bool full() const { return size == MAX_SIZE; }
RowRef::SizeT size = 0; /// It's smaller than size_t but keeps align in Arena.
Batch<RowRefType>* next;
RowRefType row_refs[MAX_SIZE];

Batch* insert(RowRef&& row_ref, Arena& pool) {
if (full()) {
auto batch = pool.alloc<Batch>();
*batch = Batch(this);
batch->insert(std::move(row_ref), pool);
return batch;
}
Batch(Batch<RowRefType>* parent) : next(parent) {}

bool full() const { return size == MAX_SIZE; }

row_refs[size++] = std::move(row_ref);
return this;
Batch<RowRefType>* insert(RowRefType&& row_ref, Arena& pool) {
if (full()) {
auto batch = pool.alloc<Batch<RowRefType>>();
*batch = Batch<RowRefType>(this);
batch->insert(std::move(row_ref), pool);
return batch;
}
};

class ForwardIterator {
public:
ForwardIterator(RowRefList* begin)
: root(begin), first(true), batch(root->next), position(0) {}
row_refs[size++] = std::move(row_ref);
return this;
}
};

RowRef& operator*() {
if (first) return *root;
return batch->row_refs[position];
}
RowRef* operator->() { return &(**this); }
template <typename RowRefListType>
class ForwardIterator {
public:
using RowRefType = typename RowRefListType::RowRefType;
ForwardIterator(RowRefListType* begin)
: root(begin), first(true), batch(root->next), position(0) {}

bool operator==(const ForwardIterator& rhs) const {
if (ok() != rhs.ok()) {
return false;
}
if (first && rhs.first) {
return true;
}
return batch == rhs.batch && position == rhs.position;
RowRefType& operator*() {
if (first) return *root;
return batch->row_refs[position];
}
RowRefType* operator->() { return &(**this); }

bool operator==(const ForwardIterator<RowRefListType>& rhs) const {
if (ok() != rhs.ok()) {
return false;
}
if (first && rhs.first) {
return true;
}
bool operator!=(const ForwardIterator& rhs) const { return !(*this == rhs); }
return batch == rhs.batch && position == rhs.position;
}
bool operator!=(const ForwardIterator<RowRefListType>& rhs) const { return !(*this == rhs); }

void operator++() {
if (first) {
first = false;
return;
}
void operator++() {
if (first) {
first = false;
return;
}

if (batch) {
++position;
if (position >= batch->size) {
batch = batch->next;
position = 0;
}
if (batch) {
++position;
if (position >= batch->size) {
batch = batch->next;
position = 0;
}
}
}

bool ok() const { return first || batch; }
bool ok() const { return first || batch; }

static ForwardIterator end() { return ForwardIterator(); }
static ForwardIterator<RowRefListType> end() { return ForwardIterator(); }

private:
RowRefList* root;
bool first;
Batch* batch;
size_t position;
private:
RowRefListType* root;
bool first;
Batch<RowRefType>* batch;
size_t position;

ForwardIterator() : root(nullptr), first(false), batch(nullptr), position(0) {}
};

ForwardIterator() : root(nullptr), first(false), batch(nullptr), position(0) {}
};
struct RowRefList : RowRef {
using RowRefType = RowRef;

RowRefList() {}
RowRefList() = default;
RowRefList(size_t row_num_, uint8_t block_offset_) : RowRef(row_num_, block_offset_) {}

ForwardIterator begin() { return ForwardIterator(this); }
static ForwardIterator end() { return ForwardIterator::end(); }
ForwardIterator<RowRefList> begin() { return ForwardIterator<RowRefList>(this); }
static ForwardIterator<RowRefList> end() { return ForwardIterator<RowRefList>::end(); }

/// insert element after current one
void insert(RowRefType&& row_ref, Arena& pool) {
row_count++;

if (!next) {
next = pool.alloc<Batch<RowRefType>>();
*next = Batch<RowRefType>(nullptr);
}
next = next->insert(std::move(row_ref), pool);
}

uint32_t get_row_count() { return row_count; }

private:
friend class ForwardIterator<RowRefList>;

Batch<RowRefType>* next = nullptr;
uint32_t row_count = 1;
};

struct RowRefListWithFlag : RowRef {
using RowRefType = RowRef;

RowRefListWithFlag() = default;
RowRefListWithFlag(size_t row_num_, uint8_t block_offset_) : RowRef(row_num_, block_offset_) {}

ForwardIterator<RowRefListWithFlag> begin() {
return ForwardIterator<RowRefListWithFlag>(this);
}

static ForwardIterator<RowRefListWithFlag> end() {
return ForwardIterator<RowRefListWithFlag>::end();
}

/// insert element after current one
void insert(RowRef&& row_ref, Arena& pool) {
row_count++;

if (!next) {
next = pool.alloc<Batch>();
*next = Batch(nullptr);
next = pool.alloc<Batch<RowRefType>>();
*next = Batch<RowRefType>(nullptr);
}
next = next->insert(std::move(row_ref), pool);
}

uint32_t get_row_count() { return row_count; }

bool visited = false;

private:
friend class ForwardIterator<RowRefListWithFlag>;

Batch<RowRefType>* next = nullptr;
uint32_t row_count = 1;
};

struct RowRefListWithFlags : RowRefWithFlag {
using RowRefType = RowRefWithFlag;

RowRefListWithFlags() = default;
RowRefListWithFlags(size_t row_num_, uint8_t block_offset_)
: RowRefWithFlag(row_num_, block_offset_) {}

ForwardIterator<RowRefListWithFlags> begin() {
return ForwardIterator<RowRefListWithFlags>(this);
}
static ForwardIterator<RowRefListWithFlags> end() {
return ForwardIterator<RowRefListWithFlags>::end();
}

/// insert element after current one
void insert(RowRefWithFlag&& row_ref, Arena& pool) {
row_count++;

if (!next) {
next = pool.alloc<Batch<RowRefType>>();
*next = Batch<RowRefType>(nullptr);
}
next = next->insert(std::move(row_ref), pool);
}

uint32_t get_row_count() { return row_count; }

private:
Batch* next = nullptr;
friend class ForwardIterator<RowRefListWithFlags>;

Batch<RowRefType>* next = nullptr;
uint32_t row_count = 1;
};

Expand Down
Loading

0 comments on commit d3f65aa

Please sign in to comment.