-
Notifications
You must be signed in to change notification settings - Fork 82
clp-s: Add support for projecting of a subset of columns during search. #510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7b5c35f
433a1c0
4b99d5a
3af50c7
4e6224f
ca34490
c8f9689
3f86c62
cb4326f
691412e
8b3dbfa
20197e0
c83ebcc
680a4fb
343861b
b291d77
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#include "Projection.hpp" | ||
|
||
#include <algorithm> | ||
|
||
#include "SearchUtils.hpp" | ||
|
||
namespace clp_s::search { | ||
void Projection::add_column(std::shared_ptr<ColumnDescriptor> column) { | ||
if (column->is_unresolved_descriptor()) { | ||
throw OperationFailed(ErrorCodeBadParam, __FILE__, __LINE__); | ||
} | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (ProjectionMode::ReturnAllColumns == m_projection_mode) { | ||
throw OperationFailed(ErrorCodeUnsupported, __FILE__, __LINE__); | ||
} | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (m_selected_columns.end() | ||
!= std::find_if( | ||
m_selected_columns.begin(), | ||
m_selected_columns.end(), | ||
[column](auto const& rhs) -> bool { return *column == *rhs; } | ||
)) | ||
{ | ||
// no duplicate columns in projection | ||
throw OperationFailed(ErrorCodeBadParam, __FILE__, __LINE__); | ||
} | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
m_selected_columns.push_back(column); | ||
} | ||
|
||
void Projection::resolve_columns(std::shared_ptr<SchemaTree> tree) { | ||
for (auto& column : m_selected_columns) { | ||
resolve_column(tree, column); | ||
} | ||
} | ||
|
||
void Projection::resolve_column( | ||
std::shared_ptr<SchemaTree> tree, | ||
std::shared_ptr<ColumnDescriptor> column | ||
) { | ||
/** | ||
* Ideally we would reuse the code from SchemaMatch for resolving columns, but unfortunately we | ||
* can not. | ||
* | ||
* The main reason is that here we don't want to allow projection to travel inside unstructured | ||
* objects -- it may be possible to support such a thing in the future, but it poses some extra | ||
* challenges (e.g. deciding what to do when projecting repeated elements in a structure). | ||
* | ||
* It would be possible to create code that can handle our use-case and SchemaMatch's use-case | ||
* in an elegant way, but it's a significant refactor. In particular, if we extend our column | ||
* type system to be one-per-token instead of one-per-column we can make it so that intermediate | ||
* tokens will not match certain kinds of MPT nodes (like the node for structured arrays). | ||
* | ||
* In light of that we implement a simple version of column resolution here that does exactly | ||
* what we need. | ||
*/ | ||
|
||
auto cur_node_id = tree->get_root_node_id(); | ||
auto it = column->descriptor_begin(); | ||
while (it != column->descriptor_end()) { | ||
bool matched_any{false}; | ||
auto cur_it = it++; | ||
bool last_token = it == column->descriptor_end(); | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
auto const& cur_node = tree->get_node(cur_node_id); | ||
for (int32_t child_node_id : cur_node.get_children_ids()) { | ||
auto const& child_node = tree->get_node(child_node_id); | ||
|
||
// Intermediate nodes must be objects | ||
if (false == last_token && child_node.get_type() != NodeType::Object) { | ||
continue; | ||
} | ||
|
||
if (child_node.get_key_name() != cur_it->get_token()) { | ||
continue; | ||
} | ||
|
||
matched_any = true; | ||
if (last_token && column->matches_type(node_to_literal_type(child_node.get_type()))) { | ||
m_matching_nodes.insert(child_node_id); | ||
} else if (false == last_token) { | ||
cur_node_id = child_node_id; | ||
break; | ||
} | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
if (false == matched_any) { | ||
break; | ||
} | ||
gibber9809 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
} // namespace clp_s::search |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,82 @@ | ||||||||||||||||||||||||||
#ifndef CLP_S_SEARCH_PROJECTION_HPP | ||||||||||||||||||||||||||
#define CLP_S_SEARCH_PROJECTION_HPP | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#include <vector> | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#include <absl/container/flat_hash_set.h> | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#include "../SchemaTree.hpp" | ||||||||||||||||||||||||||
#include "../TraceableException.hpp" | ||||||||||||||||||||||||||
#include "ColumnDescriptor.hpp" | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
namespace clp_s::search { | ||||||||||||||||||||||||||
/**
 * Selects how a Projection decides which columns belong in the output.
 */
enum ProjectionMode : uint8_t {
    // Every column is reported as matching
    ReturnAllColumns = 0,
    // Only columns that were explicitly added to the projection can match
    ReturnSelectedColumns = 1
};
|
||||||||||||||||||||||||||
/** | ||||||||||||||||||||||||||
* This class describes the set of columns that should be included in the projected results. | ||||||||||||||||||||||||||
* | ||||||||||||||||||||||||||
* After adding columns and before calling matches_node the caller is responsible for calling | ||||||||||||||||||||||||||
* resolve_columns. | ||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||
class Projection { | ||||||||||||||||||||||||||
public: | ||||||||||||||||||||||||||
class OperationFailed : public TraceableException { | ||||||||||||||||||||||||||
public: | ||||||||||||||||||||||||||
// Constructors | ||||||||||||||||||||||||||
OperationFailed(ErrorCode error_code, char const* const filename, int line_number) | ||||||||||||||||||||||||||
: TraceableException(error_code, filename, line_number) {} | ||||||||||||||||||||||||||
}; | ||||||||||||||||||||||||||
Comment on lines
+26
to
+31
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Consider renaming the exception class `OperationFailed` to something more specific, such as `ProjectionOperationFailed`. Apply this diff to rename the exception class: - class OperationFailed : public TraceableException {
+ class ProjectionOperationFailed : public TraceableException { Remember to update all references to `OperationFailed`. 📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
/**
 * @param mode The projection mode this instance operates in
 */
explicit Projection(ProjectionMode mode) : m_projection_mode{mode} {}
|
||||||||||||||||||||||||||
/** | ||||||||||||||||||||||||||
* Adds a column to the set of columns that should be included in the projected results | ||||||||||||||||||||||||||
* @param column | ||||||||||||||||||||||||||
* @throws OperationFailed if `column` contains a wildcard | ||||||||||||||||||||||||||
* @throws OperationFailed if this instance of Projection is in mode ReturnAllColumns | ||||||||||||||||||||||||||
* @throws OperationFailed if `column` is identical to a previously added column | ||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||
void add_column(std::shared_ptr<ColumnDescriptor> column); | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
/** | ||||||||||||||||||||||||||
* Resolves all columns for the purpose of projection. This key resolution implementation is | ||||||||||||||||||||||||||
* more limited than the one in schema matching. In particular, this version of key resolution | ||||||||||||||||||||||||||
* only allows resolving keys that do not contain wildcards and does not allow resolving to | ||||||||||||||||||||||||||
* objects within arrays. | ||||||||||||||||||||||||||
* | ||||||||||||||||||||||||||
* Note: we could try to generalize column resolution code/move it to the schema tree. It is | ||||||||||||||||||||||||||
* probably best to write a simpler version dedicated to projection for now since types are | ||||||||||||||||||||||||||
* leaf-only. The type-per-token idea solves this problem (in the absence of wildcards). | ||||||||||||||||||||||||||
* | ||||||||||||||||||||||||||
* @param tree | ||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||
void resolve_columns(std::shared_ptr<SchemaTree> tree); | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a description for this method? |
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
/** | ||||||||||||||||||||||||||
* Checks whether a column corresponding to given leaf node should be included in the output | ||||||||||||||||||||||||||
* @param node_id | ||||||||||||||||||||||||||
* @return true if the column should be included in the output, false otherwise | ||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||
bool matches_node(int32_t node_id) const { | ||||||||||||||||||||||||||
return ProjectionMode::ReturnAllColumns == m_projection_mode | ||||||||||||||||||||||||||
|| m_matching_nodes.contains(node_id); | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
private: | ||||||||||||||||||||||||||
/** | ||||||||||||||||||||||||||
* Resolves an individual column as described by the `resolve_columns` method. | ||||||||||||||||||||||||||
* @param tree | ||||||||||||||||||||||||||
* @param column | ||||||||||||||||||||||||||
*/ | ||||||||||||||||||||||||||
void resolve_column(std::shared_ptr<SchemaTree> tree, std::shared_ptr<ColumnDescriptor> column); | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also for this method. |
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
std::vector<std::shared_ptr<ColumnDescriptor>> m_selected_columns; | ||||||||||||||||||||||||||
absl::flat_hash_set<int32_t> m_matching_nodes; | ||||||||||||||||||||||||||
ProjectionMode m_projection_mode{ProjectionMode::ReturnAllColumns}; | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove redundant in-class initialization of `m_projection_mode`. Since the only constructor always initializes `m_projection_mode` from its `mode` argument, the in-class initializer is never used. Apply this diff to remove the redundant initialization: - ProjectionMode m_projection_mode{ProjectionMode::ReturnAllColumns};
+ ProjectionMode m_projection_mode; 📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||
}; | ||||||||||||||||||||||||||
} // namespace clp_s::search | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#endif // CLP_S_SEARCH_PROJECTION_HPP |
Uh oh!
There was an error while loading. Please reload this page.