Skip to content

Commit

Permalink
init table in join() without init of underlying t_columns
Browse files Browse the repository at this point in the history
  • Loading branch information
sc1f committed Jun 28, 2021
1 parent ddfcb3a commit a07d236
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 21 deletions.
48 changes: 28 additions & 20 deletions cpp/perspective/src/cpp/data_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,26 +99,28 @@ t_data_table::name() const {
}

// Diff view of t_data_table::init(): the old and new signatures appear on
// consecutive lines, and the removed loop precedes its replacement.
void
t_data_table::init() {
t_data_table::init(bool make_columns) {
PSP_TRACE_SENTINEL();
LOG_INIT("t_data_table");
// Size the column vector to the schema; every slot starts out as an empty
// shared_ptr.
m_columns = std::vector<std::shared_ptr<t_column>>(m_schema.size());

// Removed version: each column was constructed and initialized
// unconditionally.
#ifdef PSP_PARALLEL_FOR
tbb::parallel_for(0, int(m_schema.size()), 1,
[this](int idx)
#else
for (t_uindex idx = 0, loop_end = m_schema.size(); idx < loop_end; ++idx)
#endif
{
const std::string& colname = m_schema.m_columns[idx];
t_dtype dtype = m_schema.m_types[idx];
m_columns[idx] = make_column(colname, dtype, m_schema.m_status_enabled[idx]);
m_columns[idx]->init();
}
#ifdef PSP_PARALLEL_FOR
);
#endif
// Added version: the same per-column construction, now skipped when
// `make_columns` is false so that the slots stay empty.
if (make_columns) {
// The loop header is chosen at compile time: a TBB parallel_for when
// PSP_PARALLEL_FOR is defined, otherwise a plain index loop. Both share the
// braced body below.
#ifdef PSP_PARALLEL_FOR
tbb::parallel_for(0, int(m_schema.size()), 1,
[this](int idx)
#else
for (t_uindex idx = 0, loop_end = m_schema.size(); idx < loop_end; ++idx)
#endif
{
// Build the column described by schema entry `idx`, then initialize it.
const std::string& colname = m_schema.m_columns[idx];
t_dtype dtype = m_schema.m_types[idx];
m_columns[idx] = make_column(colname, dtype, m_schema.m_status_enabled[idx]);
m_columns[idx]->init();
}
#ifdef PSP_PARALLEL_FOR
);
#endif
}

// The table is flagged as initialized whether or not columns were built.
m_init = true;
}
Expand Down Expand Up @@ -639,21 +641,27 @@ t_data_table::join(std::shared_ptr<t_data_table> other_table) const {
}

std::shared_ptr<t_data_table> rval = std::make_shared<t_data_table>(
"", "", schema, get_capacity(), BACKING_STORE_MEMORY);
rval->init();
"", "", schema, DEFAULT_EMPTY_CAPACITY, BACKING_STORE_MEMORY);

// init() without initializing the t_columns on the returned table, as
// they will be immediately replaced by the columns from the tables
// we are joining together.
rval->init(false);

// borrow columns from the current table
for (const std::string& column : m_schema.m_columns) {
rval->set_column(column, get_column(column));
}

// and the columns we need from the other table
for (const std::string& column : other_columns) {
rval->set_column(column, other_table->get_column(column));
}

// don't set size on the columns - they are already of equal size, and we
// don't want to mutate the columns.
// Set size and capacity on the table only - don't mutate any of the
// columns.
rval->set_table_size(size());
rval->set_capacity(std::max(get_capacity(), other_table->get_capacity()));

return rval;
}
Expand Down
7 changes: 6 additions & 1 deletion cpp/perspective/src/include/perspective/data_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,12 @@ class PERSPECTIVE_EXPORT t_data_table {
t_uindex init_cap, t_backing_store backing_store);
~t_data_table();

void init();
/**
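* @brief Initialize the `t_data_table`. If `make_columns` is true (the
* default), construct and initialize the `t_column`s for the table. If it
* is false, the column slots are allocated but left empty so the caller
* can assign columns afterwards.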
*/
void init(bool make_columns = true);

const std::string& name() const;

Expand Down
37 changes: 37 additions & 0 deletions python/perspective/perspective/tests/table/test_view_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,43 @@ def data():
result = view.to_dict()
assert result["123"] == [123 for _ in range(300)]

def test_view_streaming_expression_limit(self):
    """Stream five batches into a table created with limit=50 and check
    that the table holds 50 rows and the constant expression column
    "123" holds 50 values of 123."""
    row_limit = 50

    def make_rows():
        # 55 rows per batch, which is more than the limit of 50.
        return [{"a": random()} for _ in range(55)]

    table = Table(make_rows(), limit=row_limit)
    view = table.view(expressions=["123"])

    for _ in range(5):
        table.update(make_rows())

    assert table.size() == row_limit
    columns = view.to_dict()
    assert columns["123"] == [123] * row_limit

def test_view_streaming_expression_one(self):
    """Stream five more batches of 50 rows into a table whose view is
    row-pivoted on "c0" and check that the table ends up with 300 rows."""
    batch_size = 50
    extra_batches = 5

    def make_rows():
        return [{"a": random()} for _ in range(batch_size)]

    table = Table(make_rows())
    # The view is kept in a local for the duration of the updates; its
    # output is never read. "c0" presumably refers to the name on the
    # expression's leading `//c0` line.
    view = table.view(row_pivots=["c0"], expressions=['//c0\n"a" * 2'])

    for _ in range(extra_batches):
        table.update(make_rows())

    assert table.size() == 300

def test_view_streaming_expression_two(self):
    """Stream five more batches of 50 rows into a table whose view is
    row-pivoted on "c0" and column-pivoted on "c1", then check that the
    table ends up with 300 rows."""
    batch_size = 50
    extra_batches = 5

    def make_rows():
        return [{"a": random()} for _ in range(batch_size)]

    # Two expressions; "c0" and "c1" presumably refer to the names on
    # their leading `//c0` and `//c1` lines.
    expressions = ['//c0\n"a" * 2', "//c1\n'new string'"]

    table = Table(make_rows())
    # The view is kept in a local for the duration of the updates; its
    # output is never read.
    view = table.view(
        row_pivots=["c0"],
        column_pivots=["c1"],
        expressions=expressions,
    )

    for _ in range(extra_batches):
        table.update(make_rows())

    assert table.size() == 300

def test_view_expression_create_no_alias(self):
table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
Expand Down

0 comments on commit a07d236

Please sign in to comment.