Skip to content

Commit

Permalink
Remove Render Groups (Step 6) (#7131)
Browse files Browse the repository at this point in the history
Penultimate step:

* Remove render group column creation and population
* Fix tests accordingly
* Add migration mechanism to drop render group columns
* Also flush HeavyConnect foreign table cache for modified tables
* Add config option to explicitly enable migration

By default, migration is disabled.
Poly tables will present render group column as separate logical column.
They can be dropped manually, or allow migration to occur on all tables by enabling config.
Once migration has occurred, tables are no longer backwards-compatible with older builds.
ENABLE MIGRATION ON A SYSTEM ONLY WHEN IT NO LONGER NEEDS TO BE REVERTED!

RESTORE TABLE of dumps of tables with poly columns will not work at all for now.
This will be resolved ASAP in a follow-up PR.
If necessary, revert to an older build, restore the dump, and then switch back to the newer build.

Signed-off-by: jack <jack@omnisci.com>
  • Loading branch information
simoneves authored and jack-mapd committed Jan 23, 2023
1 parent d9d84dd commit a64974d
Show file tree
Hide file tree
Showing 23 changed files with 251 additions and 303 deletions.
17 changes: 5 additions & 12 deletions Catalog/Catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,11 @@ void Catalog::checkDateInDaysColumnMigration() {
tableDescriptorMapById_, getCurrentDB().dbId, this, sqliteConnector_);
}

void Catalog::checkDropRenderGroupColumnsMigration() {
// do not take cat_sqlite_lock here as Catalog functions do that themselves
migrations::MigrationMgr::dropRenderGroupColumns(tableDescriptorMapById_, this);
}

void Catalog::createDashboardSystemRoles() {
std::unordered_map<std::string, std::pair<int, std::string>> dashboards;
std::vector<std::string> dashboard_ids;
Expand Down Expand Up @@ -2668,12 +2673,6 @@ void Catalog::expandGeoColumn(const ColumnDescriptor& cd,
physical_cd_bounds.columnType = bounds_ti;
columns.push_back(physical_cd_bounds);

ColumnDescriptor physical_cd_render_group(true);
physical_cd_render_group.columnName = cd.columnName + "_render_group";
SQLTypeInfo render_group_ti = SQLTypeInfo(kINT, col_ti.get_notnull());
physical_cd_render_group.columnType = render_group_ti;
columns.push_back(physical_cd_render_group);

// If adding more physical columns - update SQLTypeInfo::get_physical_cols()

break;
Expand Down Expand Up @@ -2709,12 +2708,6 @@ void Catalog::expandGeoColumn(const ColumnDescriptor& cd,
physical_cd_bounds.columnType = bounds_ti;
columns.push_back(physical_cd_bounds);

ColumnDescriptor physical_cd_render_group(true);
physical_cd_render_group.columnName = cd.columnName + "_render_group";
SQLTypeInfo render_group_ti = SQLTypeInfo(kINT, col_ti.get_notnull());
physical_cd_render_group.columnType = render_group_ti;
columns.push_back(physical_cd_render_group);

// If adding more physical columns - update SQLTypeInfo::get_physical_cols()

break;
Expand Down
1 change: 1 addition & 0 deletions Catalog/Catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ class Catalog final {
const std::string& new_owner);

bool isInfoSchemaDb() const;
void checkDropRenderGroupColumnsMigration();

protected:
void CheckAndExecuteMigrations();
Expand Down
13 changes: 13 additions & 0 deletions Catalog/SysCatalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3190,6 +3190,19 @@ void SysCatalog::rebuildObjectMapsUnlocked() {
buildObjectDescriptorMapUnlocked();
}

void SysCatalog::checkDropRenderGroupColumnsMigration() const {
sys_write_lock write_lock(this);
sys_sqlite_lock sqlite_lock(this);
static const std::string drop_render_groups_migration{"drop_render_groups"};
if (!hasExecutedMigration(drop_render_groups_migration)) {
auto cats = Catalog_Namespace::SysCatalog::instance().getCatalogsForAllDbs();
for (auto& cat : cats) {
cat->checkDropRenderGroupColumnsMigration();
}
recordExecutedMigration(drop_render_groups_migration);
}
}

const TableDescriptor* get_metadata_for_table(const ::shared::TableKey& table_key,
bool populate_fragmenter) {
const auto catalog = SysCatalog::instance().getCatalog(table_key.db_id);
Expand Down
2 changes: 1 addition & 1 deletion Catalog/SysCatalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ class SysCatalog : private CommonFileOperations {
const Catalog_Namespace::Catalog& catalog);

bool hasExecutedMigration(const std::string& migration_name) const;

void checkDropRenderGroupColumnsMigration() const;
private:
using GranteeMap = std::map<std::string, std::unique_ptr<Grantee>>;
using ObjectRoleDescriptorMap =
Expand Down
84 changes: 8 additions & 76 deletions DataMgr/ForeignStorage/GeospatialEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,11 @@ class GeospatialEncoder {
, bounds_column_encoder_(nullptr)
, ring_or_line_sizes_column_encoder_(nullptr)
, poly_rings_column_encoder_(nullptr)
, render_group_column_encoder_(nullptr)
, base_column_metadata_(nullptr)
, coords_column_metadata_(nullptr)
, bounds_column_metadata_(nullptr)
, ring_or_line_sizes_column_metadata_(nullptr)
, poly_rings_column_metadata_(nullptr)
, render_group_column_metadata_(nullptr) {
, poly_rings_column_metadata_(nullptr) {
CHECK(geo_column_descriptor_->columnType.is_geometry());
validateChunksSizing(chunks);
const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
Expand All @@ -73,10 +71,6 @@ class GeospatialEncoder {
ring_or_line_sizes_column_descriptor_ =
getColumnDescriptor(chunks, geo_column_type, RING_OR_LINE_SIZES);
}
if (hasRenderGroupColumn()) {
render_group_column_descriptor_ =
getColumnDescriptor(chunks, geo_column_type, RENDER_GROUP);
}

// initialize poly rings column
if (hasPolyRingsColumn()) {
Expand All @@ -93,13 +87,11 @@ class GeospatialEncoder {
, bounds_column_encoder_(nullptr)
, ring_or_line_sizes_column_encoder_(nullptr)
, poly_rings_column_encoder_(nullptr)
, render_group_column_encoder_(nullptr)
, base_column_metadata_(nullptr)
, coords_column_metadata_(nullptr)
, bounds_column_metadata_(nullptr)
, ring_or_line_sizes_column_metadata_(nullptr)
, poly_rings_column_metadata_(nullptr)
, render_group_column_metadata_(nullptr) {
, poly_rings_column_metadata_(nullptr) {
CHECK(geo_column_descriptor_->columnType.is_geometry());

validateChunksSizing(chunks);
Expand Down Expand Up @@ -136,13 +128,6 @@ class GeospatialEncoder {
initEncoderAndGetEncoderAndMetadata(
chunks, chunk_metadata, geo_column_type, RING_OR_LINE_SIZES);
}
if (hasRenderGroupColumn()) {
std::tie(render_group_column_encoder_,
render_group_column_metadata_,
render_group_column_descriptor_) =
initEncoderAndGetEncoderAndMetadata(
chunks, chunk_metadata, geo_column_type, RENDER_GROUP);
}

// initialize poly rings column
if (hasPolyRingsColumn()) {
Expand All @@ -155,48 +140,21 @@ class GeospatialEncoder {
}

protected:
void appendBaseAndRenderGroupDataAndUpdateMetadata(const int64_t row_count) {
void appendBaseDataAndUpdateMetadata(const int64_t row_count) {
base_values_.resize(row_count);
*base_column_metadata_ =
*base_column_encoder_->appendData(&base_values_, 0, row_count);
if (hasRenderGroupColumn()) {
CHECK_EQ(render_group_value_buffer_.size(), static_cast<size_t>(row_count))
<< "Render Group Values not generated correctly!";
auto data_ptr = reinterpret_cast<int8_t*>(render_group_value_buffer_.data());
*render_group_column_metadata_ = *render_group_column_encoder_->appendData(
data_ptr, row_count, render_group_column_descriptor_->columnType);
}
}

void validateChunksSizing(std::list<Chunk_NS::Chunk>& chunks) const {
const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
if (geo_column_type == kPOINT) {
CHECK(chunks.size() == 2);
} else if (geo_column_type == kLINESTRING || geo_column_type == kMULTIPOINT) {
CHECK(chunks.size() == 3);
} else if (geo_column_type == kMULTILINESTRING) {
CHECK(chunks.size() == 4);
} else if (geo_column_type == kPOLYGON) {
CHECK(chunks.size() == 5);
} else if (geo_column_type == kMULTIPOLYGON) {
CHECK(chunks.size() == 6);
}
size_t expected_size = geo_column_descriptor_->columnType.get_physical_cols() + 1;
CHECK_EQ(chunks.size(), expected_size);
}

void validateMetadataSizing(
std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata) const {
const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
if (geo_column_type == kPOINT) {
CHECK(chunk_metadata.size() == 2);
} else if (geo_column_type == kLINESTRING || geo_column_type == kMULTIPOINT) {
CHECK(chunk_metadata.size() == 3);
} else if (geo_column_type == kMULTILINESTRING) {
CHECK(chunk_metadata.size() == 4);
} else if (geo_column_type == kPOLYGON) {
CHECK(chunk_metadata.size() == 5);
} else if (geo_column_type == kMULTIPOLYGON) {
CHECK(chunk_metadata.size() == 6);
}
size_t expected_size = geo_column_descriptor_->columnType.get_physical_cols() + 1;
CHECK_EQ(chunk_metadata.size(), expected_size);
}

void appendArrayDatumsToBufferAndUpdateMetadata() {
Expand Down Expand Up @@ -276,10 +234,6 @@ class GeospatialEncoder {
poly_rings_datum_buffer_.emplace_back(
encode_as_array_datum(poly_rings_parse_buffer_));
}

if (hasRenderGroupColumn()) {
render_group_value_buffer_.emplace_back(0);
}
}

void processNullGeoElement() {
Expand Down Expand Up @@ -314,10 +268,6 @@ class GeospatialEncoder {
import_export::ImporterUtils::composeNullArray(
poly_rings_column_descriptor_->columnType));
}
if (hasRenderGroupColumn()) {
static constexpr int32_t kNullRenderGroupValue = -1;
render_group_value_buffer_.emplace_back(kNullRenderGroupValue);
}
}

void clearParseBuffers() {
Expand All @@ -332,10 +282,9 @@ class GeospatialEncoder {
bounds_datum_buffer_.clear();
ring_or_line_sizes_datum_buffer_.clear();
poly_rings_datum_buffer_.clear();
render_group_value_buffer_.clear();
}

enum GeoColumnType { COORDS, BOUNDS, RING_OR_LINE_SIZES, POLY_RINGS, RENDER_GROUP };
enum GeoColumnType { COORDS, BOUNDS, RING_OR_LINE_SIZES, POLY_RINGS };

template <typename T>
typename std::list<T>::iterator getIteratorForGeoColumnType(
Expand Down Expand Up @@ -388,10 +337,6 @@ class GeospatialEncoder {
if (geo_column == BOUNDS) {
return list_iter;
}
list_iter++;
if (geo_column == RENDER_GROUP) {
return list_iter;
}
UNREACHABLE();
}
case kMULTIPOLYGON: {
Expand All @@ -410,10 +355,6 @@ class GeospatialEncoder {
if (geo_column == BOUNDS) {
return list_iter;
}
list_iter++;
if (geo_column == RENDER_GROUP) {
return list_iter;
}
UNREACHABLE();
}
default:
Expand Down Expand Up @@ -471,11 +412,6 @@ class GeospatialEncoder {
geo_column_type == kMULTILINESTRING;
}

bool hasRenderGroupColumn() const {
const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
return geo_column_type == kPOLYGON || geo_column_type == kMULTIPOLYGON;
}

bool hasPolyRingsColumn() const {
const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
return geo_column_type == kMULTIPOLYGON;
Expand All @@ -490,20 +426,17 @@ class GeospatialEncoder {
Encoder* bounds_column_encoder_;
Encoder* ring_or_line_sizes_column_encoder_;
Encoder* poly_rings_column_encoder_;
Encoder* render_group_column_encoder_;

ChunkMetadata* base_column_metadata_;
ChunkMetadata* coords_column_metadata_;
ChunkMetadata* bounds_column_metadata_;
ChunkMetadata* ring_or_line_sizes_column_metadata_;
ChunkMetadata* poly_rings_column_metadata_;
ChunkMetadata* render_group_column_metadata_;

const ColumnDescriptor* coords_column_descriptor_;
const ColumnDescriptor* bounds_column_descriptor_;
const ColumnDescriptor* ring_or_line_sizes_column_descriptor_;
const ColumnDescriptor* poly_rings_column_descriptor_;
const ColumnDescriptor* render_group_column_descriptor_;

std::vector<std::string> base_values_;

Expand All @@ -519,7 +452,6 @@ class GeospatialEncoder {
std::vector<ArrayDatum> bounds_datum_buffer_;
std::vector<ArrayDatum> ring_or_line_sizes_datum_buffer_;
std::vector<ArrayDatum> poly_rings_datum_buffer_;
std::vector<int32_t> render_group_value_buffer_;
};

} // namespace foreign_storage
2 changes: 1 addition & 1 deletion DataMgr/ForeignStorage/OdbcGeospatialEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class OdbcGeospatialEncoder : public GeospatialEncoder {
}
}
appendArrayDatumsToBufferAndUpdateMetadata();
appendBaseAndRenderGroupDataAndUpdateMetadata(num_rows);
appendBaseDataAndUpdateMetadata(num_rows);
}

private:
Expand Down
2 changes: 1 addition & 1 deletion DataMgr/ForeignStorage/ParquetGeospatialEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class ParquetGeospatialEncoder : public ParquetEncoder, public GeospatialEncoder

appendArrayDatumsToBufferAndUpdateMetadata();

appendBaseAndRenderGroupDataAndUpdateMetadata(levels_read);
appendBaseDataAndUpdateMetadata(levels_read);

if (is_error_tracking_enabled_) {
current_chunk_offset_ += levels_read;
Expand Down
21 changes: 3 additions & 18 deletions DataMgr/ForeignStorage/ParquetGeospatialImportEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ class ParquetGeospatialImportEncoder : public ParquetEncoder,
, coords_column_buffer_(nullptr)
, bounds_column_buffer_(nullptr)
, ring_or_line_sizes_column_buffer_(nullptr)
, poly_rings_column_buffer_(nullptr)
, render_group_column_buffer_(nullptr) {
, poly_rings_column_buffer_(nullptr) {
CHECK(geo_column_descriptor_->columnType.is_geometry());

const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
Expand Down Expand Up @@ -72,10 +71,6 @@ class ParquetGeospatialImportEncoder : public ParquetEncoder,
getBuffer(chunks, geo_column_type, RING_OR_LINE_SIZES));
CHECK(ring_or_line_sizes_column_buffer_);
}
if (hasRenderGroupColumn()) {
render_group_column_buffer_ = getBuffer(chunks, geo_column_type, RENDER_GROUP);
CHECK(render_group_column_buffer_);
}

// initialize poly rings column
if (hasPolyRingsColumn()) {
Expand Down Expand Up @@ -112,11 +107,6 @@ class ParquetGeospatialImportEncoder : public ParquetEncoder,
if (hasPolyRingsColumn()) {
poly_rings_column_buffer_->eraseInvalidData(invalid_indices);
}
if (hasRenderGroupColumn()) {
render_group_column_buffer_->setSize(
sizeof(int32_t) *
(render_group_column_buffer_->size() - invalid_indices.size()));
}
}

void appendData(const int16_t* def_levels,
Expand Down Expand Up @@ -151,7 +141,7 @@ class ParquetGeospatialImportEncoder : public ParquetEncoder,

appendArrayDatumsToBuffer();

appendBaseAndRenderGroupData(levels_read);
appendBaseData(levels_read);

current_batch_offset_ += levels_read;
}
Expand Down Expand Up @@ -184,14 +174,10 @@ class ParquetGeospatialImportEncoder : public ParquetEncoder,
appendArrayDatumsIfApplicable(poly_rings_column_buffer_, poly_rings_datum_buffer_);
}

void appendBaseAndRenderGroupData(const int64_t row_count) {
void appendBaseData(const int64_t row_count) {
for (int64_t i = 0; i < row_count; ++i) {
base_column_buffer_->appendElement("");
}
if (render_group_column_buffer_) {
auto data_ptr = reinterpret_cast<int8_t*>(render_group_value_buffer_.data());
render_group_column_buffer_->append(data_ptr, sizeof(int32_t) * row_count);
}
}

AbstractBuffer* getBuffer(std::list<Chunk_NS::Chunk>& chunks,
Expand All @@ -209,7 +195,6 @@ class ParquetGeospatialImportEncoder : public ParquetEncoder,
TypedParquetStorageBuffer<ArrayDatum>* bounds_column_buffer_;
TypedParquetStorageBuffer<ArrayDatum>* ring_or_line_sizes_column_buffer_;
TypedParquetStorageBuffer<ArrayDatum>* poly_rings_column_buffer_;
AbstractBuffer* render_group_column_buffer_;
};

} // namespace foreign_storage
Loading

0 comments on commit a64974d

Please sign in to comment.