From c4286099ff88b180a4f4dc46743c6df94331d023 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Wed, 7 Oct 2020 09:00:55 -0700 Subject: [PATCH] feat: add UUID column to ImportMixin (#11098) * Add UUID column to ImportMixin * Fix default value * Fix lint * Fix order of downgrade * Add logging when downgrade fails * Migrate position_json to contain UUIDs, and add schedule tables * Save UUID when adding charts to dashboard * Fix heads * Rename migration file * Fix dashboard serialization * Fix migration script with Postgres * Fix unique contraint name * Handle UUID when exporting dashboard * Fix Dataset PUT * Add UUID JSON serialization * Fix tests * Simplify logic * Try binary=True --- superset/dashboards/dao.py | 11 ++ superset/datasets/schemas.py | 1 + ...0de1855_add_uuid_column_to_import_mixin.py | 154 ++++++++++++++++++ superset/models/helpers.py | 6 + superset/models/slice.py | 6 +- superset/utils/core.py | 4 +- superset/views/core.py | 2 +- tests/databases/api_tests.py | 2 +- tests/dict_import_export_tests.py | 20 ++- tests/import_export_tests.py | 4 +- 10 files changed, 198 insertions(+), 12 deletions(-) create mode 100644 superset/migrations/versions/b56500de1855_add_uuid_column_to_import_mixin.py diff --git a/superset/dashboards/dao.py b/superset/dashboards/dao.py index bddc467dd5533..46ce63a4f7b9a 100644 --- a/superset/dashboards/dao.py +++ b/superset/dashboards/dao.py @@ -99,6 +99,17 @@ def set_dash_metadata( dashboard.slices = current_slices + # add UUID to positions + uuid_map = {slice.id: str(slice.uuid) for slice in current_slices} + for obj in positions.values(): + if ( + isinstance(obj, dict) + and obj["type"] == "CHART" + and obj["meta"]["chartId"] + ): + chart_id = obj["meta"]["chartId"] + obj["meta"]["uuid"] = uuid_map[chart_id] + # remove leading and trailing white spaces in the dumped json dashboard.position_json = json.dumps( positions, indent=None, separators=(",", ":"), sort_keys=True diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py index 46618d79aaa44..a4341db15d4cc 100644 --- a/superset/datasets/schemas.py +++ b/superset/datasets/schemas.py @@ -52,6 +52,7 @@ class DatasetColumnsPutSchema(Schema): python_date_format = fields.String( allow_none=True, validate=[Length(1, 255), validate_python_date_format] ) + uuid = fields.String(allow_none=True) class DatasetMetricsPutSchema(Schema): diff --git a/superset/migrations/versions/b56500de1855_add_uuid_column_to_import_mixin.py b/superset/migrations/versions/b56500de1855_add_uuid_column_to_import_mixin.py new file mode 100644 index 0000000000000..19903b94c4f63 --- /dev/null +++ b/superset/migrations/versions/b56500de1855_add_uuid_column_to_import_mixin.py @@ -0,0 +1,154 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""add_uuid_column_to_import_mixin + +Revision ID: b56500de1855 +Revises: 18532d70ab98 +Create Date: 2020-09-28 17:57:23.128142 + +""" +import json +import logging +import uuid + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy_utils import UUIDType + +from superset import db +from superset.utils import core as utils + +# revision identifiers, used by Alembic. +revision = "b56500de1855" +down_revision = "18532d70ab98" + + +Base = declarative_base() + + +class ImportMixin: + id = sa.Column(sa.Integer, primary_key=True) + uuid = sa.Column(UUIDType(binary=True), primary_key=False, default=uuid.uuid4) + + +table_names = [ + # Core models + "dbs", + "dashboards", + "slices", + # SQLAlchemy connectors + "tables", + "table_columns", + "sql_metrics", + # Druid connector + "clusters", + "datasources", + "columns", + "metrics", + # Dashboard email schedules + "dashboard_email_schedules", + "slice_email_schedules", +] +models = { + table_name: type(table_name, (Base, ImportMixin), {"__tablename__": table_name}) + for table_name in table_names +} + +models["dashboards"].position_json = sa.Column(utils.MediumText()) + + +def add_uuids(objects, session, batch_size=100): + uuid_map = {} + count = len(objects) + for i, object_ in enumerate(objects): + object_.uuid = uuid.uuid4() + uuid_map[object_.id] = object_.uuid + session.merge(object_) + if (i + 1) % batch_size == 0: + session.commit() + print(f"uuid assigned to {i + 1} out of {count}") + + session.commit() + print(f"Done! Assigned {count} uuids") + + return uuid_map + + +def update_position_json(dashboard, session, uuid_map): + layout = json.loads(dashboard.position_json or "{}") + for object_ in layout.values(): + if ( + isinstance(object_, dict) + and object_["type"] == "CHART" + and object_["meta"]["chartId"] + ): + chart_id = object_["meta"]["chartId"] + if chart_id in uuid_map: + object_["meta"]["uuid"] = str(uuid_map[chart_id]) + elif object_["meta"].get("uuid"): + del object_["meta"]["uuid"] + + dashboard.position_json = json.dumps(layout, indent=4) + session.merge(dashboard) + session.commit() + + +def upgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + uuid_maps = {} + for table_name, model in models.items(): + with op.batch_alter_table(table_name) as batch_op: + batch_op.add_column( + sa.Column( + "uuid", + UUIDType(binary=True), + primary_key=False, + default=uuid.uuid4, + ) + ) + + # populate column + objects = session.query(model).all() + uuid_maps[table_name] = add_uuids(objects, session) + + # add uniqueness constraint + with op.batch_alter_table(table_name) as batch_op: + batch_op.create_unique_constraint(f"uq_{table_name}_uuid", ["uuid"]) + + # add UUID to Dashboard.position_json + Dashboard = models["dashboards"] + for dashboard in session.query(Dashboard).all(): + update_position_json(dashboard, session, uuid_maps["slices"]) + + +def downgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + # remove uuid from position_json + Dashboard = models["dashboards"] + for dashboard in session.query(Dashboard).all(): + update_position_json(dashboard, session, {}) + + # remove uuid column + for table_name, model in models.items(): + with op.batch_alter_table(model) as batch_op: + batch_op.drop_constraint(f"uq_{table_name}_uuid") + batch_op.drop_column("uuid") diff --git a/superset/models/helpers.py b/superset/models/helpers.py index dd3a068d9425e..08301a7fe1ea2 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py 
@@ -18,6 +18,7 @@ import json import logging import re +import uuid from datetime import datetime, timedelta from json.decoder import JSONDecodeError from typing import Any, Dict, List, Optional, Set, Union @@ -35,6 +36,7 @@ from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import Mapper, Session from sqlalchemy.orm.exc import MultipleResultsFound +from sqlalchemy_utils import UUIDType from superset.utils.core import QueryStatus @@ -51,6 +53,10 @@ def json_to_dict(json_str: str) -> Dict[Any, Any]: class ImportMixin: + uuid = sa.Column( + UUIDType(binary=True), primary_key=False, unique=True, default=uuid.uuid4 + ) + export_parent: Optional[str] = None # The name of the attribute # with the SQL Alchemy back reference diff --git a/superset/models/slice.py b/superset/models/slice.py index a442d8ee9abd4..23187d98c1a71 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -198,15 +198,15 @@ def data(self) -> Dict[str, Any]: @property def digest(self) -> str: """ - Returns a MD5 HEX digest that makes this dashboard unique + Returns a MD5 HEX digest that makes this dashboard unique """ return utils.md5_hex(self.params) @property def thumbnail_url(self) -> str: """ - Returns a thumbnail URL with a HEX digest. We want to avoid browser cache - if the dashboard has changed + Returns a thumbnail URL with a HEX digest. We want to avoid browser cache + if the dashboard has changed """ return f"/api/v1/chart/{self.id}/thumbnail/{self.digest}/" diff --git a/superset/utils/core.py b/superset/utils/core.py index 15e2678bca979..541d24b411d07 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -310,7 +310,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.sort_keys = True - def default(self, o: Any) -> Dict[Any, Any]: + def default(self, o: Any) -> Union[Dict[Any, Any], str]: + if isinstance(o, uuid.UUID): + return str(o) try: vals = {k: v for k, v in o.__dict__.items() if k != "_sa_instance_state"} return {"__{}__".format(o.__class__.__name__): vals} diff --git a/superset/views/core.py b/superset/views/core.py index 9d9caafc4107c..fd6a1133997db 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -1784,7 +1784,7 @@ def sync_druid_source(self) -> FlaskResponse: # pylint: disable=no-self-use @expose("/get_or_create_table/", methods=["POST"]) @event_logger.log_this def sqllab_table_viz(self) -> FlaskResponse: # pylint: disable=no-self-use - """ Gets or creates a table object with attributes passed to the API. + """Gets or creates a table object with attributes passed to the API. It expects the json with params: * datasourceName - e.g. 
table name, required diff --git a/tests/databases/api_tests.py b/tests/databases/api_tests.py index b0f853b6d2888..a5ce0b408021f 100644 --- a/tests/databases/api_tests.py +++ b/tests/databases/api_tests.py @@ -328,8 +328,8 @@ def test_create_database_fail_sqllite(self): ] } } - self.assertEqual(response.status_code, 400) self.assertEqual(response_data, expected_response) + self.assertEqual(response.status_code, 400) def test_create_database_conn_fail(self): """ diff --git a/tests/dict_import_export_tests.py b/tests/dict_import_export_tests.py index dc0b8d8d875a4..176ecd07549e4 100644 --- a/tests/dict_import_export_tests.py +++ b/tests/dict_import_export_tests.py @@ -18,6 +18,7 @@ """Unit tests for Superset""" import json import unittest +from uuid import uuid4 import yaml @@ -64,26 +65,33 @@ def setUpClass(cls): def tearDownClass(cls): cls.delete_imports() - def create_table(self, name, schema="", id=0, cols_names=[], metric_names=[]): + def create_table( + self, name, schema="", id=0, cols_names=[], cols_uuids=None, metric_names=[] + ): database_name = "main" name = "{0}{1}".format(NAME_PREFIX, name) params = {DBREF: id, "database_name": database_name} + if cols_uuids is None: + cols_uuids = [None] * len(cols_names) + dict_rep = { "database_id": get_example_database().id, "table_name": name, "schema": schema, "id": id, "params": json.dumps(params), - "columns": [{"column_name": c} for c in cols_names], + "columns": [ + {"column_name": c, "uuid": u} for c, u in zip(cols_names, cols_uuids) + ], "metrics": [{"metric_name": c, "expression": ""} for c in metric_names], } table = SqlaTable( id=id, schema=schema, table_name=name, params=json.dumps(params) ) - for col_name in cols_names: - table.columns.append(TableColumn(column_name=col_name)) + for col_name, uuid in zip(cols_names, cols_uuids): + table.columns.append(TableColumn(column_name=col_name, uuid=uuid)) for metric_name in metric_names: table.metrics.append(SqlMetric(metric_name=metric_name, expression="")) return table, dict_rep @@ -171,6 +179,7 @@ def test_import_table_1_col_1_met(self): "table_1_col_1_met", id=ID_PREFIX + 2, cols_names=["col1"], + cols_uuids=[uuid4()], metric_names=["metric1"], ) imported_table = SqlaTable.import_from_dict(db.session, dict_table) @@ -187,6 +196,7 @@ def test_import_table_2_col_2_met(self): "table_2_col_2_met", id=ID_PREFIX + 3, cols_names=["c1", "c2"], + cols_uuids=[uuid4(), uuid4()], metric_names=["m1", "m2"], ) imported_table = SqlaTable.import_from_dict(db.session, dict_table) @@ -217,6 +227,7 @@ def test_import_table_override_append(self): id=ID_PREFIX + 3, metric_names=["new_metric1", "m1"], cols_names=["col1", "new_col1", "col2", "col3"], + cols_uuids=[col.uuid for col in imported_over.columns], ) self.assert_table_equals(expected_table, imported_over) self.yaml_compare( @@ -247,6 +258,7 @@ def test_import_table_override_sync(self): id=ID_PREFIX + 3, metric_names=["new_metric1"], cols_names=["new_col1", "col2", "col3"], + cols_uuids=[col.uuid for col in imported_over.columns], ) self.assert_table_equals(expected_table, imported_over) self.yaml_compare( diff --git a/tests/import_export_tests.py b/tests/import_export_tests.py index e772d16b136b0..ed6ae78470c0d 100644 --- a/tests/import_export_tests.py +++ b/tests/import_export_tests.py @@ -225,8 +225,8 @@ def assert_slice_equals(self, expected_slc, actual_slc): self.assertEqual(exp_params, actual_params) def assert_only_exported_slc_fields(self, expected_dash, actual_dash): - """ only exported json has this params - imported/created dashboard has 
relationships to other models instead + """only exported json has this params + imported/created dashboard has relationships to other models instead """ expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "") actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "")
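
A minimal standalone sketch (not part of the commit) of the chart-UUID mapping this change applies to position_json, both in DashboardDAO.set_dash_metadata and in the migration's update_position_json. The uuid_map contents and layout keys below are hypothetical, for illustration only:

    import json
    import uuid

    # Hypothetical map from chart (slice) id to its new UUID, analogous to the
    # {slice.id: slice.uuid} map built over the dashboard's current slices.
    uuid_map = {52: uuid.uuid4(), 53: uuid.uuid4()}

    # Hypothetical position_json layout: two CHART nodes and one ROW node.
    layout = {
        "CHART-abc123": {"type": "CHART", "meta": {"chartId": 52}},
        "CHART-def456": {"type": "CHART", "meta": {"chartId": 53}},
        "ROW-xyz789": {"type": "ROW", "meta": {}},
    }

    # Only CHART nodes that carry a chartId get a uuid key; other nodes are untouched.
    for obj in layout.values():
        if (
            isinstance(obj, dict)
            and obj.get("type") == "CHART"
            and obj["meta"].get("chartId")
        ):
            obj["meta"]["uuid"] = str(uuid_map[obj["meta"]["chartId"]])

    # The DAO then dumps the layout with compact separators and sorted keys.
    print(json.dumps(layout, indent=None, separators=(",", ":"), sort_keys=True))

The same lookup-or-delete pattern is why downgrade() can reuse update_position_json with an empty map: with no ids present in the map, every previously written "uuid" key is stripped from the layout again.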