Skip to content

Commit

Permalink
feat: load examples from config instead of code (apache#12026)
Browse files Browse the repository at this point in the history
* feat: load examples from config instead of code

* Remove database

* Update data URL
  • Loading branch information
betodealmeida authored Dec 15, 2020
1 parent e0079bb commit 5e811a1
Show file tree
Hide file tree
Showing 16 changed files with 444 additions and 207 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,polyline,prison,pyarrow,pyhive,pytest,pytz,redis,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,pyarrow,pyhive,pytest,pytz,redis,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
6 changes: 3 additions & 3 deletions superset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,6 @@ def load_examples_run(
print("Loading [Birth names]")
examples.load_birth_names(only_metadata, force)

print("Loading [Unicode test data]")
examples.load_unicode_test_data(only_metadata, force)

if not load_test_data:
print("Loading [Random time series data]")
examples.load_random_time_series_data(only_metadata, force)
Expand Down Expand Up @@ -164,6 +161,9 @@ def load_examples_run(
print("Loading [Tabbed dashboard]")
examples.load_tabbed_dashboard(only_metadata)

# load examples that are stored as YAML config files
examples.load_from_configs()


@with_appcontext
@superset.command()
Expand Down
19 changes: 1 addition & 18 deletions superset/commands/importers/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import Any, Dict, List, Optional, Set

from marshmallow import Schema, validate
Expand Down Expand Up @@ -106,7 +89,7 @@ def validate(self) -> None:
metadata = None

# validate that the type declared in METADATA_FILE_NAME is correct
if metadata:
if metadata and "type" in metadata:
type_validator = validate.Equal(self.dao.model_cls.__name__) # type: ignore
try:
type_validator(metadata["type"])
Expand Down
119 changes: 119 additions & 0 deletions superset/commands/importers/v1/examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from typing import Any, Dict, List, Tuple

from marshmallow import Schema
from sqlalchemy.orm import Session
from sqlalchemy.sql import select

from superset import db
from superset.charts.commands.importers.v1.utils import import_chart
from superset.charts.schemas import ImportV1ChartSchema
from superset.commands.exceptions import CommandException
from superset.commands.importers.v1 import ImportModelsCommand
from superset.dao.base import BaseDAO
from superset.dashboards.commands.importers.v1.utils import (
find_chart_uuids,
import_dashboard,
update_id_refs,
)
from superset.dashboards.schemas import ImportV1DashboardSchema
from superset.databases.commands.importers.v1.utils import import_database
from superset.databases.schemas import ImportV1DatabaseSchema
from superset.datasets.commands.importers.v1.utils import import_dataset
from superset.datasets.schemas import ImportV1DatasetSchema
from superset.models.core import Database
from superset.models.dashboard import dashboard_slices


class ImportExamplesCommand(ImportModelsCommand):

    """Import examples"""

    dao = BaseDAO
    model_name = "model"
    schemas: Dict[str, Schema] = {
        "charts/": ImportV1ChartSchema(),
        "dashboards/": ImportV1DashboardSchema(),
        "datasets/": ImportV1DatasetSchema(),
        "databases/": ImportV1DatabaseSchema(),
    }
    import_error = CommandException

    # pylint: disable=too-many-locals
    @staticmethod
    def _import(
        session: Session, configs: Dict[str, Any], overwrite: bool = False
    ) -> None:
        """Import example objects from parsed YAML configs.

        Objects are imported in dependency order — databases, then datasets,
        then charts, then dashboards — so that each pass can resolve the IDs
        created by the previous one.
        """
        # import databases first; everything else depends on them
        for file_name, config in configs.items():
            if file_name.startswith("databases/"):
                import_database(session, config, overwrite=overwrite)

        # import datasets
        # TODO (betodealmeida): once we have all examples being imported we can
        # have a stable UUID for the database stored in the dataset YAML; for
        # now we need to fetch the current ID.
        examples_id = (
            db.session.query(Database).filter_by(database_name="examples").one().id
        )
        dataset_info: Dict[str, Dict[str, Any]] = {}
        for file_name, config in configs.items():
            if file_name.startswith("datasets/"):
                config["database_id"] = examples_id
                dataset = import_dataset(session, config, overwrite=overwrite)
                dataset_info[str(dataset.uuid)] = {
                    "datasource_id": dataset.id,
                    "datasource_type": "view" if dataset.is_sqllab_view else "table",
                    "datasource_name": dataset.table_name,
                }

        # import charts
        chart_ids: Dict[str, int] = {}
        for file_name, config in configs.items():
            if file_name.startswith("charts/"):
                # update datasource id, type, and name from the dataset imported above
                config.update(dataset_info[config["dataset_uuid"]])
                chart = import_chart(session, config, overwrite=overwrite)
                chart_ids[str(chart.uuid)] = chart.id

        # store the existing relationship between dashboards and charts so we
        # don't insert duplicate rows into dashboard_slices below
        existing_relationships = session.execute(
            select([dashboard_slices.c.dashboard_id, dashboard_slices.c.slice_id])
        ).fetchall()

        # import dashboards
        dashboard_chart_ids: List[Tuple[int, int]] = []
        for file_name, config in configs.items():
            if file_name.startswith("dashboards/"):
                config = update_id_refs(config, chart_ids)
                dashboard = import_dashboard(session, config, overwrite=overwrite)
                for uuid in find_chart_uuids(config["position"]):
                    chart_id = chart_ids[uuid]
                    if (dashboard.id, chart_id) not in existing_relationships:
                        dashboard_chart_ids.append((dashboard.id, chart_id))

        # set ref in the dashboard_slices table
        values = [
            {"dashboard_id": dashboard_id, "slice_id": chart_id}
            for (dashboard_id, chart_id) in dashboard_chart_ids
        ]
        # guard against an empty executemany: SQLAlchemy treats an empty
        # parameter list as a single parameterless execution, which would
        # attempt a bare INSERT into dashboard_slices
        if values:
            # pylint: disable=no-value-for-parameter (sqlalchemy/issues/4656)
            session.execute(dashboard_slices.insert(), values)
2 changes: 1 addition & 1 deletion superset/commands/importers/v1/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def remove_root(file_path: str) -> str:

class MetadataSchema(Schema):
    """Schema for the metadata file bundled with a v1 import archive."""

    # bundle format version; must equal IMPORT_VERSION exactly
    version = fields.String(required=True, validate=validate.Equal(IMPORT_VERSION))
    # object type — optional, since some bundles (e.g. examples) mix
    # several object types and cannot declare a single one
    type = fields.String(required=False)
    timestamp = fields.DateTime()


Expand Down
14 changes: 2 additions & 12 deletions superset/dashboards/commands/importers/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from typing import Any, Dict, Iterator, List, Set, Tuple
from typing import Any, Dict, List, Set, Tuple

from marshmallow import Schema
from sqlalchemy.orm import Session
Expand All @@ -26,6 +26,7 @@
from superset.commands.importers.v1 import ImportModelsCommand
from superset.dashboards.commands.exceptions import DashboardImportError
from superset.dashboards.commands.importers.v1.utils import (
find_chart_uuids,
import_dashboard,
update_id_refs,
)
Expand All @@ -38,17 +39,6 @@
from superset.models.dashboard import dashboard_slices


def find_chart_uuids(position: Dict[str, Any]) -> Iterator[str]:
"""Find all chart UUIDs in a dashboard"""
for child in position.values():
if (
isinstance(child, dict)
and child["type"] == "CHART"
and "uuid" in child["meta"]
):
yield child["meta"]["uuid"]


class ImportDashboardsCommand(ImportModelsCommand):

"""Import dashboards"""
Expand Down
9 changes: 5 additions & 4 deletions superset/dashboards/commands/importers/v1/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import json
import logging
from typing import Any, Dict
from typing import Any, Dict, Set

from sqlalchemy.orm import Session

Expand All @@ -29,6 +29,10 @@
JSON_KEYS = {"position": "position_json", "metadata": "json_metadata"}


def find_chart_uuids(position: Dict[str, Any]) -> Set[str]:
    """Return the UUIDs of all charts referenced in a dashboard position map."""
    uuid_to_id = build_uuid_to_id_map(position)
    return set(uuid_to_id.keys())


def build_uuid_to_id_map(position: Dict[str, Any]) -> Dict[str, int]:
return {
child["meta"]["uuid"]: child["meta"]["chartId"]
Expand All @@ -43,9 +47,6 @@ def build_uuid_to_id_map(position: Dict[str, Any]) -> Dict[str, int]:

def update_id_refs(config: Dict[str, Any], chart_ids: Dict[str, int]) -> Dict[str, Any]:
"""Update dashboard metadata to use new IDs"""
if not config.get("metadata"):
return config

fixed = config.copy()

# build map old_id => new_id
Expand Down
56 changes: 56 additions & 0 deletions superset/datasets/commands/importers/v1/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,48 @@

import json
import logging
import re
from typing import Any, Dict
from urllib import request

import pandas as pd
from sqlalchemy import Date, Float, String
from sqlalchemy.orm import Session
from sqlalchemy.sql.visitors import VisitableType

from superset.connectors.sqla.models import SqlaTable

logger = logging.getLogger(__name__)

CHUNKSIZE = 512
VARCHAR = re.compile(r"VARCHAR\((\d+)\)", re.IGNORECASE)

JSON_KEYS = {"params", "template_params", "extra"}


def get_sqla_type(native_type: str) -> VisitableType:
    """Convert a native column type name into a SQLAlchemy type instance.

    Supports ``DATE``, ``FLOAT`` (case-insensitive) and ``VARCHAR(n)``.

    :param native_type: the type name as stored in the dataset YAML
    :raises ValueError: if the type is not one of the supported names
    """
    native_type_upper = native_type.upper()

    if native_type_upper == "DATE":
        return Date()

    if native_type_upper == "FLOAT":
        return Float()

    match = VARCHAR.match(native_type)
    if match:
        size = int(match.group(1))
        return String(size)

    # raise a specific exception type instead of bare Exception; still
    # caught by any existing ``except Exception`` handlers
    raise ValueError(f"Unknown type: {native_type}")


def get_dtype(df: pd.DataFrame, dataset: SqlaTable) -> Dict[str, VisitableType]:
    """Build the ``dtype`` argument for ``DataFrame.to_sql``: a map from
    column name to SQLAlchemy type, restricted to dataset columns that are
    actually present in the dataframe.
    """
    frame_columns = set(df.keys())
    dtype: Dict[str, VisitableType] = {}
    for col in dataset.columns:
        if col.column_name in frame_columns:
            dtype[col.column_name] = get_sqla_type(col.type)
    return dtype


def import_dataset(
session: Session, config: Dict[str, Any], overwrite: bool = False
) -> SqlaTable:
Expand Down Expand Up @@ -55,9 +86,34 @@ def import_dataset(
# should we delete columns and metrics not present in the current import?
sync = ["columns", "metrics"] if overwrite else []

# should we also load data into the dataset?
data_uri = config.get("data")

# import recursively to include columns and metrics
dataset = SqlaTable.import_from_dict(session, config, recursive=True, sync=sync)
if dataset.id is None:
session.flush()

# load data
if data_uri:
data = request.urlopen(data_uri)
df = pd.read_csv(data, encoding="utf-8")
dtype = get_dtype(df, dataset)

# convert temporal columns
for column_name, sqla_type in dtype.items():
if isinstance(sqla_type, Date):
df[column_name] = pd.to_datetime(df[column_name])

df.to_sql(
dataset.table_name,
con=session.connection(),
schema=dataset.schema,
if_exists="replace",
chunksize=CHUNKSIZE,
dtype=dtype,
index=False,
method="multi",
)

return dataset
1 change: 1 addition & 0 deletions superset/datasets/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,4 @@ class ImportV1DatasetSchema(Schema):
metrics = fields.List(fields.Nested(ImportV1MetricSchema))
version = fields.String(required=True)
database_uuid = fields.UUID(required=True)
data = fields.URL()
2 changes: 1 addition & 1 deletion superset/examples/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@
from .random_time_series import load_random_time_series_data
from .sf_population_polygons import load_sf_population_polygons
from .tabbed_dashboard import load_tabbed_dashboard
from .unicode_test_data import load_unicode_test_data
from .utils import load_from_configs
from .world_bank import load_world_bank_health_n_pop
40 changes: 40 additions & 0 deletions superset/examples/configs/charts/Unicode_Cloud.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
slice_name: Unicode Cloud
viz_type: word_cloud
params:
granularity_sqla: dttm
groupby: []
limit: '100'
metric:
aggregate: SUM
column:
column_name: value
expressionType: SIMPLE
label: Value
rotation: square
row_limit: 50000
series: short_phrase
since: 100 years ago
size_from: '10'
size_to: '70'
until: now
viz_type: word_cloud
cache_timeout: null
uuid: 609e26d8-8e1e-4097-9751-931708e24ee4
version: 1.0.0
dataset_uuid: a6771c73-96fc-44c6-8b6e-9d303955ea48
Loading

0 comments on commit 5e811a1

Please sign in to comment.