From ae6e2a00a029543159d8afb90064e0a8d33a7934 Mon Sep 17 00:00:00 2001 From: Ville Brofeldt <33317356+villebro@users.noreply.github.com> Date: Fri, 24 Feb 2023 10:45:16 +0200 Subject: [PATCH] fix(rbac): show objects accessible by database access perm (#23118) (cherry picked from commit 89576f8a87ff5dada314004c03d5ed4241595d31) --- UPDATING.md | 2 + superset/charts/filters.py | 26 ++++-------- superset/dashboards/filters.py | 14 +++---- superset/security/manager.py | 25 +++++++---- superset/utils/filters.py | 41 +++++++++++++++++++ superset/views/base.py | 21 +++------- superset/views/chart/filters.py | 9 ++-- tests/integration_tests/datasets/api_tests.py | 37 +++++++++++++---- 8 files changed, 114 insertions(+), 61 deletions(-) create mode 100644 superset/utils/filters.py diff --git a/UPDATING.md b/UPDATING.md index 40f2bf97554e1..add667bf43af9 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -43,6 +43,8 @@ assists people when migrating to a new version. ### Other +- [23118](https://github.com/apache/superset/pull/23118): Previously the "database access on " permission granted access to all datasets on the underlying database, but they didn't show up on the list views. Now all dashboards, charts and datasets that are accessible via this permission will also show up on their respective list views. + ## 2.0.1 - [21895](https://github.com/apache/superset/pull/21895): Markdown components had their security increased by adhering to the same sanitization process enforced by Github. This means that some HTML elements found in markdowns are not allowed anymore due to the security risks they impose. If you're deploying Superset in a trusted environment and wish to use some of the blocked elements, then you can use the HTML_SANITIZATION_SCHEMA_EXTENSIONS configuration to extend the default sanitization schema. There's also the option to disable HTML sanitization using the HTML_SANITIZATION configuration but we do not recommend this approach because of the security risks. Given the provided configurations, we don't view the improved sanitization as a breaking change but as a security patch. diff --git a/superset/charts/filters.py b/superset/charts/filters.py index fd3fff7f6e2fa..8b60fcb8d9be3 100644 --- a/superset/charts/filters.py +++ b/superset/charts/filters.py @@ -18,13 +18,15 @@ from flask_babel import lazy_gettext as _ from sqlalchemy import and_, or_ +from sqlalchemy.orm import aliased from sqlalchemy.orm.query import Query -from superset import db, security_manager +from superset import security_manager from superset.connectors.sqla import models from superset.connectors.sqla.models import SqlaTable from superset.models.slice import Slice from superset.utils.core import get_user_id +from superset.utils.filters import get_dataset_access_filters from superset.views.base import BaseFilter from superset.views.base_api import BaseFavoriteFilter @@ -77,23 +79,13 @@ class ChartFilter(BaseFilter): # pylint: disable=too-few-public-methods def apply(self, query: Query, value: Any) -> Query: if security_manager.can_access_all_datasources(): return query - perms = security_manager.user_view_menu_names("datasource_access") - schema_perms = security_manager.user_view_menu_names("schema_access") - owner_ids_query = ( - db.session.query(models.SqlaTable.id) - .join(models.SqlaTable.owners) - .filter( - security_manager.user_model.id - == security_manager.user_model.get_user_id() - ) - ) - return query.filter( - or_( - self.model.perm.in_(perms), - self.model.schema_perm.in_(schema_perms), - models.SqlaTable.id.in_(owner_ids_query), - ) + + table_alias = aliased(SqlaTable) + query = query.join(table_alias, self.model.datasource_id == table_alias.id) + query = query.join( + models.Database, table_alias.database_id == models.Database.id ) + return query.filter(get_dataset_access_filters(self.model)) class ChartHasCreatedByFilter(BaseFilter): # pylint: disable=too-few-public-methods diff --git a/superset/dashboards/filters.py b/superset/dashboards/filters.py index e09609ff511e0..b28dc8ea3872f 100644 --- a/superset/dashboards/filters.py +++ b/superset/dashboards/filters.py @@ -24,12 +24,14 @@ from sqlalchemy.orm.query import Query from superset import db, is_feature_enabled, security_manager -from superset.models.core import FavStar +from superset.connectors.sqla.models import SqlaTable +from superset.models.core import Database, FavStar from superset.models.dashboard import Dashboard from superset.models.embedded_dashboard import EmbeddedDashboard from superset.models.slice import Slice from superset.security.guest_token import GuestTokenResourceType, GuestUser from superset.utils.core import get_user_id +from superset.utils.filters import get_dataset_access_filters from superset.views.base import BaseFilter from superset.views.base_api import BaseFavoriteFilter @@ -101,9 +103,6 @@ def apply(self, query: Query, value: Any) -> Query: if security_manager.is_admin(): return query - datasource_perms = security_manager.user_view_menu_names("datasource_access") - schema_perms = security_manager.user_view_menu_names("schema_access") - is_rbac_disabled_filter = [] dashboard_has_roles = Dashboard.roles.any() if is_feature_enabled("DASHBOARD_RBAC"): @@ -112,13 +111,14 @@ def apply(self, query: Query, value: Any) -> Query: datasource_perm_query = ( db.session.query(Dashboard.id) .join(Dashboard.slices, isouter=True) + .join(SqlaTable, Slice.datasource_id == SqlaTable.id) + .join(Database, SqlaTable.database_id == Database.id) .filter( and_( Dashboard.published.is_(True), *is_rbac_disabled_filter, - or_( - Slice.perm.in_(datasource_perms), - Slice.schema_perm.in_(schema_perms), + get_dataset_access_filters( + Slice, security_manager.can_access_all_datasources(), ), ) diff --git a/superset/security/manager.py b/superset/security/manager.py index e09f100b1a5df..1247a1634dcff 100644 --- a/superset/security/manager.py +++ b/superset/security/manager.py @@ -84,6 +84,7 @@ get_user_id, RowLevelSecurityFilterType, ) +from superset.utils.filters import get_dataset_access_filters from superset.utils.urls import get_url_host if TYPE_CHECKING: @@ -98,6 +99,8 @@ logger = logging.getLogger(__name__) +DATABASE_PERM_REGEX = re.compile(r"^\[.+\]\.\(id\:(?P\d+)\)$") + class DatabaseAndSchema(NamedTuple): database: str @@ -525,8 +528,6 @@ def get_user_datasources(self) -> List["BaseDatasource"]: :returns: The list of datasources """ - user_perms = self.user_view_menu_names("datasource_access") - schema_perms = self.user_view_menu_names("schema_access") user_datasources = set() # pylint: disable=import-outside-toplevel @@ -534,12 +535,7 @@ def get_user_datasources(self) -> List["BaseDatasource"]: user_datasources.update( self.get_session.query(SqlaTable) - .filter( - or_( - SqlaTable.perm.in_(user_perms), - SqlaTable.schema_perm.in_(schema_perms), - ) - ) + .filter(get_dataset_access_filters(SqlaTable)) .all() ) @@ -604,6 +600,19 @@ def user_view_menu_names(self, permission_name: str) -> Set[str]: return {s.name for s in view_menu_names} return set() + def get_accessible_databases(self) -> List[int]: + """ + Return the list of databases accessible by the user. + + :return: The list of accessible Databases + """ + perms = self.user_view_menu_names("database_access") + return [ + int(match.group("id")) + for perm in perms + if (match := DATABASE_PERM_REGEX.match(perm)) + ] + def get_schemas_accessible_by_user( self, database: "Database", schemas: List[str], hierarchical: bool = True ) -> List[str]: diff --git a/superset/utils/filters.py b/superset/utils/filters.py new file mode 100644 index 0000000000000..4772f49ba0a36 --- /dev/null +++ b/superset/utils/filters.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Any, Type + +from flask_appbuilder import Model +from sqlalchemy import or_ +from sqlalchemy.sql.elements import BooleanClauseList + + +def get_dataset_access_filters( + base_model: Type[Model], + *args: Any, +) -> BooleanClauseList: + # pylint: disable=import-outside-toplevel + from superset import security_manager + from superset.connectors.sqla.models import Database + + database_ids = security_manager.get_accessible_databases() + perms = security_manager.user_view_menu_names("datasource_access") + schema_perms = security_manager.user_view_menu_names("schema_access") + + return or_( + Database.id.in_(database_ids), + base_model.perm.in_(perms), + base_model.schema_perm.in_(schema_perms), + *args, + ) diff --git a/superset/views/base.py b/superset/views/base.py index f6651e5c74528..6e28745821ee2 100644 --- a/superset/views/base.py +++ b/superset/views/base.py @@ -46,7 +46,7 @@ from flask_wtf.csrf import CSRFError from flask_wtf.form import FlaskForm from pkg_resources import resource_filename -from sqlalchemy import exc, or_ +from sqlalchemy import exc from sqlalchemy.orm import Query from werkzeug.exceptions import HTTPException from wtforms import Form @@ -78,7 +78,7 @@ from superset.superset_typing import FlaskResponse from superset.translations.utils import get_language_pack from superset.utils import core as utils -from superset.utils.core import get_user_id +from superset.utils.filters import get_dataset_access_filters from .utils import bootstrap_user_data @@ -670,20 +670,11 @@ class DatasourceFilter(BaseFilter): # pylint: disable=too-few-public-methods def apply(self, query: Query, value: Any) -> Query: if security_manager.can_access_all_datasources(): return query - datasource_perms = security_manager.user_view_menu_names("datasource_access") - schema_perms = security_manager.user_view_menu_names("schema_access") - owner_ids_query = ( - db.session.query(models.SqlaTable.id) - .join(models.SqlaTable.owners) - .filter(security_manager.user_model.id == get_user_id()) - ) - return query.filter( - or_( - self.model.perm.in_(datasource_perms), - self.model.schema_perm.in_(schema_perms), - models.SqlaTable.id.in_(owner_ids_query), - ) + query = query.join( + models.Database, + models.Database.id == self.model.database_id, ) + return query.filter(get_dataset_access_filters(self.model)) class CsvResponse(Response): diff --git a/superset/views/chart/filters.py b/superset/views/chart/filters.py index d7319305366b0..3a467156dbc20 100644 --- a/superset/views/chart/filters.py +++ b/superset/views/chart/filters.py @@ -16,10 +16,10 @@ # under the License. from typing import Any -from sqlalchemy import or_ from sqlalchemy.orm.query import Query from superset import security_manager +from superset.utils.filters import get_dataset_access_filters from superset.views.base import BaseFilter @@ -27,8 +27,5 @@ class SliceFilter(BaseFilter): # pylint: disable=too-few-public-methods def apply(self, query: Query, value: Any) -> Query: if security_manager.can_access_all_datasources(): return query - perms = security_manager.user_view_menu_names("datasource_access") - schema_perms = security_manager.user_view_menu_names("schema_access") - return query.filter( - or_(self.model.perm.in_(perms), self.model.schema_perm.in_(schema_perms)) - ) + + return query.filter(get_dataset_access_filters(self.model)) diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py index 6e0551bd9f826..8071902c455da 100644 --- a/tests/integration_tests/datasets/api_tests.py +++ b/tests/integration_tests/datasets/api_tests.py @@ -239,28 +239,47 @@ def test_get_dataset_list_gamma(self): response = json.loads(rv.data.decode("utf-8")) assert response["result"] == [] - def test_get_dataset_list_gamma_owned(self): + def test_get_dataset_list_gamma_has_database_access(self): """ - Dataset API: Test get dataset list owned by gamma + Dataset API: Test get dataset list with database access """ if backend() == "sqlite": return + self.login(username="gamma") + + # create new dataset main_db = get_main_database() - owned_dataset = self.insert_dataset( - "ab_user", [self.get_user("gamma").id], main_db + dataset = self.insert_dataset("ab_user", [], main_db) + + # make sure dataset is not visible due to missing perms + uri = "api/v1/dataset/" + rv = self.get_assert_metric(uri, "get_list") + assert rv.status_code == 200 + response = json.loads(rv.data.decode("utf-8")) + + assert response["count"] == 0 + + # give database access to main db + main_db_pvm = security_manager.find_permission_view_menu( + "database_access", main_db.perm ) + gamma_role = security_manager.find_role("Gamma") + gamma_role.permissions.append(main_db_pvm) + db.session.commit() - self.login(username="gamma") + # make sure dataset is now visible uri = "api/v1/dataset/" rv = self.get_assert_metric(uri, "get_list") assert rv.status_code == 200 response = json.loads(rv.data.decode("utf-8")) - assert response["count"] == 1 - assert response["result"][0]["table_name"] == "ab_user" + tables = {tbl["table_name"] for tbl in response["result"]} + assert tables == {"ab_user"} - db.session.delete(owned_dataset) + # revert gamma permission + gamma_role.permissions.remove(main_db_pvm) + db.session.delete(dataset) db.session.commit() def test_get_dataset_related_database_gamma(self): @@ -2255,6 +2274,8 @@ def test_duplicate_virtual_dataset(self): assert len(new_dataset.columns) == 2 assert new_dataset.columns[0].column_name == "id" assert new_dataset.columns[1].column_name == "name" + db.session.delete(new_dataset) + db.session.commit() @pytest.mark.usefixtures("create_datasets") def test_duplicate_physical_dataset(self):