diff --git a/UPDATING.md b/UPDATING.md
index 909cd9f6d9bed..ad6427dfb43d9 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -51,6 +51,7 @@ assists people when migrating to a new version.
 ### Potential Downtime
 
 - [27392](https://github.com/apache/superset/pull/27392): Adds an index to `query.sql_editor_id` to improve performance. This may cause downtime on large deployments.
+- [28422](https://github.com/apache/superset/pull/28422): Potentially augments the `query.executed_sql` and `query.select_sql` columns for MySQL from `MEDIUMTEXT` to `LONGTEXT`. Potential downtime may be required for large deployments which previously ran [27119](https://github.com/apache/superset/pull/27119).
 
 ## 4.0.0
 
diff --git a/superset/migrations/shared/utils.py b/superset/migrations/shared/utils.py
index db20140db940b..208d839bc183d 100644
--- a/superset/migrations/shared/utils.py
+++ b/superset/migrations/shared/utils.py
@@ -34,21 +34,40 @@
 DEFAULT_BATCH_SIZE = int(os.environ.get("BATCH_SIZE", 1000))
 
 
-def table_has_column(table: str, column: str) -> bool:
+def get_table_column(
+    table_name: str,
+    column_name: str,
+) -> Optional[dict[str, Any]]:
     """
-    Checks if a column exists in a given table.
+    Get the specified column.
 
-    :param table: A table name
-    :param column: A column name
-    :returns: True iff the column exists in the table
+    :param table_name: The table name
+    :param column_name: The column name
+    :returns: The reflected column, or None if the table or column does not exist
     """
 
     insp = inspect(op.get_context().bind)
 
     try:
-        return any(col["name"] == column for col in insp.get_columns(table))
+        for column in insp.get_columns(table_name):
+            if column["name"] == column_name:
+                return column
     except NoSuchTableError:
-        return False
+        pass
+
+    return None
+
+
+def table_has_column(table_name: str, column_name: str) -> bool:
+    """
+    Checks if a column exists in a given table.
+
+    :param table_name: A table name
+    :param column_name: A column name
+    :returns: True iff the column exists in the table
+    """
+
+    return bool(get_table_column(table_name, column_name))
 
 
 def table_has_index(table: str, index: str) -> bool:
diff --git a/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py b/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py
index 3ba126d24ec29..da15245a071f2 100644
--- a/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py
+++ b/superset/migrations/versions/2024-02-14_14-43_17fcea065655_change_text_to_mediumtext.py
@@ -28,8 +28,10 @@
 
 import sqlalchemy as sa  # noqa: E402
 from alembic import op  # noqa: E402
+from sqlalchemy.dialects.mysql import MEDIUMTEXT, TEXT  # noqa: E402
 from sqlalchemy.dialects.mysql.base import MySQLDialect  # noqa: E402
 
+from superset.migrations.shared.utils import get_table_column  # noqa: E402
 from superset.utils.core import MediumText  # noqa: E402
 
 TABLE_COLUMNS = [
@@ -38,8 +40,6 @@
     "dashboards.css",
     "keyvalue.value",
     "query.extra_json",
-    "query.executed_sql",
-    "query.select_sql",
     "report_execution_log.value_row_json",
     "report_recipient.recipient_config_json",
     "report_schedule.sql",
@@ -65,23 +65,35 @@
 
 def upgrade():
     if isinstance(op.get_bind().dialect, MySQLDialect):
-        for column in TABLE_COLUMNS:
-            with op.batch_alter_table(column.split(".")[0]) as batch_op:
-                batch_op.alter_column(
-                    column.split(".")[1],
-                    existing_type=sa.Text(),
-                    type_=MediumText(),
-                    existing_nullable=column not in NOT_NULL_COLUMNS,
-                )
+        for item in TABLE_COLUMNS:
+            table_name, column_name = item.split(".")
+
+            if (column := get_table_column(table_name, column_name)) and isinstance(
+                column["type"],
+                TEXT,
+            ):
+                with op.batch_alter_table(table_name) as batch_op:
+                    batch_op.alter_column(
+                        column_name,
+                        existing_type=sa.Text(),
+                        type_=MediumText(),
+                        existing_nullable=item not in NOT_NULL_COLUMNS,
+                    )
 
 
 def downgrade():
     if isinstance(op.get_bind().dialect, MySQLDialect):
-        for column in TABLE_COLUMNS:
-            with op.batch_alter_table(column.split(".")[0]) as batch_op:
-                batch_op.alter_column(
-                    column.split(".")[1],
-                    existing_type=MediumText(),
-                    type_=sa.Text(),
-                    existing_nullable=column not in NOT_NULL_COLUMNS,
-                )
+        for item in TABLE_COLUMNS:
+            table_name, column_name = item.split(".")
+
+            if (column := get_table_column(table_name, column_name)) and isinstance(
+                column["type"],
+                MEDIUMTEXT,
+            ):
+                with op.batch_alter_table(table_name) as batch_op:
+                    batch_op.alter_column(
+                        column_name,
+                        existing_type=MediumText(),
+                        type_=sa.Text(),
+                        existing_nullable=item not in NOT_NULL_COLUMNS,
+                    )
diff --git a/superset/migrations/versions/2024-05-09_19-19_f7b6750b67e8_.py b/superset/migrations/versions/2024-05-09_19-19_f7b6750b67e8_.py
new file mode 100644
index 0000000000000..642723fb0fefd
--- /dev/null
+++ b/superset/migrations/versions/2024-05-09_19-19_f7b6750b67e8_.py
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""change_mediumtext_to_longtext
+
+Revision ID: f7b6750b67e8
+Revises: 4081be5b6b74
+Create Date: 2024-05-09 19:19:46.630140
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = "f7b6750b67e8"
+down_revision = "4081be5b6b74"
+
+from alembic import op  # noqa: E402
+from sqlalchemy.dialects.mysql import MEDIUMTEXT  # noqa: E402
+from sqlalchemy.dialects.mysql.base import MySQLDialect  # noqa: E402
+
+from superset.migrations.shared.utils import get_table_column  # noqa: E402
+from superset.utils.core import LongText, MediumText  # noqa: E402
+
+
+def upgrade():
+    if isinstance(op.get_bind().dialect, MySQLDialect):
+        for item in ["query.executed_sql", "query.select_sql"]:
+            table_name, column_name = item.split(".")
+
+            if (column := get_table_column(table_name, column_name)) and isinstance(
+                column["type"],
+                MEDIUMTEXT,
+            ):
+                with op.batch_alter_table(table_name) as batch_op:
+                    batch_op.alter_column(
+                        column_name,
+                        existing_type=MediumText(),
+                        type_=LongText(),
+                        existing_nullable=True,
+                    )
+
+
+def downgrade():
+    pass
diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py
index 41647ea43b9bf..4e948b58e90e1 100644
--- a/superset/models/sql_lab.py
+++ b/superset/models/sql_lab.py
@@ -59,7 +59,13 @@
 )
 from superset.sql_parse import CtasMethod, extract_tables_from_jinja_sql, Table
 from superset.sqllab.limiting_factor import LimitingFactor
-from superset.utils.core import get_column_name, MediumText, QueryStatus, user_label
+from superset.utils.core import (
+    get_column_name,
+    LongText,
+    MediumText,
+    QueryStatus,
+    user_label,
+)
 
 if TYPE_CHECKING:
     from superset.connectors.sqla.models import TableColumn
@@ -110,11 +116,11 @@ class Query(
     sql_editor_id = Column(String(256), index=True)
     schema = Column(String(256))
     catalog = Column(String(256), nullable=True, default=None)
-    sql = Column(MediumText())
+    sql = Column(LongText())
     # Query to retrieve the results,
     # used only in case of select_as_cta_used is true.
-    select_sql = Column(MediumText())
-    executed_sql = Column(MediumText())
+    select_sql = Column(LongText())
+    executed_sql = Column(LongText())
     # Could be configured in the superset config.
     limit = Column(Integer)
     limiting_factor = Column(
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 6b44fda4e4352..e0eef6791db43 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -70,7 +70,7 @@
 from pandas.api.types import infer_dtype
 from pandas.core.dtypes.common import is_numeric_dtype
 from sqlalchemy import event, exc, inspect, select, Text
-from sqlalchemy.dialects.mysql import MEDIUMTEXT
+from sqlalchemy.dialects.mysql import LONGTEXT, MEDIUMTEXT
 from sqlalchemy.engine import Connection, Engine
 from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.sql.type_api import Variant
@@ -1497,6 +1497,10 @@ def MediumText() -> Variant:  # pylint:disable=invalid-name
     return Text().with_variant(MEDIUMTEXT(), "mysql")
 
 
+def LongText() -> Variant:  # pylint:disable=invalid-name
+    return Text().with_variant(LONGTEXT(), "mysql")
+
+
 def shortid() -> str:
     return f"{uuid.uuid4()}"[-12:]
 