From e85d2540691f56350ee0fc26fd33317203a80e76 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 28 Apr 2022 16:02:18 -0500 Subject: [PATCH] tweak --- homeassistant/components/recorder/purge.py | 146 +++++++++++++++++---- tests/components/recorder/test_purge.py | 16 ++- 2 files changed, 131 insertions(+), 31 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 1fde865a5aeabd..54334495a65f75 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -3,10 +3,11 @@ from collections.abc import Callable, Iterable from datetime import datetime +from itertools import zip_longest import logging from typing import TYPE_CHECKING -from sqlalchemy import bindparam, func, lambda_stmt, select, union_all +from sqlalchemy import func, lambda_stmt, select, union_all from sqlalchemy.orm.session import Session from sqlalchemy.sql.expression import distinct from sqlalchemy.sql.lambdas import StatementLambdaElement @@ -113,18 +114,114 @@ def _select_event_state_and_attributes_ids_to_purge( return event_ids, state_ids, attributes_ids -def _generate_find_attr_lambda() -> StatementLambdaElement: +def _generate_find_attr_lambda(attribute_ids: list[int]) -> StatementLambdaElement: """Generate the find attributes select only once.""" + ( + attr1, + attr2, + attr3, + attr4, + attr5, + attr6, + attr7, + attr8, + attr9, + attr10, + attr11, + attr12, + attr13, + attr14, + attr15, + attr16, + attr17, + attr18, + attr19, + attr20, + attr21, + attr22, + attr23, + attr24, + attr25, + attr26, + attr27, + attr28, + attr29, + attr30, + ) = attribute_ids return lambda_stmt( lambda: union_all( - *[ - select(func.min(States.attributes_id)).where( - States.attributes_id == bindparam(f"a{idx}", required=False) - ) - for idx in range( - 998 - ) # MAX_ROWS_TO_PURGE inlined to avoid TypeError: 'PyWrapper' object cannot be interpreted as an integer - ] + select(func.min(States.attributes_id)).where(States.attributes_id == attr1), + select(func.min(States.attributes_id)).where(States.attributes_id == attr2), + select(func.min(States.attributes_id)).where(States.attributes_id == attr3), + select(func.min(States.attributes_id)).where(States.attributes_id == attr4), + select(func.min(States.attributes_id)).where(States.attributes_id == attr5), + select(func.min(States.attributes_id)).where(States.attributes_id == attr6), + select(func.min(States.attributes_id)).where(States.attributes_id == attr7), + select(func.min(States.attributes_id)).where(States.attributes_id == attr8), + select(func.min(States.attributes_id)).where(States.attributes_id == attr9), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr10 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr11 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr12 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr13 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr14 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr15 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr16 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr17 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr18 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr19 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr20 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr21 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr22 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr23 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr24 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr25 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr26 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr27 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr28 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr29 + ), + select(func.min(States.attributes_id)).where( + States.attributes_id == attr30 + ), ) ) @@ -149,9 +246,12 @@ def _select_unused_attributes_ids( # > explain select distinct attributes_id from states where attributes_id in (136723); # ...Using index # - id_query = session.query(distinct(States.attributes_id)).filter( - States.attributes_id.in_(attributes_ids) - ) + seen_ids = { + state[0] + for state in session.query(distinct(States.attributes_id)) + .filter(States.attributes_id.in_(attributes_ids)) + .all() + } else: # # This branch is for DBMS that cannot optimize the distinct query well and has to examine @@ -176,17 +276,15 @@ def _select_unused_attributes_ids( # with NULL values so sqlalchemy does not end up with MAX_ROWS_TO_PURGE # different queries in the cache. # - id_query = session.execute( - _generate_find_attr_lambda().params( - { - f"a{idx}": attributes_id - for idx, attributes_id in enumerate(attributes_ids) - } - ) - ) - to_remove = attributes_ids - { - state[0] for state in id_query.all() if state[0] is not None - } + seen_ids = set() + groups = [iter(attributes_ids)] * 30 + for attr_ids in zip_longest(*groups, fillvalue=None): + seen_ids |= { + state[0] + for state in session.execute(_generate_find_attr_lambda(attr_ids)).all() + if state[0] is not None + } + to_remove = attributes_ids - seen_ids _LOGGER.debug( "Selected %s shared attributes to remove", len(to_remove), diff --git a/tests/components/recorder/test_purge.py b/tests/components/recorder/test_purge.py index 3895393e557bd7..ad555d8a3be00c 100644 --- a/tests/components/recorder/test_purge.py +++ b/tests/components/recorder/test_purge.py @@ -59,9 +59,9 @@ def mock_use_sqlite(request): with patch( "homeassistant.components.recorder.Recorder.using_sqlite", return_value=request.param, - ), patch( - "homeassistant.components.recorder.purge._generate_find_attr_lambda", - _generate_sqlite_compatible_find_attr_lambda, + # ), patch( + # "homeassistant.components.recorder.purge._generate_find_attr_lambda", + # _generate_sqlite_compatible_find_attr_lambda, ): yield @@ -188,10 +188,12 @@ async def test_purge_old_states_encounters_temporary_mysql_error( "homeassistant.components.recorder.purge._purge_old_recorder_runs", side_effect=[mysql_exception, None], ), patch.object( - instance.engine.dialect, "name", "mysql" - ), patch( - "homeassistant.components.recorder.purge._generate_find_attr_lambda", - _generate_sqlite_compatible_find_attr_lambda, + instance.engine.dialect, + "name", + "mysql" + # ), patch( + # "homeassistant.components.recorder.purge._generate_find_attr_lambda", + # _generate_sqlite_compatible_find_attr_lambda, ): await hass.services.async_call( recorder.DOMAIN, recorder.SERVICE_PURGE, {"keep_days": 0}