Improve history API performance #35822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 8 commits · May 27, 2020
109 changes: 86 additions & 23 deletions homeassistant/components/history/__init__.py
@@ -12,7 +12,7 @@

from homeassistant.components import recorder
from homeassistant.components.http import HomeAssistantView
from homeassistant.components.recorder.models import States
from homeassistant.components.recorder.models import DB_TIMEZONE, States
from homeassistant.components.recorder.util import execute, session_scope
from homeassistant.const import (
ATTR_HIDDEN,
@@ -22,6 +22,7 @@
CONF_INCLUDE,
HTTP_BAD_REQUEST,
)
from homeassistant.core import split_entity_id
import homeassistant.helpers.config_validation as cv
import homeassistant.util.dt as dt_util

@@ -32,6 +33,9 @@
DOMAIN = "history"
CONF_ORDER = "use_include_order"

STATE_KEY = "state"
LAST_CHANGED_KEY = "last_changed"

CONFIG_SCHEMA = vol.Schema(
{
DOMAIN: recorder.FILTER_SCHEMA.extend(
@@ -43,6 +47,9 @@

SIGNIFICANT_DOMAINS = ("climate", "device_tracker", "thermostat", "water_heater")
IGNORE_DOMAINS = ("zone", "scene")
NEED_ATTRIBUTE_DOMAINS = {"climate", "water_heater", "thermostat", "script"}
SCRIPT_DOMAIN = "script"
ATTR_CAN_CANCEL = "can_cancel"


def get_significant_states(hass, *args, **kwargs):
@@ -60,6 +67,7 @@ def _get_significant_states(
filters=None,
include_start_time_state=True,
significant_changes_only=True,
minimal_response=False,
):
"""
Return states changes during UTC period start_time - end_time.
@@ -87,26 +95,23 @@
if end_time is not None:
query = query.filter(States.last_updated < end_time)

query = query.order_by(States.last_updated)
query = query.order_by(States.entity_id, States.last_updated)

states = (
state
for state in execute(query)
if (_is_significant(state) and not state.attributes.get(ATTR_HIDDEN, False))
)
states = execute(query, to_native=False)

if _LOGGER.isEnabledFor(logging.DEBUG):
elapsed = time.perf_counter() - timer_start
_LOGGER.debug("get_significant_states took %fs", elapsed)

return _states_to_json(
return _sorted_states_to_json(
hass,
session,
states,
start_time,
entity_ids,
filters,
include_start_time_state,
minimal_response,
)


@@ -127,9 +132,11 @@ def state_changes_during_period(hass, start_time, end_time=None, entity_id=None)

entity_ids = [entity_id] if entity_id is not None else None

states = execute(query.order_by(States.last_updated))
states = execute(
query.order_by(States.entity_id, States.last_updated), to_native=False
)

return _states_to_json(hass, session, states, start_time, entity_ids)
return _sorted_states_to_json(hass, session, states, start_time, entity_ids)


def get_last_state_changes(hass, number_of_states, entity_id):
@@ -146,10 +153,13 @@ def get_last_state_changes(hass, number_of_states, entity_id):
entity_ids = [entity_id] if entity_id is not None else None

states = execute(
query.order_by(States.last_updated.desc()).limit(number_of_states)
query.order_by(States.entity_id, States.last_updated.desc()).limit(
number_of_states
),
to_native=False,
)

return _states_to_json(
return _sorted_states_to_json(
hass,
session,
reversed(states),
@@ -252,20 +262,23 @@ def _get_states_with_session(
]


def _states_to_json(
def _sorted_states_to_json(
hass,
session,
states,
start_time,
entity_ids,
filters=None,
include_start_time_state=True,
minimal_response=False,
):
"""Convert SQL results into JSON friendly data structure.

This takes our state list and turns it into a JSON friendly data
structure {'entity_id': [list of states], 'entity_id2': [list of states]}

States must be sorted by entity_id and last_updated

We also need to go back and create a synthetic zero data point for
each list of states, otherwise our graphs won't start on the Y
axis correctly.
@@ -293,7 +306,61 @@

# Append all changes to it
for ent_id, group in groupby(states, lambda state: state.entity_id):
result[ent_id].extend(group)
domain = split_entity_id(ent_id)[0]
ent_results = result[ent_id]
if not minimal_response or domain in NEED_ATTRIBUTE_DOMAINS:
ent_results.extend(
[
native_state
for native_state in (db_state.to_native() for db_state in group)
if (
domain != SCRIPT_DOMAIN
or native_state.attributes.get(ATTR_CAN_CANCEL)
)
and not native_state.attributes.get(ATTR_HIDDEN, False)
]
)
continue

# With minimal response we only provide a native
# State for the first and last response. All the states
# in-between only provide the "state" and the
# "last_changed".
if not ent_results:
ent_results.append(next(group).to_native())

initial_state = ent_results[-1]
prev_state = ent_results[-1]
initial_state_count = len(ent_results)

for db_state in group:
if ATTR_HIDDEN in db_state.attributes and db_state.to_native().attributes.get(
ATTR_HIDDEN, False
):
continue

# With minimal response we do not care about attribute
# changes so we can filter out duplicate states
if db_state.state == prev_state.state:
continue

ent_results.append(
{
STATE_KEY: db_state.state,
LAST_CHANGED_KEY: f"{str(db_state.last_changed).replace(' ','T').split('.')[0]}{DB_TIMEZONE}",
}
)
prev_state = db_state

if (
prev_state
and prev_state != initial_state
and len(ent_results) != initial_state_count
):
# There was at least one state change
# replace the last minimal state with
# a full state
ent_results[-1] = prev_state.to_native()

# Filter out the empty lists if some states had 0 results.
return {key: val for key, val in result.items() if val}
@@ -378,6 +445,8 @@ async def get(
request.query.get("significant_changes_only", "1") != "0"
)

minimal_response = "minimal_response" in request.query

hass = request.app["hass"]

return cast(
@@ -390,6 +459,7 @@
entity_ids,
include_start_time_state,
significant_changes_only,
minimal_response,
),
)

@@ -401,6 +471,7 @@ def _sorted_significant_states_json(
entity_ids,
include_start_time_state,
significant_changes_only,
minimal_response,
):
"""Fetch significant stats from the database as json."""
timer_start = time.perf_counter()
@@ -415,6 +486,7 @@
self.filters,
include_start_time_state,
significant_changes_only,
minimal_response,
)

result = list(result.values())
Expand Down Expand Up @@ -500,12 +572,3 @@ def apply(self, query, entity_ids=None):
if self.excluded_entities:
query = query.filter(~States.entity_id.in_(self.excluded_entities))
return query


def _is_significant(state):
"""Test if state is significant for history charts.

Will only test for things that are not filtered out in SQL.
"""
# scripts that are not cancellable will never change state
return state.domain != "script" or state.attributes.get("can_cancel")
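
A note on why the queries above gained `order_by(States.entity_id, ...)`: `_sorted_states_to_json` groups rows with `itertools.groupby`, which only merges *adjacent* items, so rows must arrive pre-sorted by `entity_id` (as the new docstring line states). A minimal standalone illustration, not code from this PR:

```python
from itertools import groupby

rows = [
    ("light.kitchen", "on"),
    ("light.kitchen", "off"),
    ("sensor.temperature", "21.5"),
]

# groupby only merges *adjacent* keys, which is why the queries now sort by
# entity_id before last_updated; unsorted input would yield fragmented groups.
for entity_id, group in groupby(rows, key=lambda row: row[0]):
    print(entity_id, [state for _, state in group])
# light.kitchen ['on', 'off']
# sensor.temperature ['21.5']
```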
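
On the API side, the new flag is keyed purely on presence (`"minimal_response" in request.query`). A hedged sketch of how a client might exercise it — the base URL, token, and entity ID are placeholders, and `/api/history/period/<timestamp>` is the standard history REST route:

```python
# Hedged sketch: query the history API with the new minimal_response flag.
# BASE_URL, TOKEN and the entity ID are placeholders, not values from this PR.
import requests

BASE_URL = "http://localhost:8123"
TOKEN = "YOUR_LONG_LIVED_TOKEN"

resp = requests.get(
    f"{BASE_URL}/api/history/period/2020-05-27T00:00:00+00:00",
    headers={"Authorization": f"Bearer {TOKEN}"},
    # The view only checks key presence, so an empty value is enough.
    params={"filter_entity_id": "sensor.outside_temperature", "minimal_response": ""},
)

for entity_states in resp.json():
    for state in entity_states:
        # First/last entries are full state objects; in-between entries carry
        # only "state" and "last_changed", per _sorted_states_to_json above.
        print(state.get("state"), state.get("last_changed"))
```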
10 changes: 6 additions & 4 deletions homeassistant/components/recorder/models.py
@@ -28,6 +28,8 @@

_LOGGER = logging.getLogger(__name__)

DB_TIMEZONE = "Z"


class Events(Base): # type: ignore
"""Event history data."""
@@ -64,7 +66,7 @@ def to_native(self):
self.event_type,
json.loads(self.event_data),
EventOrigin(self.origin),
_process_timestamp(self.time_fired),
process_timestamp(self.time_fired),
context=context,
)
except ValueError:
@@ -133,8 +135,8 @@ def to_native(self):
self.entity_id,
self.state,
json.loads(self.attributes),
_process_timestamp(self.last_changed),
_process_timestamp(self.last_updated),
process_timestamp(self.last_changed),
process_timestamp(self.last_updated),
context=context,
# Temp, because database can still store invalid entity IDs
# Remove with 1.0 or in 2020.
@@ -193,7 +195,7 @@ class SchemaChanges(Base): # type: ignore
changed = Column(DateTime(timezone=True), default=dt_util.utcnow)


def _process_timestamp(ts):
def process_timestamp(ts):
"""Process a timestamp into datetime object."""
if ts is None:
return None
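
The renamed `process_timestamp` and the new `DB_TIMEZONE` constant back the minimal-response fast path: instead of converting each row's `last_changed` into a timezone-aware datetime, the history component reshapes the raw string. A standalone sketch of that transformation, assuming the recorder stores naive UTC timestamps (which is what `str()` on such a value renders):

```python
from datetime import datetime

DB_TIMEZONE = "Z"

# Assumption: the recorder stores naive UTC datetimes, so str() renders them
# as "2020-05-27 12:34:56.789012". The replace/split below reshapes that into
# an ISO-8601-style string without constructing a new datetime object.
last_changed = datetime(2020, 5, 27, 12, 34, 56, 789012)
formatted = f"{str(last_changed).replace(' ', 'T').split('.')[0]}{DB_TIMEZONE}"
print(formatted)  # 2020-05-27T12:34:56Z
```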
26 changes: 17 additions & 9 deletions homeassistant/components/recorder/util.py
@@ -54,25 +54,33 @@ def commit(session, work):
return False


def execute(qry):
def execute(qry, to_native=True):
"""Query the database and convert the objects to HA native form.

This method also retries a few times in the case of stale connections.
"""
for tryno in range(0, RETRIES):
try:
timer_start = time.perf_counter()
result = [
row for row in (row.to_native() for row in qry) if row is not None
]
if to_native:
result = [
row for row in (row.to_native() for row in qry) if row is not None
]
else:
result = list(qry)

if _LOGGER.isEnabledFor(logging.DEBUG):
elapsed = time.perf_counter() - timer_start
_LOGGER.debug(
"converting %d rows to native objects took %fs",
len(result),
elapsed,
)
if to_native:
_LOGGER.debug(
"converting %d rows to native objects took %fs",
len(result),
elapsed,
)
else:
_LOGGER.debug(
"querying %d rows took %fs", len(result), elapsed,
)

return result
except SQLAlchemyError as err:
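
The `to_native` flag lets callers fetch raw `States` rows and decide later which ones are worth converting — the basis for the minimal-response path above. A rough usage sketch under the assumption of a standard recorder session; `fetch_states` is a hypothetical helper, not part of this PR:

```python
# Hedged sketch of the two execute() modes; fetch_states is a hypothetical
# helper, not code from this PR.
from homeassistant.components.recorder.models import States
from homeassistant.components.recorder.util import execute, session_scope


def fetch_states(hass, entity_id):
    with session_scope(hass=hass) as session:
        query = session.query(States).filter(States.entity_id == entity_id)

        # Default path: every row is converted via States.to_native(),
        # paying the cost of building full State objects up front.
        native_states = execute(query)

        # New fast path: raw database rows come back unconverted, and the
        # caller decides which rows (if any) are worth converting.
        db_rows = execute(query, to_native=False)

        return native_states, db_rows
```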