Improve history api performance (#35822)
* Improve history api performance

A new option, "minimal_response", reduces the amount of data
sent for the states between the first and last history states
to only the "last_changed" and "state" fields.

Calling to_native is now avoided where possible and only
done at the end for rows that will be returned in the response.

When the `minimal_response` option is sent, the history
API now returns, for each entity, a JSON response similar
to the example shown after the testing numbers below.

Testing:

History API response time for 1 day,
average of 10 runs with minimal_response:

Before: 19.89s (content length: 3427428)
After: 8.44s (content length: 592199)
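
(That works out to a response roughly 2.4× faster with a payload about 5.8× smaller.)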

```
[{
	"attributes": {--TRUNCATED--},
	"context": {--TRUNCATED--},
	"entity_id": "binary_sensor.powerwall_status",
	"last_changed": "2020-05-18T23:20:03.213000+00:00",
	"last_updated": "2020-05-18T23:20:03.213000+00:00",
	"state": "on"
},
...
{
	"last_changed": "2020-05-19T00:41:08Z",
	"state": "unavailable"
},
...
{
	"attributes": {--TRUNCATED--},
	"context": {--TRUNCATED--},
	"entity_id": "binary_sensor.powerwall_status",
	"last_changed": "2020-05-19T00:42:08.069698+00:00",
	"last_updated": "2020-05-19T00:42:08.069698+00:00",
	"state": "on"
}]
```
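
For reference, a request exercising the new option could look like the following sketch. The host, token, timestamp, and entity are placeholders, and any HTTP client works; as the handler change below shows, only the presence of the `minimal_response` key matters, not its value.

```python
# Hypothetical client-side sketch; host, token, and entity are placeholders.
import requests

BASE_URL = "http://localhost:8123"  # assumed Home Assistant instance
TOKEN = "YOUR_LONG_LIVED_ACCESS_TOKEN"  # assumed long-lived access token

response = requests.get(
    f"{BASE_URL}/api/history/period/2020-05-18T00:00:00+00:00",
    params={
        "filter_entity_id": "binary_sensor.powerwall_status",
        "minimal_response": "",  # key presence enables the minimal payload
    },
    headers={"Authorization": f"Bearer {TOKEN}"},
    timeout=30,
)
print(response.json())
```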

* Remove impossible state check

* Remove another impossible state check

* Update homeassistant/components/history/__init__.py

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>

* Reorder to save some indent per review

* Make query response make sense with to_native=False

* Update test for 00:00 to Z change

* Update homeassistant/components/recorder/models.py

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
bdraco and balloob authored May 27, 2020
1 parent 6a06d64 commit 0a6deeb
Showing 4 changed files with 194 additions and 37 deletions.
109 changes: 86 additions & 23 deletions homeassistant/components/history/__init__.py

```diff
@@ -12,7 +12,7 @@
 from homeassistant.components import recorder
 from homeassistant.components.http import HomeAssistantView
-from homeassistant.components.recorder.models import States
+from homeassistant.components.recorder.models import DB_TIMEZONE, States
 from homeassistant.components.recorder.util import execute, session_scope
 from homeassistant.const import (
     ATTR_HIDDEN,
@@ -22,6 +22,7 @@
     CONF_INCLUDE,
     HTTP_BAD_REQUEST,
 )
+from homeassistant.core import split_entity_id
 import homeassistant.helpers.config_validation as cv
 import homeassistant.util.dt as dt_util
@@ -32,6 +33,9 @@
 DOMAIN = "history"
 CONF_ORDER = "use_include_order"
 
+STATE_KEY = "state"
+LAST_CHANGED_KEY = "last_changed"
+
 CONFIG_SCHEMA = vol.Schema(
     {
         DOMAIN: recorder.FILTER_SCHEMA.extend(
@@ -43,6 +47,9 @@
 SIGNIFICANT_DOMAINS = ("climate", "device_tracker", "thermostat", "water_heater")
 IGNORE_DOMAINS = ("zone", "scene")
+NEED_ATTRIBUTE_DOMAINS = {"climate", "water_heater", "thermostat", "script"}
+SCRIPT_DOMAIN = "script"
+ATTR_CAN_CANCEL = "can_cancel"
 
 
 def get_significant_states(hass, *args, **kwargs):
@@ -60,6 +67,7 @@ def _get_significant_states(
     filters=None,
     include_start_time_state=True,
     significant_changes_only=True,
+    minimal_response=False,
 ):
     """
     Return states changes during UTC period start_time - end_time.
@@ -87,26 +95,23 @@
     if end_time is not None:
         query = query.filter(States.last_updated < end_time)
 
-    query = query.order_by(States.last_updated)
+    query = query.order_by(States.entity_id, States.last_updated)
 
-    states = (
-        state
-        for state in execute(query)
-        if (_is_significant(state) and not state.attributes.get(ATTR_HIDDEN, False))
-    )
+    states = execute(query, to_native=False)
 
     if _LOGGER.isEnabledFor(logging.DEBUG):
         elapsed = time.perf_counter() - timer_start
         _LOGGER.debug("get_significant_states took %fs", elapsed)
 
-    return _states_to_json(
+    return _sorted_states_to_json(
         hass,
         session,
         states,
         start_time,
         entity_ids,
         filters,
         include_start_time_state,
+        minimal_response,
     )
@@ -127,9 +132,11 @@ def state_changes_during_period(hass, start_time, end_time=None, entity_id=None):
 
     entity_ids = [entity_id] if entity_id is not None else None
 
-    states = execute(query.order_by(States.last_updated))
+    states = execute(
+        query.order_by(States.entity_id, States.last_updated), to_native=False
+    )
 
-    return _states_to_json(hass, session, states, start_time, entity_ids)
+    return _sorted_states_to_json(hass, session, states, start_time, entity_ids)
@@ -146,10 +153,13 @@ def get_last_state_changes(hass, number_of_states, entity_id):
 
     entity_ids = [entity_id] if entity_id is not None else None
 
     states = execute(
-        query.order_by(States.last_updated.desc()).limit(number_of_states)
+        query.order_by(States.entity_id, States.last_updated.desc()).limit(
+            number_of_states
+        ),
+        to_native=False,
     )
 
-    return _states_to_json(
+    return _sorted_states_to_json(
         hass,
         session,
         reversed(states),
@@ -252,20 +262,23 @@ def _get_states_with_session(
     ]
 
 
-def _states_to_json(
+def _sorted_states_to_json(
     hass,
     session,
     states,
     start_time,
     entity_ids,
     filters=None,
     include_start_time_state=True,
+    minimal_response=False,
 ):
     """Convert SQL results into JSON friendly data structure.
 
     This takes our state list and turns it into a JSON friendly data
     structure {'entity_id': [list of states], 'entity_id2': [list of states]}
 
+    States must be sorted by entity_id and last_updated
+
     We also need to go back and create a synthetic zero data point for
     each list of states, otherwise our graphs won't start on the Y
     axis correctly.
@@ -293,7 +306,61 @@
 
     # Append all changes to it
     for ent_id, group in groupby(states, lambda state: state.entity_id):
-        result[ent_id].extend(group)
+        domain = split_entity_id(ent_id)[0]
+        ent_results = result[ent_id]
+        if not minimal_response or domain in NEED_ATTRIBUTE_DOMAINS:
+            ent_results.extend(
+                [
+                    native_state
+                    for native_state in (db_state.to_native() for db_state in group)
+                    if (
+                        domain != SCRIPT_DOMAIN
+                        or native_state.attributes.get(ATTR_CAN_CANCEL)
+                    )
+                    and not native_state.attributes.get(ATTR_HIDDEN, False)
+                ]
+            )
+            continue
+
+        # With minimal response we only provide a native
+        # State for the first and last response. All the states
+        # in-between only provide the "state" and the
+        # "last_changed".
+        if not ent_results:
+            ent_results.append(next(group).to_native())
+
+        initial_state = ent_results[-1]
+        prev_state = ent_results[-1]
+        initial_state_count = len(ent_results)
+
+        for db_state in group:
+            if ATTR_HIDDEN in db_state.attributes and db_state.to_native().attributes.get(
+                ATTR_HIDDEN, False
+            ):
+                continue
+
+            # With minimal response we do not care about attribute
+            # changes so we can filter out duplicate states
+            if db_state.state == prev_state.state:
+                continue
+
+            ent_results.append(
+                {
+                    STATE_KEY: db_state.state,
+                    LAST_CHANGED_KEY: f"{str(db_state.last_changed).replace(' ','T').split('.')[0]}{DB_TIMEZONE}",
+                }
+            )
+            prev_state = db_state
+
+        if (
+            prev_state
+            and prev_state != initial_state
+            and len(ent_results) != initial_state_count
+        ):
+            # There was at least one state change
+            # replace the last minimal state with
+            # a full state
+            ent_results[-1] = prev_state.to_native()
 
     # Filter out the empty lists if some states had 0 results.
     return {key: val for key, val in result.items() if val}
@@ -378,6 +445,8 @@ async def get(
             request.query.get("significant_changes_only", "1") != "0"
         )
 
+        minimal_response = "minimal_response" in request.query
+
         hass = request.app["hass"]
 
         return cast(
@@ -390,6 +459,7 @@
                 entity_ids,
                 include_start_time_state,
                 significant_changes_only,
+                minimal_response,
             ),
         )
@@ -401,6 +471,7 @@ def _sorted_significant_states_json(
         entity_ids,
         include_start_time_state,
        significant_changes_only,
+        minimal_response,
    ):
         """Fetch significant stats from the database as json."""
         timer_start = time.perf_counter()
@@ -415,6 +486,7 @@ def _sorted_significant_states_json(
                 self.filters,
                 include_start_time_state,
                 significant_changes_only,
+                minimal_response,
             )
 
         result = list(result.values())
@@ -500,12 +572,3 @@ def apply(self, query, entity_ids=None):
         if self.excluded_entities:
             query = query.filter(~States.entity_id.in_(self.excluded_entities))
         return query
-
-
-def _is_significant(state):
-    """Test if state is significant for history charts.
-
-    Will only test for things that are not filtered out in SQL.
-    """
-    # scripts that are not cancellable will never change state
-    return state.domain != "script" or state.attributes.get("can_cancel")
```
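
The `last_changed` strings in the minimal rows (for example "2020-05-19T00:41:08Z" in the commit message above) come from the f-string in `_sorted_states_to_json` rather than from `to_native()` plus `isoformat()`. A standalone sketch of what that expression produces, using an invented sample value with microseconds:

```python
from datetime import datetime, timezone

DB_TIMEZONE = "Z"  # as defined in recorder/models.py below

# Invented sample value; the recorder stores UTC timestamps.
last_changed = datetime(2020, 5, 19, 0, 41, 8, 69698, tzinfo=timezone.utc)

# str() yields "2020-05-19 00:41:08.069698+00:00". Replacing the space with
# "T" and splitting on "." drops both the microseconds and the "+00:00"
# offset, and appending DB_TIMEZONE restores the UTC marker.
formatted = f"{str(last_changed).replace(' ', 'T').split('.')[0]}{DB_TIMEZONE}"
print(formatted)  # 2020-05-19T00:41:08Z
```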
10 changes: 6 additions & 4 deletions homeassistant/components/recorder/models.py

```diff
@@ -28,6 +28,8 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+DB_TIMEZONE = "Z"
+
 
 class Events(Base):  # type: ignore
     """Event history data."""
@@ -64,7 +66,7 @@ def to_native(self):
                 self.event_type,
                 json.loads(self.event_data),
                 EventOrigin(self.origin),
-                _process_timestamp(self.time_fired),
+                process_timestamp(self.time_fired),
                 context=context,
             )
         except ValueError:
@@ -133,8 +135,8 @@ def to_native(self):
                 self.entity_id,
                 self.state,
                 json.loads(self.attributes),
-                _process_timestamp(self.last_changed),
-                _process_timestamp(self.last_updated),
+                process_timestamp(self.last_changed),
+                process_timestamp(self.last_updated),
                 context=context,
                 # Temp, because database can still store invalid entity IDs
                 # Remove with 1.0 or in 2020.
@@ -193,7 +195,7 @@ class SchemaChanges(Base):  # type: ignore
     changed = Column(DateTime(timezone=True), default=dt_util.utcnow)
 
 
-def _process_timestamp(ts):
+def process_timestamp(ts):
     """Process a timestamp into datetime object."""
     if ts is None:
         return None
```
26 changes: 17 additions & 9 deletions homeassistant/components/recorder/util.py

```diff
@@ -54,25 +54,33 @@ def commit(session, work):
     return False
 
 
-def execute(qry):
+def execute(qry, to_native=True):
     """Query the database and convert the objects to HA native form.
 
     This method also retries a few times in the case of stale connections.
     """
     for tryno in range(0, RETRIES):
         try:
             timer_start = time.perf_counter()
-            result = [
-                row for row in (row.to_native() for row in qry) if row is not None
-            ]
+            if to_native:
+                result = [
+                    row for row in (row.to_native() for row in qry) if row is not None
+                ]
+            else:
+                result = list(qry)
 
             if _LOGGER.isEnabledFor(logging.DEBUG):
                 elapsed = time.perf_counter() - timer_start
-                _LOGGER.debug(
-                    "converting %d rows to native objects took %fs",
-                    len(result),
-                    elapsed,
-                )
+                if to_native:
+                    _LOGGER.debug(
+                        "converting %d rows to native objects took %fs",
+                        len(result),
+                        elapsed,
+                    )
+                else:
+                    _LOGGER.debug(
+                        "querying %d rows took %fs", len(result), elapsed,
+                    )
 
             return result
         except SQLAlchemyError as err:
```
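
A minimal sketch of how the two `execute` modes differ for a caller (the `hass` object and the query construction are assumptions, mirroring the history component above):

```python
# Assumed setup, mirroring the history component's use of the recorder.
from homeassistant.components.recorder.models import States
from homeassistant.components.recorder.util import execute, session_scope

with session_scope(hass=hass) as session:  # `hass` assumed to be in scope
    query = session.query(States).order_by(States.entity_id, States.last_updated)

    # Old behavior: every row is converted to a native State up front.
    native_states = execute(query)

    # New behavior: raw ORM rows come back unconverted; to_native() is called
    # later, and only on rows that will actually appear in the response.
    rows = execute(query, to_native=False)
    first_native = rows[0].to_native() if rows else None
```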