Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(retention): Enable person on events querying for retention #9858

Merged
merged 61 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
9dec8ec
add constance setting
EDsCODE May 4, 2022
e0a5519
add minimum viable code
EDsCODE May 4, 2022
19250ba
update journeys_for
EDsCODE May 4, 2022
1b8616a
working test case
EDsCODE May 4, 2022
e50541b
chore: remove eventserializer in unnecessary places
EDsCODE May 5, 2022
b6501f0
change execution
EDsCODE May 5, 2022
de8179f
change imports
EDsCODE May 5, 2022
87a62ad
fix typing and tests
EDsCODE May 10, 2022
dfcb658
change format
EDsCODE May 10, 2022
59e7cbc
adjust
EDsCODE May 10, 2022
9f9ac5d
reset
EDsCODE May 10, 2022
347dcbb
Merge branch 'refactor-eventserializer' into person-on-events-trends
EDsCODE May 10, 2022
602c967
Merge branch 'master' into refactor-eventserializer
EDsCODE May 10, 2022
7d8d518
Merge branch 'refactor-eventserializer' into person-on-events-trends
EDsCODE May 10, 2022
9f2fb9a
split group path in trend breakdown
EDsCODE May 10, 2022
1126c78
fix: typing
EDsCODE May 10, 2022
83b2ac6
fix test
EDsCODE May 11, 2022
992e887
fix test, rename
neilkakkar May 16, 2022
e93f3e5
automatically add persons to events in tests on flush
neilkakkar May 16, 2022
0c7fb10
test CI with person-on-events
neilkakkar May 17, 2022
33a6235
wip
neilkakkar May 17, 2022
1d70928
fix
neilkakkar May 17, 2022
a7aeef4
fix ci
neilkakkar May 17, 2022
6b4a263
merge master
neilkakkar May 17, 2022
a209368
add comma
neilkakkar May 17, 2022
79a06a3
reduce test explosion
neilkakkar May 17, 2022
f17b33b
fix CH server image version #9743
neilkakkar May 17, 2022
6155027
update conditional
neilkakkar May 17, 2022
a38c21c
split person on events
neilkakkar May 17, 2022
1bfbf41
split person on events
neilkakkar May 17, 2022
fe6b5cd
split properly
neilkakkar May 17, 2022
c0b9375
fix test deps
neilkakkar May 17, 2022
110b832
add persons on events for tests
neilkakkar May 17, 2022
a6da68f
rm eventserializer changes
neilkakkar May 17, 2022
89accdf
upd
neilkakkar May 17, 2022
37a1a9c
Merge branch 'master' of github.com:PostHog/posthog into person-event…
neilkakkar May 17, 2022
72db871
fix tests
neilkakkar May 17, 2022
4a76432
streamline person on event tests
neilkakkar May 17, 2022
d4af83f
remove cache dependence, even after dematerialization deletion
neilkakkar May 17, 2022
e44e4d3
disregard snapshot tests
neilkakkar May 18, 2022
aa6a840
no casting for matrix variables
neilkakkar May 18, 2022
5513b73
resolve conflicts
neilkakkar May 18, 2022
f86fde9
fix actions problems
neilkakkar May 18, 2022
8358c0f
try to limit blast radius
neilkakkar May 18, 2022
0d67875
Update snapshots
github-actions[bot] May 18, 2022
342affa
why are these tests passing?
neilkakkar May 18, 2022
784ce42
Merge branch 'person-on-events-trends' of github.com:PostHog/posthog …
neilkakkar May 18, 2022
ff27e6d
stoopid
neilkakkar May 18, 2022
7c7223d
fix trend tests
neilkakkar May 18, 2022
4373cec
fix more trend tests, add test for old query regression
neilkakkar May 19, 2022
0cff27c
resolve conflicts
neilkakkar May 19, 2022
64f2848
Update snapshots
github-actions[bot] May 19, 2022
eb8611a
optimise persons
neilkakkar May 19, 2022
3a6a403
feat(retention): Enable person on events querying for retention
neilkakkar May 19, 2022
927277e
update snaps
neilkakkar May 19, 2022
7def139
Update snapshots
github-actions[bot] May 19, 2022
f215b48
Update snapshots
github-actions[bot] May 19, 2022
f135e45
fix tests
neilkakkar May 20, 2022
3a8a610
Merge branch 'retention-person-events' of github.com:PostHog/posthog …
neilkakkar May 20, 2022
d6936c2
resolve conflicts
neilkakkar May 23, 2022
6e89d51
Merge branch 'master' into retention-person-events
EDsCODE May 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions ee/clickhouse/queries/retention/clickhouse_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,17 @@ def actors_in_period(self, filter: RetentionFilter, team: Team):


def build_returning_event_query(
filter: RetentionFilter, team: Team, aggregate_users_by_distinct_id: Optional[bool] = None
filter: RetentionFilter,
team: Team,
aggregate_users_by_distinct_id: Optional[bool] = None,
using_person_on_events: bool = False,
):
returning_event_query_templated, returning_event_params = RetentionEventsQuery(
filter=filter.with_data({"breakdowns": []}), # Avoid pulling in breakdown values from returning event query
team=team,
event_query_type=RetentionQueryType.RETURNING,
aggregate_users_by_distinct_id=aggregate_users_by_distinct_id,
using_person_on_events=using_person_on_events,
).get_query()

query = substitute_params(returning_event_query_templated, returning_event_params)
Expand All @@ -157,7 +161,10 @@ def build_returning_event_query(


def build_target_event_query(
filter: RetentionFilter, team: Team, aggregate_users_by_distinct_id: Optional[bool] = None
filter: RetentionFilter,
team: Team,
aggregate_users_by_distinct_id: Optional[bool] = None,
using_person_on_events: bool = False,
):
target_event_query_templated, target_event_params = RetentionEventsQuery(
filter=filter,
Expand All @@ -168,6 +175,7 @@ def build_target_event_query(
else RetentionQueryType.TARGET
),
aggregate_users_by_distinct_id=aggregate_users_by_distinct_id,
using_person_on_events=using_person_on_events,
).get_query()

query = substitute_params(target_event_query_templated, target_event_params)
Expand Down
10 changes: 8 additions & 2 deletions ee/clickhouse/queries/retention/retention_actors.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,17 @@ def build_actor_activity_query(
person_ids
"""
returning_event_query = build_returning_event_query(
filter=filter, team=team, aggregate_users_by_distinct_id=aggregate_users_by_distinct_id
filter=filter,
team=team,
aggregate_users_by_distinct_id=aggregate_users_by_distinct_id,
using_person_on_events=team.actor_on_events_querying_enabled,
)

target_event_query = build_target_event_query(
filter=filter, team=team, aggregate_users_by_distinct_id=aggregate_users_by_distinct_id
filter=filter,
team=team,
aggregate_users_by_distinct_id=aggregate_users_by_distinct_id,
using_person_on_events=team.actor_on_events_querying_enabled,
)

all_params = {
Expand Down
34 changes: 28 additions & 6 deletions ee/clickhouse/queries/retention/retention_event_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from posthog.models.action.util import Action, format_action_filter
from posthog.models.filters.retention_filter import RetentionFilter
from posthog.models.team import Team
from posthog.models.utils import PersonPropertiesMode
from posthog.queries.util import format_ch_timestamp, get_trunc_func_ch


Expand All @@ -28,10 +29,14 @@ def __init__(
event_query_type: RetentionQueryType,
team: Team,
aggregate_users_by_distinct_id: Optional[bool] = None,
using_person_on_events: bool = False,
):
self._event_query_type = event_query_type
super().__init__(
filter=filter, team=team, override_aggregate_users_by_distinct_id=aggregate_users_by_distinct_id
filter=filter,
team=team,
override_aggregate_users_by_distinct_id=aggregate_users_by_distinct_id,
using_person_on_events=using_person_on_events,
)

self._trunc_func = get_trunc_func_ch(self._filter.period)
Expand Down Expand Up @@ -60,7 +65,7 @@ def get_query(self) -> Tuple[str, Dict[str, Any]]:
get_aggregation_target_field(
self._filter.aggregation_group_type_index,
self.EVENT_TABLE_ALIAS,
f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
f"{self.DISTINCT_ID_TABLE_ALIAS if not self._using_person_on_events else self.EVENT_TABLE_ALIAS}.person_id",
)
)
]
Expand All @@ -77,8 +82,8 @@ def get_query(self) -> Tuple[str, Dict[str, Any]]:
column = "properties"

if breakdown_type == "person":
table = "person"
column = "person_props"
table = "person" if not self._using_person_on_events else "events"
column = "person_props" if not self._using_person_on_events else "person_properties"

breakdown_values_expression = get_single_or_multi_property_string_expr(
breakdown=[breakdown["property"] for breakdown in self._filter.breakdowns],
Expand Down Expand Up @@ -131,7 +136,12 @@ def get_query(self) -> Tuple[str, Dict[str, Any]]:
date_query, date_params = self._get_date_filter()
self.params.update(date_params)

prop_query, prop_params = self._get_prop_groups(self._filter.property_groups)
prop_query, prop_params = self._get_prop_groups(
self._filter.property_groups,
person_properties_mode=PersonPropertiesMode.DIRECT_ON_EVENTS
if self._using_person_on_events
else PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
)

self.params.update(prop_params)

Expand Down Expand Up @@ -179,12 +189,24 @@ def _determine_should_join_distinct_ids(self) -> None:
else:
self._should_join_distinct_ids = True

def _determine_should_join_persons(self) -> None:
EnterpriseEventQuery._determine_should_join_persons(self)
if self._using_person_on_events:
self._should_join_distinct_ids = False
self._should_join_persons = False

def _get_entity_query(self, entity: Entity):
prepend = self._event_query_type
if entity.type == TREND_FILTER_TYPE_ACTIONS:
action = Action.objects.get(pk=entity.id)
action_query, params = format_action_filter(
team_id=self._team_id, action=action, prepend=prepend, use_loop=False
team_id=self._team_id,
action=action,
prepend=prepend,
use_loop=False,
person_properties_mode=PersonPropertiesMode.DIRECT_ON_EVENTS
if self._using_person_on_events
else PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
)
condition = action_query
elif entity.type == TREND_FILTER_TYPE_EVENTS:
Expand Down
20 changes: 12 additions & 8 deletions ee/clickhouse/test/test_journeys.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from ee.clickhouse.sql.events import EVENTS_DATA_TABLE
from posthog.client import sync_execute
from posthog.models import Person, PersonDistinctId, Team
from posthog.test.base import flush_persons_and_events
from posthog.test.base import _create_event, flush_persons_and_events


def journeys_for(
Expand Down Expand Up @@ -37,6 +37,9 @@ def journeys_for(
And clarifies the preconditions of the test
"""

def _create_event_from_args(**event):
return {**event}

flush_persons_and_events()
people = {}
events_to_create = []
Expand All @@ -53,7 +56,7 @@ def journeys_for(
event["timestamp"] = datetime.now()

events_to_create.append(
_create_event(
_create_event_from_args(
team=team,
distinct_id=distinct_id,
event=event["event"],
Expand All @@ -69,12 +72,12 @@ def journeys_for(
)
)

_create_all_events(events_to_create)
_create_all_events_raw(events_to_create)

return people


def _create_all_events(all_events: List[Dict]):
def _create_all_events_raw(all_events: List[Dict]):
parsed = ""
for event in all_events:
data: Dict[str, Any] = {
Expand Down Expand Up @@ -102,6 +105,11 @@ def _create_all_events(all_events: List[Dict]):
)


def create_all_events(all_events: List[dict]):
for event in all_events:
_create_event(**event)


# We collect all events per test into an array and batch create the events to reduce creation time
@dataclasses.dataclass
class InMemoryEvent:
Expand All @@ -119,10 +127,6 @@ class InMemoryEvent:
group4_properties: Dict


def _create_event(**event):
return {**event}


def update_or_create_person(distinct_ids: List[str], team_id: int, **kwargs):
(person, _) = Person.objects.update_or_create(
persondistinctid__distinct_id__in=distinct_ids,
Expand Down
4 changes: 2 additions & 2 deletions ee/clickhouse/views/test/test_clickhouse_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.test import TestCase
from django.test.client import Client

from ee.clickhouse.test.test_journeys import _create_all_events, update_or_create_person
from ee.clickhouse.test.test_journeys import create_all_events, update_or_create_person
from ee.clickhouse.util import ClickhouseTestMixin, snapshot_clickhouse_queries
from ee.clickhouse.views.test.funnel.util import EventPattern
from posthog.api.test.test_organization import create_organization
Expand Down Expand Up @@ -469,7 +469,7 @@ def test_can_get_actors_and_use_percent_char_filter(self):


def setup_user_activity_by_day(daily_activity, team):
_create_all_events(
create_all_events(
[
{"distinct_id": person_id, "team": team, "timestamp": timestamp, **event}
for timestamp, people in daily_activity.items()
Expand Down