Skip to content

Commit ca724e1

Browse files
authored
Resolve errors with time to convert bins (#5283)
* time to convert test * rworkaround for time to convert bins * address comment
1 parent 9d5dd47 commit ca724e1

File tree

2 files changed

+67
-3
lines changed

2 files changed

+67
-3
lines changed

ee/clickhouse/queries/funnels/funnel_time_to_convert.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,19 @@ def get_query(self) -> str:
6060
]
6161
steps_average_conversion_time_expression_sum = " + ".join(steps_average_conversion_time_identifiers)
6262

63+
steps_average_conditional_for_invalid_values = [
64+
f"{identifier} >= 0" for identifier in steps_average_conversion_time_identifiers
65+
]
66+
# :HACK: Protect against CH bug https://github.com/ClickHouse/ClickHouse/issues/26580
67+
# once the issue is resolved, stop skipping the test: test_auto_bin_count_single_step_duplicate_events
68+
# and remove this comment
69+
6370
query = f"""
6471
WITH
6572
step_runs AS (
66-
{steps_per_person_query}
73+
SELECT * FROM (
74+
{steps_per_person_query}
75+
) WHERE {" AND ".join(steps_average_conditional_for_invalid_values)}
6776
),
6877
histogram_params AS (
6978
-- Binning ensures that each sample belongs to a bin in results
@@ -98,8 +107,6 @@ def get_query(self) -> str:
98107
histogram_from_seconds + floor(({steps_average_conversion_time_expression_sum} - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
99108
count() AS person_count
100109
FROM step_runs
101-
-- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too
102-
WHERE step_{to_step}_average_conversion_time_inner IS NOT NULL
103110
GROUP BY bin_from_seconds
104111
) results
105112
FULL OUTER JOIN (

ee/clickhouse/queries/funnels/test/test_funnel_time_to_convert.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import unittest
12
from uuid import uuid4
23

34
from ee.clickhouse.models.event import create_event
@@ -78,6 +79,62 @@ def test_auto_bin_count_single_step(self):
7879
},
7980
)
8081

82+
@unittest.skip("Wait for bug to be resolved")
83+
def test_auto_bin_count_single_step_duplicate_events(self):
84+
# demonstrates existing CH bug. Current patch is to remove negative times from consideration
85+
# Reference on what happens: https://github.com/ClickHouse/ClickHouse/issues/26580
86+
87+
_create_person(distinct_ids=["user a"], team=self.team)
88+
_create_person(distinct_ids=["user b"], team=self.team)
89+
_create_person(distinct_ids=["user c"], team=self.team)
90+
91+
_create_event(event="step one", distinct_id="user a", team=self.team, timestamp="2021-06-08 18:00:00")
92+
_create_event(event="step one", distinct_id="user a", team=self.team, timestamp="2021-06-08 19:00:00")
93+
# Converted from 0 to 1 in 3600 s
94+
_create_event(event="step one", distinct_id="user a", team=self.team, timestamp="2021-06-08 21:00:00")
95+
96+
_create_event(event="step one", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:00:00")
97+
_create_event(event="step one", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:37:00")
98+
# Converted from 0 to 1 in 2200 s
99+
100+
_create_event(event="step one", distinct_id="user c", team=self.team, timestamp="2021-06-11 07:00:00")
101+
_create_event(event="step one", distinct_id="user c", team=self.team, timestamp="2021-06-12 06:00:00")
102+
# Converted from 0 to 1 in 82_800 s
103+
104+
filter = Filter(
105+
data={
106+
"insight": INSIGHT_FUNNELS,
107+
"interval": "day",
108+
"date_from": "2021-06-07 00:00:00",
109+
"date_to": "2021-06-13 23:59:59",
110+
"funnel_from_step": 0,
111+
"funnel_to_step": 1,
112+
"funnel_window_days": 7,
113+
"events": [
114+
{"id": "step one", "order": 0},
115+
{"id": "step one", "order": 1},
116+
{"id": "step one", "order": 2},
117+
],
118+
}
119+
)
120+
121+
funnel_trends = ClickhouseFunnelTimeToConvert(filter, self.team, ClickhouseFunnel)
122+
results = funnel_trends.run()
123+
124+
# Autobinned using the minimum time to convert, maximum time to convert, and sample count
125+
self.assertEqual(
126+
results,
127+
{
128+
"bins": [
129+
(2220.0, 2), # Reached step 1 from step 0 in at least 2200 s but less than 29_080 s - users A and B
130+
(29080.0, 0), # Analogous to above, just an interval (in this case 26_880 s) up - no users
131+
(55940.0, 0), # Same as above
132+
(82800.0, 1), # Reached step 1 from step 0 in at least 82_800 s but less than 109_680 s - user C
133+
],
134+
"average_conversion_time": 29_540,
135+
},
136+
)
137+
81138
def test_custom_bin_count_single_step(self):
82139
_create_person(distinct_ids=["user a"], team=self.team)
83140
_create_person(distinct_ids=["user b"], team=self.team)

0 commit comments

Comments
 (0)