Skip to content

Commit 9c0c06b

Browse files
authored
chore: update analytics for july 2023 (clevercanary#892, clevercanary#893) (#3621)
* feat: update analytics package with features to support ga4 reports (anvilproject/anvil-portal#2662) * feat: add more flexibility to api usage in analytics package (anvil#2662) * feat: allow authenticating multiple service systems in analytics package (anvil#2662) * fix: take into account dimensions set to None (anvil#2662) * feat: standardize sort fields as list in analytics package (anvil#2662) * fix: use `items` instead of `iteritems` (anvil#2662) * feat: add ga4 support to analytics package (anvilproject/anvil-portal#2662) * feat: add `pre_plot_df_processor` option for plot over time (anvilproject/anvil-portal#2662) * fix: usage of `get_metrics_by_dimensions_v4_style` (anvilproject/anvil-portal#2662) * fix: return service system alone if there's only one (anvilproject/anvil-portal#2662) * feat: add `port` parameter to analytics authentication (anvilproject/anvil-portal#2662) * feat: additional features to help with ga4 (anvilproject/anvil-portal#2662) * feat: allow skipping api call in `get_data_df` (anvilproject/anvil-portal#2662) * feat: improve table rendering (anvilproject/anvil-portal#2662) * feat: better width for single columns (anvilproject/anvil-portal#2662) * feat: support differing parameters for analytics periods (anvilproject/anvil-portal#2806) * chore: update analytics for july 2023 (clevercanary#892, clevercanary#893)
1 parent cc07b4d commit 9c0c06b

File tree

10 files changed

+759
-907
lines changed

10 files changed

+759
-907
lines changed

analytics/analytics_package/analytics/api.py

Lines changed: 186 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from googleapiclient.discovery import build
33
import os
44
import pandas as pd
5+
import re
56

67

78
ga_service_params = (
@@ -23,25 +24,47 @@
2324
lambda service, params: service.reports().query(**params).execute()
2425
)
2526

26-
next_port = 8082
27+
next_port = None
2728
default_service_system = None
2829

29-
def authenticate(secret_name, service_params=ga_service_params):
30-
scopes, service_name, service_version, param_subs, query_func = service_params
31-
30+
def authenticate(secret_name, first_service_params=ga_service_params, *other_service_params, port=None):
    """Run one OAuth flow and build a service system per parameter set.

    Args:
        secret_name: Name of the environment variable holding the path to
            the OAuth client-secrets file.
        first_service_params: Parameter tuple of the first service to build;
            its first element is the list of OAuth scopes it needs.
        *other_service_params: Parameter tuples of further services that
            share the same credentials.
        port: Local port for the OAuth redirect server. When omitted, 8082
            is used on the first call and the port after the previously
            used one on later calls.

    Returns:
        The single service system when only one parameter set was given,
        otherwise a list of service systems in argument order.
    """
    global next_port

    service_param_sets = (first_service_params,) + other_service_params

    # A single consent flow has to cover every requested service, so the
    # scopes of all parameter sets are merged.
    all_scopes = set()
    for service_params in service_param_sets:
        all_scopes.update(service_params[0])

    secret_path = os.getenv(secret_name)
    flow = InstalledAppFlow.from_client_secrets_file(secret_path, scopes=all_scopes)

    if port is None:
        # Automatic port selection: start at 8082, then keep counting up.
        port = 8082 if next_port is None else next_port
        next_port = port + 1
    elif next_port is None:
        # An explicit port only seeds the counter if it was never set.
        next_port = port + 1

    credentials = flow.run_local_server(port=port)

    built_systems = [
        build_service_system(service_params, credentials)
        for service_params in service_param_sets
    ]
    if len(built_systems) == 1:
        return built_systems[0]
    return built_systems
56+
57+
def build_service_system(service_params, credentials):
58+
if len(service_params) == 4:
59+
service_name, service_version, param_subs_or_alt_api = service_params[1:]
60+
query_func = None
61+
else:
62+
service_name, service_version, param_subs_or_alt_api, query_func = service_params[1:]
4063

4164
# Build the service object.
4265
service = build(service_name, service_version, credentials=credentials)
4366

44-
service_system = (service, query_func, param_subs, credentials)
67+
service_system = (service, query_func, param_subs_or_alt_api, credentials)
4568

4669
global default_service_system
4770
if default_service_system is None:
@@ -50,17 +73,26 @@ def authenticate(secret_name, service_params=ga_service_params):
5073
return service_system
5174

5275

53-
def get_metrics_by_dimensions(metrics, dimensions, property, start_date, end_date, filters=None, segment=None, property_prefix='ga:', service_system=None, max_results=1000, sort_results=None, **other_params):
54-
76+
def get_metrics_by_dimensions(metrics, dimensions, service_system=None, sort_results=None, **other_params):
    """Query metrics by dimensions through the given (or default) service system.

    Args:
        metrics: Metric ids, as a list or a comma-separated string.
        dimensions: Dimension ids, as a list or a comma-separated string.
        service_system: Service system tuple to query; falls back to the
            module-level default_service_system when None.
        sort_results: Sort field ids, as a list or a comma-separated string.
        **other_params: Forwarded unchanged to the underlying query function.

    Returns:
        Whatever the selected query implementation returns.
    """
    system = default_service_system if service_system is None else service_system
    service, query_func, param_subs_or_alt_api = system[:3]

    # Callers may pass lists or comma-separated strings; work with lists.
    metrics = normalize_id_list(metrics)
    dimensions = normalize_id_list(dimensions)
    sort_results = normalize_id_list(sort_results)

    if query_func is not None:
        return get_metrics_by_dimensions_v3_style(
            service, query_func, param_subs_or_alt_api, metrics, dimensions,
            sort_results=sort_results, **other_params
        )

    # Without a query function the third slot holds an alternative API
    # callable that performs the whole request itself.
    return param_subs_or_alt_api(service, metrics, dimensions, sort_results=sort_results, **other_params)
90+
91+
92+
def get_metrics_by_dimensions_v3_style(service, query_func, param_subs, metrics, dimensions, property, start_date, end_date, sort_results, filters=None, segment=None, property_prefix='ga:', max_results=1000, **other_params):
93+
metrics = join_id_list(metrics)
94+
dimensions = join_id_list(dimensions)
95+
sort_results = join_id_list(sort_results)
6496

6597
# Dimensions and Metrics...
6698
# Dimensions are attributes, Metrics are quantitative measurements. e.g. city is a Dimension
@@ -99,7 +131,145 @@ def get_metrics_by_dimensions(metrics, dimensions, property, start_date, end_dat
99131
df = results_to_df(results)
100132

101133
return df
134+
135+
136+
def get_metrics_by_dimensions_v4_style(service, metrics, dimensions, property, start_date, end_date, sort_results, metric_filter=None, dimension_filter=None, base_metric_filter=None, base_dimension_filter=None, property_prefix="properties/", max_results=1000, **other_params):
    """Run a GA4-style report, following pagination, and return it as a DataFrame.

    Args:
        service: Service object exposing properties().runReport(...).
        metrics: List of metric names.
        dimensions: List of dimension names.
        property: Property id; property_prefix is prepended to it.
        start_date: Start of the date range.
        end_date: End of the date range.
        sort_results: List of field names to order by. A name found in
            dimensions is sent as a dimension ordering, any other name as a
            metric ordering.
        metric_filter: Metric filter (string expression or prebuilt dict).
        dimension_filter: Dimension filter (string expression or prebuilt dict).
        base_metric_filter: Metric filter that is ANDed with metric_filter.
        base_dimension_filter: Dimension filter that is ANDed with
            dimension_filter.
        property_prefix: Prefix put in front of property.
        max_results: Page size sent as the request "limit".
        **other_params: Accepted and ignored.

    Returns:
        The DataFrame built by v4_results_to_df from all fetched pages.
    """
    property = property_prefix + property

    params = {
        "dateRanges": [{"startDate": start_date, "endDate": end_date}],
        "metrics": [{"name": metric} for metric in metrics],
        "dimensions": [{"name": dimension} for dimension in dimensions],
        "metricFilter": parse_filter_expressions([base_metric_filter, metric_filter], True),
        "dimensionFilter": parse_filter_expressions([base_dimension_filter, dimension_filter], False),
        "orderBys": [
            {"dimension": {"dimensionName": field}} if field in dimensions else {"metric": {"metricName": field}}
            for field in sort_results
        ],
        "limit": max_results,
    }

    results = []
    offset = 0
    rows_left = None

    while True:
        result = service.properties().runReport(property=property, body=params).execute()

        if rows_left is None:
            # The response reports the total number of matching rows as
            # "rowCount". Reading a misspelled key always yields 0, which
            # ends pagination after the first page.
            rows_left = result.get("rowCount", 0)

        page_row_count = len(result["rows"]) if "rows" in result else 0
        if page_row_count == 0:
            break

        results.append(result)
        rows_left -= page_row_count
        if rows_left <= 0:
            break

        # Advance by the rows actually received so that a page shorter than
        # the requested limit cannot cause rows to be skipped.
        offset += page_row_count
        params["offset"] = offset

    return v4_results_to_df(results, dimensions, metrics)
172+
173+
def v4_results_to_df(results, dimensions, metrics):
    """Combine GA4 report response pages into one DataFrame.

    Args:
        results: List of response dicts, one per fetched page.
        dimensions: Dimension names; only used to name the columns of the
            empty frame returned when there are no results.
        metrics: Metric names; used like dimensions.

    Returns:
        A DataFrame with the dimension columns followed by the metric
        columns, named after the response headers. Cell values are taken
        as-is from each cell's "value" entry, and the pages are concatenated
        without renumbering the row index.
    """
    if not results:
        return pd.DataFrame(columns=dimensions + metrics)

    def header_names(page, key):
        return [header["name"] for header in page.get(key, [])]

    def cell_values(row, key):
        return [cell["value"] for cell in row.get(key, [])]

    combined = pd.DataFrame()
    for page in results:
        # Column names come from the response headers, dimensions first.
        columns = header_names(page, "dimensionHeaders") + header_names(page, "metricHeaders")

        # A page without "rows" contributes an empty frame with those columns.
        rows = None
        if "rows" in page:
            rows = [
                cell_values(row, "dimensionValues") + cell_values(row, "metricValues")
                for row in page["rows"]
            ]

        # Create this page's dataframe and append it to the running result.
        combined = pd.concat([combined, pd.DataFrame(rows, columns=columns)])

    return combined
192+
193+
# Service parameters for the GA4 Data API: OAuth scopes, service name,
# service version, and the function that performs the query. Being a
# four-element tuple, it is built into a service system without a query
# function, so get_metrics_by_dimensions_v4_style handles the whole request.
ga4_service_params = (
    ["https://www.googleapis.com/auth/analytics.readonly"],
    "analyticsdata",
    "v1beta",
    get_metrics_by_dimensions_v4_style,
)
198+
199+
# Patterns for the filter syntax understood by parse_filter_expression.
#
# A single term is `<field><operator><value>`. The two-character operators are
# listed before `>` and `<` because regex alternation takes the first
# alternative that matches: with the short ones first, `sessions>=10` is read
# as operator `>` with the value `=10`.
filter_match_re = re.compile(r"^(\w+)(?:(==|>=|<=|>|<|=@|=~)|(!=|!@|!~))(.*)$")
# A backslash in front of `,` or `;` escapes that separator inside a value.
filter_escape_re = re.compile(r"\\([,;])")
# Terms joined by an unescaped `;` are ANDed, by an unescaped `,` ORed.
filter_and_re = re.compile(r"((?:[^\\;]|\\(?:\\\\)*.)+)(?:;|$)")
filter_or_re = re.compile(r"((?:[^\\,]|\\(?:\\\\)*.)+)(?:,|$)")
# Operator symbol -> operation name. An inverted operator shares the name of
# its plain counterpart; the inversion is expressed by a "notExpression".
filter_op_names = {
    "==": "EQUAL",
    "!=": "EQUAL",
    ">": "GREATER_THAN",
    "<": "LESS_THAN",
    ">=": "GREATER_THAN_OR_EQUAL",
    "<=": "LESS_THAN_OR_EQUAL",
    "=@": "CONTAINS",
    "!@": "CONTAINS",
    "=~": "PARTIAL_REGEXP",
    "!~": "PARTIAL_REGEXP",
}


def parse_filter_expression(text, is_metric):
    """Translate a filter string into a nested filter-expression dict.

    Args:
        text: Filter string such as "country==US,country==CA;sessions>=10".
            Any non-string value (for example None or a prebuilt dict) is
            returned unchanged.
        is_metric: When true, terms become numeric filters; otherwise they
            become case-sensitive string filters.

    Returns:
        The expression dict: a single "filter", optionally wrapped in a
        "notExpression", or an "orGroup" / "andGroup" of such expressions.
        None for an empty string, the input itself for a non-string.

    Raises:
        ValueError: If a term is not of the form <field><operator><value>,
            or a metric filter value is not a number.
    """
    if not isinstance(text, str):
        return text
    if not text:
        # Nothing to filter on; an empty group would be a useless expression.
        return None

    def unescape(value):
        return filter_escape_re.sub(r"\1", value)

    def numeric_value(raw):
        # Integers keep their original string form; anything else has to be
        # a float and is sent as a double instead of a malformed integer.
        try:
            int(raw)
        except ValueError:
            pass
        else:
            return {"int64Value": raw}
        try:
            return {"doubleValue": float(raw)}
        except ValueError:
            raise ValueError("Metric filter value is not numeric: {!r}".format(raw)) from None

    def parse_match(term):
        match = filter_match_re.match(term)
        if match is None:
            raise ValueError("Invalid filter term: {!r}".format(term))
        field_name, plain_op, inverted_op, value = match.groups()
        op_name = filter_op_names[plain_op or inverted_op]
        if is_metric:
            plain_expression = {
                "filter": {
                    "fieldName": field_name,
                    "numericFilter": {
                        "operation": op_name,
                        "value": numeric_value(value),
                    },
                }
            }
        else:
            plain_expression = {
                "filter": {
                    "fieldName": field_name,
                    "stringFilter": {
                        # String filters call an equality match "EXACT".
                        "matchType": "EXACT" if op_name == "EQUAL" else op_name,
                        "value": unescape(value),
                        "caseSensitive": True,
                    },
                }
            }
        return plain_expression if plain_op else {"notExpression": plain_expression}

    def parse_or(term_group):
        or_terms = [parse_match(term) for term in filter_or_re.findall(term_group)]
        return or_terms[0] if len(or_terms) == 1 else {"orGroup": {"expressions": or_terms}}

    and_terms = [parse_or(term_group) for term_group in filter_and_re.findall(text)]
    return and_terms[0] if len(and_terms) == 1 else {"andGroup": {"expressions": and_terms}}
257+
258+
def parse_filter_expressions(filters, is_metric):
    """Parse several filters and AND the non-empty ones together.

    Args:
        filters: Iterable of filters, each in any form accepted by
            parse_filter_expression.
        is_metric: Passed through to parse_filter_expression.

    Returns:
        None when no filter produced a truthy expression, the single parsed
        expression when exactly one did, otherwise nested "andGroup"
        expressions pairing the result so far with each further expression.
    """
    combined = None
    for expression in filters:
        parsed = parse_filter_expression(expression, is_metric)
        if not parsed:
            # Skip filters that were not supplied or parsed to nothing.
            continue
        if combined is None:
            combined = parsed
        else:
            combined = {"andGroup": {"expressions": [combined, parsed]}}
    return combined
265+
266+
267+
def normalize_id_list(ids):
    """Return ids as a list.

    A falsy value (None, "", an empty list) gives a new empty list, a
    comma-separated string is split on ",", and anything else is returned
    unchanged.
    """
    if not ids:
        return []
    if isinstance(ids, str):
        return ids.split(",")
    return ids
269+
270+
def join_id_list(ids):
    """Join ids with commas, or return None when there are no ids."""
    if len(ids) == 0:
        return None
    return ",".join(ids)
272+
103273

104274
def build_params(source, subs):
105275
result = {}

0 commit comments

Comments
 (0)