22from googleapiclient .discovery import build
33import os
44import pandas as pd
5+ import re
56
67
78ga_service_params = (
2324 lambda service , params : service .reports ().query (** params ).execute ()
2425)
2526
# Port to use for the next authenticate() call that does not name one.
# Stays None until authenticate() has picked (or been given) a first port.
next_port = None

# Service system used by get_metrics_by_dimensions() when the caller does
# not pass one explicitly; None until a service system has been stored here.
default_service_system = None
2829
def authenticate(secret_name, first_service_params=ga_service_params, *other_service_params, port=None):
    """Run one OAuth installed-app flow and build a service system per API.

    Args:
        secret_name: Name of the environment variable holding the path to
            the OAuth client-secrets file.
        first_service_params: Service parameter tuple for the first API.
            Element 0 is the list of OAuth scopes; the remaining elements
            are consumed by ``build_service_system``.
        *other_service_params: Further service parameter tuples that share
            the same credentials.
        port: Local port for the OAuth redirect server. When omitted, 8082
            is used on the first call and the port after the previously
            auto-assigned one on later calls.

    Returns:
        A single service system when one parameter set was given, otherwise
        a list of service systems in the order the parameter sets were given.

    Raises:
        ValueError: If the environment variable ``secret_name`` is not set.
    """
    global next_port

    service_param_sets = (first_service_params,) + other_service_params

    # One consent screen covers every requested API, so ask for the union of
    # all scopes. Sorted into a list because the flow expects a sequence and
    # a deterministic order keeps the authorization URL stable.
    all_scopes = sorted(
        {scope for service_params in service_param_sets for scope in service_params[0]}
    )

    client_secret_path = os.getenv(secret_name)
    if client_secret_path is None:
        raise ValueError(
            "Environment variable {!r} is not set; it must hold the path to "
            "the OAuth client secrets file".format(secret_name)
        )

    flow = InstalledAppFlow.from_client_secrets_file(client_secret_path, scopes=all_scopes)

    if port is None:
        # Auto-assign: start at 8082, then keep counting up on later calls.
        port = 8082 if next_port is None else next_port
        next_port = port + 1
    elif next_port is None:
        # An explicit port on the first call seeds the auto-assign counter.
        next_port = port + 1

    credentials = flow.run_local_server(port=port)

    built_systems = [
        build_service_system(service_params, credentials)
        for service_params in service_param_sets
    ]

    return built_systems if len(built_systems) > 1 else built_systems[0]
56+
57+ def build_service_system (service_params , credentials ):
58+ if len (service_params ) == 4 :
59+ service_name , service_version , param_subs_or_alt_api = service_params [1 :]
60+ query_func = None
61+ else :
62+ service_name , service_version , param_subs_or_alt_api , query_func = service_params [1 :]
4063
4164 # Build the service object.
4265 service = build (service_name , service_version , credentials = credentials )
4366
44- service_system = (service , query_func , param_subs , credentials )
67+ service_system = (service , query_func , param_subs_or_alt_api , credentials )
4568
4669 global default_service_system
4770 if default_service_system is None :
@@ -50,17 +73,26 @@ def authenticate(secret_name, service_params=ga_service_params):
5073 return service_system
5174
5275
def get_metrics_by_dimensions(metrics, dimensions, service_system=None, sort_results=None, **other_params):
    """Query metrics broken down by dimensions through a service system.

    Args:
        metrics: Metric ids, as a list or a comma-separated string.
        dimensions: Dimension ids, as a list or a comma-separated string.
        service_system: Tuple whose first three elements are
            ``(service, query_func, param_subs_or_alt_api)``. Falls back to
            the module-level ``default_service_system`` when omitted.
        sort_results: Ids to sort by, as a list or a comma-separated string.
        **other_params: Forwarded unchanged to the backend query function.
            Anything else the backend needs (property, dates, filters, ...)
            must be passed by keyword here.

    Returns:
        Whatever the selected backend query function returns.
    """
    system = default_service_system if service_system is None else service_system
    service, query_func, param_subs_or_alt_api = system[:3]

    metric_ids = normalize_id_list(metrics)
    dimension_ids = normalize_id_list(dimensions)
    sort_ids = normalize_id_list(sort_results)

    if query_func is not None:
        # A query function is present: the third slot holds parameter
        # substitutions for the v3-style code path.
        return get_metrics_by_dimensions_v3_style(
            service,
            query_func,
            param_subs_or_alt_api,
            metric_ids,
            dimension_ids,
            sort_results=sort_ids,
            **other_params
        )

    # No query function: the third slot is itself the callable that runs
    # the report for this API.
    return param_subs_or_alt_api(
        service,
        metric_ids,
        dimension_ids,
        sort_results=sort_ids,
        **other_params
    )
90+
91+
92+ def get_metrics_by_dimensions_v3_style (service , query_func , param_subs , metrics , dimensions , property , start_date , end_date , sort_results , filters = None , segment = None , property_prefix = 'ga:' , max_results = 1000 , ** other_params ):
93+ metrics = join_id_list (metrics )
94+ dimensions = join_id_list (dimensions )
95+ sort_results = join_id_list (sort_results )
6496
6597 # Dimensions and Metrics...
6698 # Dimensions are attributes, Metrics are quantitative measurements. e.g. city is a Dimension
@@ -99,7 +131,145 @@ def get_metrics_by_dimensions(metrics, dimensions, property, start_date, end_dat
99131 df = results_to_df (results )
100132
101133 return df
134+
135+
def get_metrics_by_dimensions_v4_style(service, metrics, dimensions, property, start_date, end_date, sort_results, metric_filter=None, dimension_filter=None, base_metric_filter=None, base_dimension_filter=None, property_prefix="properties/", max_results=1000, **other_params):
    """Run a GA4 Data API report, following pagination, and return a DataFrame.

    Args:
        service: Built API client exposing ``properties().runReport(...)``.
        metrics: List of metric names.
        dimensions: List of dimension names.
        property: Property id; ``property_prefix`` is prepended to it.
        start_date: Start of the report date range.
        end_date: End of the report date range.
        sort_results: List of field names to order by. A name that appears
            in ``dimensions`` is ordered as a dimension, anything else as a
            metric.
        metric_filter: Metric filter, as a filter string or an already
            built filter expression.
        dimension_filter: Dimension filter, same forms as ``metric_filter``.
        base_metric_filter: Extra metric filter ANDed with ``metric_filter``.
        base_dimension_filter: Extra dimension filter ANDed with
            ``dimension_filter``.
        property_prefix: Prefix that turns the property id into a resource
            name.
        max_results: Page size sent as ``limit``; also the offset step.
        **other_params: Accepted for call compatibility and ignored.

    Returns:
        DataFrame produced by ``v4_results_to_df`` from all fetched pages.
    """
    property = property_prefix + property

    params = {
        "dateRanges": [{"startDate": start_date, "endDate": end_date}],
        "metrics": [{"name": metric} for metric in metrics],
        "dimensions": [{"name": dimension} for dimension in dimensions],
        "metricFilter": parse_filter_expressions([base_metric_filter, metric_filter], True),
        "dimensionFilter": parse_filter_expressions([base_dimension_filter, dimension_filter], False),
        "orderBys": [
            {"dimension": {"dimensionName": field}}
            if field in dimensions
            else {"metric": {"metricName": field}}
            for field in sort_results
        ],
        "limit": max_results,
    }

    results = []
    offset = 0
    rows_left = None

    while True:
        result = service.properties().runReport(property=property, body=params).execute()

        page_row_count = len(result.get("rows", []))
        if page_row_count == 0:
            break
        results.append(result)

        if rows_left is None:
            # The response reports the total number of matching rows as
            # "rowCount", independent of limit/offset. If it is absent we
            # treat the first page as the only page.
            rows_left = result.get("rowCount", 0)

        rows_left -= page_row_count
        if rows_left <= 0:
            break

        # Ask for the next page on the following iteration.
        offset += max_results
        params["offset"] = offset

    return v4_results_to_df(results, dimensions, metrics)
172+
def v4_results_to_df(results, dimensions, metrics):
    """Flatten GA4 report response pages into one DataFrame.

    Args:
        results: List of report response dicts (one per page). Each may
            carry ``dimensionHeaders``, ``metricHeaders`` and ``rows``.
        dimensions: Dimension names; used for the column names only when
            ``results`` is empty.
        metrics: Metric names; used for the column names only when
            ``results`` is empty.

    Returns:
        DataFrame with one row per report row, dimension columns first and
        metric columns after, holding the cell values exactly as they appear
        in the response. Rows are numbered consecutively across pages.
    """
    if not results:
        return pd.DataFrame(columns=list(dimensions) + list(metrics))

    frames = []
    for result in results:
        # Column names come from the response headers, dimensions first.
        column_names = [header["name"] for header in result.get("dimensionHeaders", [])]
        column_names += [header["name"] for header in result.get("metricHeaders", [])]

        # A page without "rows" contributes an empty frame with the columns.
        data = [
            [cell["value"] for cell in row.get("dimensionValues", [])]
            + [cell["value"] for cell in row.get("metricValues", [])]
            for row in result.get("rows", [])
        ]

        frames.append(pd.DataFrame(data, columns=column_names))

    # Concatenate once (not inside the loop) and renumber so index labels
    # are unique across pages.
    return pd.concat(frames, ignore_index=True)
192+
# Service parameters for the GA4 Data API, in the four-element form:
# (OAuth scopes, service name, service version, report function).
# There is no separate query function; the last element runs the report.
ga4_service_params = (
    ["https://www.googleapis.com/auth/analytics.readonly"],
    "analyticsdata",
    "v1beta",
    get_metrics_by_dimensions_v4_style,
)
198+
# One filter term: <field><operator><value>.
# Group 2 captures a plain operator, group 3 an inverted ("not") operator.
# Two-character operators are listed before ">" and "<" because regex
# alternation takes the first alternative that matches; otherwise ">=5"
# would parse as operator ">" with value "=5".
# Field names may contain ":" (e.g. "customEvent:plan").
filter_match_re = re.compile(r"^([\w:]+)(?:(>=|<=|==|=@|=~|>|<)|(!=|!@|!~))(.*)$")

# Backslash-escaped separators inside a value: "\," and "\;".
filter_escape_re = re.compile(r"\\([,;])")

# Splitters for AND (";") and OR (",") lists that skip escaped separators.
filter_and_re = re.compile(r"((?:[^\\;]|\\(?:\\\\)*.)+)(?:;|$)")
filter_or_re = re.compile(r"((?:[^\\,]|\\(?:\\\\)*.)+)(?:,|$)")

# Filter-string operator -> API operation / match type name.
# Inverted operators map to the same name as their plain counterpart.
filter_op_names = {
    "==": "EQUAL",
    "!=": "EQUAL",
    ">": "GREATER_THAN",
    "<": "LESS_THAN",
    ">=": "GREATER_THAN_OR_EQUAL",
    "<=": "LESS_THAN_OR_EQUAL",
    "=@": "CONTAINS",
    "!@": "CONTAINS",
    "=~": "PARTIAL_REGEXP",
    "!~": "PARTIAL_REGEXP",
}
215+
def parse_filter_expression(text, is_metric):
    """Translate a filter string into a nested filter-expression dict.

    The string is a list of terms ``<field><op><value>``. Terms joined by
    ``,`` are ORed, groups joined by ``;`` are ANDed, and a separator inside
    a value is written ``\\,`` or ``\\;``.

    Args:
        text: Filter string. Any non-string value (for example ``None`` or
            an already built expression dict) is returned unchanged.
        is_metric: True to emit numeric filters, False to emit
            case-sensitive string filters.

    Returns:
        The filter-expression dict, or ``None`` when ``text`` holds no
        terms (for example an empty string).

    Raises:
        ValueError: If a term cannot be parsed.
    """
    if not isinstance(text, str):
        return text

    def unescape(value):
        # Turn "\," and "\;" back into the literal separator characters.
        return filter_escape_re.sub(r"\1", value)

    def numeric_value(raw):
        # Whole numbers go out as int64Value (kept as the original string);
        # anything else that parses as a float goes out as doubleValue.
        try:
            int(raw)
        except ValueError:
            pass
        else:
            return {"int64Value": raw}
        try:
            return {"doubleValue": float(raw)}
        except ValueError:
            # Not a number at all: pass it through untouched and let the
            # API report the problem.
            return {"int64Value": raw}

    def parse_match(term):
        match = filter_match_re.match(term)
        if match is None:
            raise ValueError("Cannot parse filter term: {!r}".format(term))
        field_name, plain_op, inverted_op, value = match.groups()
        op_name = filter_op_names[plain_op or inverted_op]
        if is_metric:
            plain_expression = {
                "filter": {
                    "fieldName": field_name,
                    "numericFilter": {
                        "operation": op_name,
                        "value": numeric_value(value),
                    },
                }
            }
        else:
            plain_expression = {
                "filter": {
                    "fieldName": field_name,
                    "stringFilter": {
                        # String filters call equality "EXACT".
                        "matchType": "EXACT" if op_name == "EQUAL" else op_name,
                        "value": unescape(value),
                        "caseSensitive": True,
                    },
                }
            }
        # Inverted operators reuse the plain operation wrapped in a NOT.
        return plain_expression if plain_op else {"notExpression": plain_expression}

    def parse_or(group):
        or_terms = [parse_match(term) for term in filter_or_re.findall(group)]
        if not or_terms:
            raise ValueError("Cannot parse filter group: {!r}".format(group))
        return or_terms[0] if len(or_terms) == 1 else {"orGroup": {"expressions": or_terms}}

    and_terms = [parse_or(group) for group in filter_and_re.findall(text)]
    if not and_terms:
        # Nothing to filter on; avoid emitting an empty andGroup.
        return None
    return and_terms[0] if len(and_terms) == 1 else {"andGroup": {"expressions": and_terms}}
257+
def parse_filter_expressions(filters, is_metric):
    """Parse several filters and AND the non-empty ones together.

    Args:
        filters: Iterable of filters, each in any form accepted by
            ``parse_filter_expression``.
        is_metric: Passed through to ``parse_filter_expression``.

    Returns:
        ``None`` when no filter yields a truthy expression, the single
        expression when only one does, otherwise the expressions folded
        left to right into two-element ``andGroup`` dicts.
    """
    combined = None
    for candidate in filters:
        expression = parse_filter_expression(candidate, is_metric)
        if not expression:
            continue
        if combined is None:
            combined = expression
        else:
            combined = {"andGroup": {"expressions": [combined, expression]}}
    return combined
265+
266+
def normalize_id_list(ids):
    """Return ``ids`` as a list of id strings.

    Args:
        ids: A comma-separated string, an iterable of ids, or a falsy value
            (``None``, ``""``, empty list).

    Returns:
        A list: the split string, a list copy of the iterable, or ``[]``
        for a falsy input. Always a real list, so callers can concatenate
        results with ``+`` even when a tuple was passed in.
    """
    if not ids:
        return []
    if isinstance(ids, str):
        return ids.split(",")
    return list(ids)
269+
def join_id_list(ids):
    """Join ids into a comma-separated string.

    Args:
        ids: Iterable of id strings; may be empty or ``None``.

    Returns:
        The ids joined with ``,``, or ``None`` when there are none.
    """
    return ",".join(ids) if ids else None
272+
103273
104274def build_params (source , subs ):
105275 result = {}
0 commit comments