55from urllib .parse import urlparse
66import datetime as dt
77
def get_flat_data_df(metrics, dimensions, **other_params):
    """
    Get a df from the Analytics API with a flat structure (no multiindex).

    Row filtering is not done here; callers that need it pass a
    ``dimension_filter`` through ``other_params`` so that ``get_data_df``
    receives it.

    :param metrics: the metrics to get
    :param dimensions: the dimensions to get; each element must provide an
        ``"id"`` key, and the full list is handed to ``get_rename_dict`` to
        build the column rename mapping
    :param other_params: any further keyword arguments, forwarded unchanged to
        ``get_data_df`` (e.g. the Analytics API parameters and an optional
        ``dimension_filter``)
    :return: a DataFrame with the data from the Analytics API, with the index
        moved into regular columns and the columns renamed
    """
    # Only the dimension ids are sent to the API helper; the rest of each
    # dimension description is used for renaming below.
    dimension_ids = [dimension["id"] for dimension in dimensions]
    df = get_data_df(
        metrics,
        dimension_ids,
        **other_params,
    )
    # reset_index() turns the index levels into ordinary columns, which is what
    # makes the result "flat"; copy() returns a frame independent of df.
    return df.reset_index().rename(columns=get_rename_dict(dimensions)).copy()
3423
3524def get_rename_dict (dimensions ):
@@ -41,26 +30,27 @@ def get_rename_dict(dimensions):
4130def get_outbound_links_df (analytics_params ):
4231 """
4332 Get a DF with outbound links from the Analytics API. Merges the builtin and custom events for outbound links.
33+ analytics_params cannot currently include a dimension_filter
4434
4535 :param analytics_params: the parameters for the Analytics API, including authentication and property ids
4636 :return: a DataFrame with the outbound links from the Analytics API
4737 """
4838 pd .set_option ('future.no_silent_downcasting' , True )
39+ assert "dimension_filter" not in analytics_params
4940 # Get the builtin "Click" event
5041 df_builtin_links = get_flat_data_df (
5142 [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
5243 [DIMENSION_PAGE_PATH , DIMENSION_BUILTIN_URL , DIMENSION_EVENT_NAME ],
53- remove_matches = [ None , r"\s*" , None ] ,
44+ dimension_filter = f"eventName== { EVENT_BUILTIN_CLICK } " ,
5445 ** analytics_params ,
5546 ).groupby (
5647 [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_BUILTIN_URL ["alias" ]]
5748 ).sum ().reset_index ()
58-
5949 # Get the custom "outbound_link_click" event
6050 df_custom_links = get_flat_data_df (
6151 [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
6252 [DIMENSION_EVENT_NAME , DIMENSION_CUSTOM_URL , DIMENSION_PAGE_PATH ],
63- remove_matches = [ DIMENSION_EVENT_NAME [ "remove_matches" ], r"\(not set\)" , None ] ,
53+ dimension_filter = f"eventName== { EVENT_CUSTOM_CLICK } " ,
6454 ** analytics_params ,
6555 ).groupby (
6656 [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_CUSTOM_URL ["alias" ]]
@@ -153,11 +143,12 @@ def get_page_views_df(analytics_params):
153143 :param analytics_params: the parameters for the Analytics API, including authentication and property ids
154144 :return: a DataFrame with the page views from the Analytics API
155145 """
146+ assert "dimension_filter" not in analytics_params
156147 df_response = get_flat_data_df (
157148 [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS , METRIC_PAGE_VIEW ],
158149 [DIMENSION_PAGE_PATH , DIMENSION_EVENT_NAME ],
159- dimension_filter = "eventName==page_view" ,
160150 ** analytics_params ,
151+ dimension_filter = f"eventName=={ EVENT_PAGE_VIEW } " ,
161152 ).rename (
162153 columns = {
163154 DIMENSION_PAGE_PATH ["alias" ]: "Page Path" ,
0 commit comments