1+ import numpy as np
12import pandas as pd
23from .charts import get_data_df
34from .fields import *
45from urllib .parse import urlparse
6+ import datetime as dt
57
6- def get_flat_data_df (analytics_params , metrics , dimensions , remove_matches = None ):
8+ def get_flat_data_df (metrics , dimensions , remove_matches = None , ** other_params ):
79 """
810 Get a df from the Analytics API with a flat structure (no multiindex).
911
@@ -22,7 +24,7 @@ def get_flat_data_df(analytics_params, metrics, dimensions, remove_matches=None)
2224 df = get_data_df (
2325 metrics ,
2426 [dimension ["id" ] for dimension in dimensions ],
25- ** analytics_params ,
27+ ** other_params ,
2628 )
2729 if remove_matches is not None :
2830 for i , match in enumerate ([dimension ["remove_matches" ] for dimension in dimensions ]):
@@ -46,20 +48,20 @@ def get_outbound_links_df(analytics_params):
4648 pd .set_option ('future.no_silent_downcasting' , True )
4749 # Get the builtin "Click" event
4850 df_builtin_links = get_flat_data_df (
49- analytics_params ,
5051 [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
5152 [DIMENSION_PAGE_PATH , DIMENSION_BUILTIN_URL , DIMENSION_EVENT_NAME ],
52- remove_matches = [None , r"\s*" , None ]
53+ remove_matches = [None , r"\s*" , None ],
54+ ** analytics_params ,
5355 ).groupby (
5456 [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_BUILTIN_URL ["alias" ]]
5557 ).sum ().reset_index ()
5658
5759 # Get the custom "outbound_link_click" event
5860 df_custom_links = get_flat_data_df (
59- analytics_params ,
6061 [METRIC_EVENT_COUNT , METRIC_TOTAL_USERS ],
6162 [DIMENSION_EVENT_NAME , DIMENSION_CUSTOM_URL , DIMENSION_PAGE_PATH ],
6263 remove_matches = [DIMENSION_EVENT_NAME ["remove_matches" ], r"\(not set\)" , None ],
64+ ** analytics_params ,
6365 ).groupby (
6466 [DIMENSION_PAGE_PATH ["alias" ], DIMENSION_CUSTOM_URL ["alias" ]]
6567 ).sum ().reset_index ()
@@ -117,17 +119,119 @@ def get_outbound_links_change(analytics_params, start_current, end_current, star
117119 "start_date" : start_previous ,
118120 "end_date" : end_previous ,
119121 }
120- print (analytics_params_month_2 )
121122 df_current = get_outbound_links_df (analytics_params_month_1 ).set_index (
122123 ["Page Path" , "Outbound Link" , "Hostname" ]
123124 )
124125 df_previous = get_outbound_links_df (analytics_params_month_2 ).set_index (
125126 ["Page Path" , "Outbound Link" , "Hostname" ]
126127 )
128+ total_clicks_percent_change = get_change (
129+ df_current ["Total Clicks" ],
130+ df_previous ["Total Clicks" ],
131+ start_current ,
132+ end_current ,
133+ start_previous ,
134+ end_previous
135+ )
136+ total_users_percent_change = get_change (
137+ df_current ["Total Users" ],
138+ df_previous ["Total Users" ],
139+ start_current ,
140+ end_current ,
141+ start_previous ,
142+ end_previous
143+ )
144+ df_reindexed = df_current .reindex (total_clicks_percent_change .index ).fillna (0 )
145+ df_reindexed ["Total Clicks Percent Change" ] = total_clicks_percent_change
146+ df_reindexed ["Total Users Percent Change" ] = total_users_percent_change
147+ return df_reindexed .sort_values (["Total Clicks" , "Total Users" ], ascending = False , kind = "stable" ).reset_index ()
148+
def get_page_views_df(analytics_params):
    """
    Fetch per-page view and user counts from the Analytics API.

    The request asks for the event count, total users and page view metrics,
    broken down by page path and event name, restricted to the ``page_view``
    event via ``dimension_filter``.

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids;
        forwarded to ``get_flat_data_df`` as keyword arguments
    :return: a DataFrame with exactly the columns "Page Path", "Total Views" and "Total Users"
    """
    requested_metrics = [METRIC_EVENT_COUNT, METRIC_TOTAL_USERS, METRIC_PAGE_VIEW]
    requested_dimensions = [DIMENSION_PAGE_PATH, DIMENSION_EVENT_NAME]
    raw_df = get_flat_data_df(
        requested_metrics,
        requested_dimensions,
        dimension_filter="eventName==page_view",
        **analytics_params,
    )
    # Map the API column names onto the report's display names
    display_names = {
        DIMENSION_PAGE_PATH["alias"]: "Page Path",
        METRIC_PAGE_VIEW: "Total Views",
        METRIC_TOTAL_USERS: "Total Users",
    }
    renamed_df = raw_df.rename(columns=display_names)
    # Keep only the reported columns; copy so callers do not hold a view of the response
    return renamed_df[["Page Path", "Total Views", "Total Users"]].copy()
169+
def get_page_views_change(analytics_params, start_current, end_current, start_previous, end_previous):
    """
    Get a DF with page views from the Analytics API and a comparison for the prior month

    :param analytics_params: the parameters for the Analytics API, including authentication and property ids
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the previous month
    :param end_previous: the end date for the previous month
    :return: a DataFrame with one row per page path seen in either period, holding the current
        period's "Total Views" and "Total Users" (0 for pages absent from the current period) plus
        "Total Views Percent Change" and "Total Users Percent Change", sorted by views then users, descending
    """
    # Same base parameters for both requests; only the date range differs
    df_current = get_page_views_df(
        {**analytics_params, "start_date": start_current, "end_date": end_current}
    ).set_index("Page Path")
    df_previous = get_page_views_df(
        {**analytics_params, "start_date": start_previous, "end_date": end_previous}
    ).set_index("Page Path")

    # get_change aligns both series on the union of their indexes itself, so the raw
    # columns are passed directly (as get_outbound_links_change does) instead of
    # being reindexed a first time here.
    period_bounds = (start_current, end_current, start_previous, end_previous)
    views_percent_change = get_change(
        df_current["Total Views"],
        df_previous["Total Views"],
        *period_bounds,
    )
    users_percent_change = get_change(
        df_current["Total Users"],
        df_previous["Total Users"],
        *period_bounds,
    )

    # Expand the current-period frame to every page seen in either period; pages that
    # only existed previously get 0 views/users for the current period.
    df_reindexed = df_current.reindex(views_percent_change.index).fillna(0)
    df_reindexed["Total Views Percent Change"] = views_percent_change
    df_reindexed["Total Users Percent Change"] = users_percent_change
    return df_reindexed.sort_values(
        ["Total Views", "Total Users"], ascending=False, kind="stable"
    ).reset_index()
214+
def get_change(series_current, series_previous, start_current, end_current, start_previous, end_previous, combined_index=None):
    """
    Get the percent change between two series, accounting for different numbers of days in the month.

    Both series are aligned on the union of their indexes. Entries missing from the current
    series count as 0. The previous series is rescaled by ``current_length / previous_length``
    (period lengths in days, both endpoints included) before the ratio is taken.

    :param series_current: the series representing the current month
    :param series_previous: the series representing the prior month
    :param start_current: the start date for the current month in the format "YYYY-MM-DD"
    :param end_current: the end date for the current month
    :param start_previous: the start date for the prior month
    :param end_previous: the end date for the prior month
    :param combined_index: ignored; kept only so existing callers that pass it keep working.
        The index used is always the union of the two series' indexes.
    :return: a Series of ``current / scaled_previous - 1`` over the combined index; NaN where the
        previous value is missing or zero
    :raises ValueError: if the two series have different index names, or if either period
        does not span at least one day (end date before start date)
    """
    # Explicit checks instead of assert: asserts are stripped when Python runs with -O
    if series_current.index.names != series_previous.index.names:
        raise ValueError(
            f"Index names differ: {list(series_current.index.names)} != {list(series_previous.index.names)}"
        )

    # Period lengths in days, counting both the start and the end day
    current_length = float((dt.datetime.fromisoformat(end_current) - dt.datetime.fromisoformat(start_current)).days + 1)
    previous_length = float((dt.datetime.fromisoformat(end_previous) - dt.datetime.fromisoformat(start_previous)).days + 1)
    # Reject reversed ranges too, not only zero-length ones
    if current_length <= 0 or previous_length <= 0:
        raise ValueError("Each period must end on or after its start date")

    # Reindex both series to have the same index
    combined_index = series_current.index.union(series_previous.index)
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # Adjust the values from the prior series to account for the different number of days in the month.
    # Missing prior values stay NaN so the change for those entries is NaN rather than infinite.
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length
    ratio = series_current_reindexed / series_previous_reindexed
    # Division by a zero prior value yields +/-inf; report it as "no meaningful change" (NaN)
    change = (ratio - 1).replace([np.inf, -np.inf], np.nan)
    return change
0 commit comments