1+ import datetime as dt
2+ from .charts import get_data_df , get_df_over_time
3+ from .entities import ADDITIONAL_DATA_BEHAVIOR
4+ import numpy as np
5+ import pandas as pd
6+
def get_data_df_from_fields(metrics, dimensions, **other_params):
    """
    Fetch a DataFrame via ``get_data_df`` and relabel its columns with field aliases.

    :param metrics: list of metric field dicts; each must provide "id" and "alias"
    :param dimensions: list of dimension field dicts; each must provide "id" and "alias"
    :param other_params: extra keyword arguments forwarded unchanged to ``get_data_df``
    :return: a copy of the fetched DataFrame after ``reset_index()`` (so the previous
        index becomes ordinary columns and the result has a fresh RangeIndex), with
        every column whose name matches a field "id" renamed to that field's "alias"
    """
    metric_ids = [field["id"] for field in metrics]
    dimension_ids = [field["id"] for field in dimensions]
    fetched = get_data_df(metric_ids, dimension_ids, **other_params)

    # Flatten the index into columns first so index levels can be renamed too.
    flattened = fetched.reset_index()
    alias_by_id = get_rename_dict(dimensions + metrics)
    renamed = flattened.rename(columns=alias_by_id)
    return renamed.copy()
25+
26+
def get_rename_dict(dimensions):
    """
    Build an id -> alias mapping suitable for ``DataFrame.rename(columns=...)``.

    :param dimensions: an iterable of field dicts (dimensions and/or metrics), each
        providing an "id" and an "alias" key
    :return: a dict mapping each field's "id" to its "alias"; if an id occurs more
        than once, the last occurrence wins
    :raises KeyError: if a field lacks an "id" or "alias" key
    """
    # Single pass over the input so one-shot iterables (e.g. generators) work too;
    # iterating twice would silently yield an empty mapping for them.
    return {field["id"]: field["alias"] for field in dimensions}
32+
33+
def get_one_period_change_series(series_current, series_previous, start_current, end_current, start_previous, end_previous):
    """
    Get the fractional change between two series, normalised for period length.

    The previous series is scaled by ``current_length / previous_length`` (both in
    days, inclusive of the end date) before the ratio is taken, so periods with a
    different number of days are compared on an equal footing.

    :param series_current: the series representing the current period
    :param series_previous: the series representing the prior period
    :param start_current: the start date for the current period as an ISO string ("YYYY-MM-DD")
    :param end_current: the end date (inclusive) for the current period as an ISO string
    :param start_previous: the start date for the prior period as an ISO string
    :param end_previous: the end date (inclusive) for the prior period as an ISO string
    :return: a Series indexed by the union of both indexes holding
        ``current / scaled_previous - 1``. Labels missing from the current series
        count as 0; labels missing from the previous series, and any division by
        zero, yield NaN.
    :raises ValueError: if the index names of the two series differ, or if either
        period has a non-positive length (end date before start date)
    """
    if series_current.index.names != series_previous.index.names:
        raise ValueError(
            "series_current and series_previous must have the same index names, got "
            f"{list(series_current.index.names)} and {list(series_previous.index.names)}"
        )

    def _inclusive_days(start, end):
        # Number of days in [start, end], counting both endpoints.
        return float((dt.datetime.fromisoformat(end) - dt.datetime.fromisoformat(start)).days + 1)

    current_length = _inclusive_days(start_current, end_current)
    previous_length = _inclusive_days(start_previous, end_previous)
    if current_length <= 0 or previous_length <= 0:
        raise ValueError("Each period must end on or after its start date")

    # Align both series on the same index.
    combined_index = series_current.index.union(series_previous.index)
    series_current_reindexed = series_current.reindex(combined_index).fillna(0)
    # The previous series is deliberately NOT filled: a label with no prior value
    # has no defined change and should come out as NaN.
    series_previous_reindexed = series_previous.reindex(combined_index) * current_length / previous_length

    change = (series_current_reindexed / series_previous_reindexed) - 1
    # Division by a zero baseline gives +/-inf; report both as undefined (NaN).
    return change.replace({np.inf: np.nan, -np.inf: np.nan})
57+
58+
def get_change_over_time_df(
    metrics, time_dimension, include_changes=True, additional_data_path=None, additional_data_behavior=None, strftime_format="%Y-%m", **other_params
):
    """
    Get a DataFrame with the value of each metric per time period, optionally with period-over-period changes.

    :param metrics: the metric field dicts to display; each must provide "id" and "alias",
        and "change_alias" when include_changes is True
    :param time_dimension: the time dimension field dict; must provide "id" and "alias"
    :param include_changes: whether to add the fractional change columns, defaults to True
    :param additional_data_path: the path to a JSON file with additional data to be combined
        with the API data, defaults to None (API data only)
    :param additional_data_behavior: how to combine the additional data; required when
        additional_data_path is given. ADDITIONAL_DATA_BEHAVIOR.ADD sums the saved values
        into the API values; ADDITIONAL_DATA_BEHAVIOR.REPLACE lets saved rows take precedence
        over API rows with the same index label
    :param strftime_format: the format applied to the time index, defaults to "%Y-%m".
        None leaves the index values as they are (no datetime conversion is applied here)
    :param other_params: any other parameters to be passed to the get_df_over_time function, including service params
    :returns: a DataFrame with one row per time period. Columns are the time dimension alias,
        the metric aliases and, if include_changes is True, the change aliases
    :raises ValueError: if additional_data_path is given and additional_data_behavior is
        None or not a supported behavior
    """
    # Fail fast, before any API call, if the combination of arguments is unusable.
    if additional_data_path is not None and additional_data_behavior is None:
        raise ValueError("additional_data_behavior is required when additional_data_path is given")

    metric_aliases = [metric["alias"] for metric in metrics]

    # NOTE(review): rename() without columns=/axis= maps *index labels*, not column
    # names — confirm this is intended; kept as-is to preserve behavior.
    df_api = get_df_over_time(
        metric_aliases,
        [metric["id"] for metric in metrics],
        time_dimension["id"],
        sort_results=[time_dimension["id"]],
        # Appends "01" to every index label (presumably turning a year-month label
        # into a first-of-month date — TODO confirm) and sorts newest first.
        df_processor=(lambda df: df.set_index(df.index + "01").sort_index(ascending=False)),
        format_table=False,
        **other_params
    ).rename({time_dimension["id"]: time_dimension["alias"]})

    if additional_data_path is None:
        df_combined = df_api
    else:
        df_saved = pd.read_json(additional_data_path)
        if additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.ADD:
            # Label-aligned sum, treating a value missing on one side as 0; the
            # result is then reversed (presumably to restore newest-first order).
            df_combined = df_api.add(df_saved.astype(int), fill_value=0)[::-1]
        elif additional_data_behavior == ADDITIONAL_DATA_BEHAVIOR.REPLACE:
            # Saved rows are listed first so keep="first" prefers them over API rows.
            df_combined = pd.concat([df_saved, df_api], ignore_index=False)
            df_combined = df_combined.loc[~df_combined.index.duplicated(keep="first")].sort_index(ascending=False)
        else:
            # Previously this fell through with an empty placeholder DataFrame.
            raise ValueError(f"Unsupported additional_data_behavior: {additional_data_behavior!r}")

    if include_changes:
        # periods=-1 compares each row with the row after it, i.e. the earlier
        # period when rows are ordered newest first. A zero baseline gives
        # +/-inf, which is reported as undefined (NaN).
        df_combined[
            [metric["change_alias"] for metric in metrics]
        ] = df_combined[metric_aliases].pct_change(periods=-1).replace({np.inf: np.nan, -np.inf: np.nan})

    if strftime_format is not None:
        df_combined.index = pd.to_datetime(df_combined.index).strftime(strftime_format)

    return df_combined.reset_index(names=time_dimension["alias"])
0 commit comments