Skip to content

Commit be8484e

Browse files
author
Jonah Paten
authored
feat: explorer index tracking functions (#4385) (#4391)
* feat: added new table-related elements (#4385) * fix: use batch updating in calls to gspread_formatting (#4385) * chore: bump setup.py, change pagination analytics sort order (#4385)
1 parent 3a98b06 commit be8484e

File tree

4 files changed

+379
-94
lines changed

4 files changed

+379
-94
lines changed

analytics/analytics_package/analytics/entities.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,11 @@
5151
EVENT_CUSTOM_CLICK = "outbound_link_clicked"
5252
# The builtin page view event.
5353
EVENT_PAGE_VIEW = "page_view"
54-
54+
EVENT_FILE_DOWNLOADED = "file_downloaded"
55+
EVENT_ENTITY_SELECTED = "entity_selected"
56+
EVENT_ENTITY_TABLE_SORTED = "entity_table_sorted"
57+
EVENT_ENTITY_TABLE_PAGINATED = "entity_table_paginated"
58+
EVENT_FILTER_SELECTED = "filter_selected"
5559
# DIMENSIONS
5660
# The path to the page the user is on when the event occurs. Does not include fragments or parameters
5761
DIMENSION_PAGE_PATH = {
@@ -83,6 +87,42 @@
8387
"id": "yearMonth",
8488
"alias": "Month",
8589
}
90+
DIMENSION_RELATED_ENTITY_ID = {
91+
"id": "customEvent:related_entity_id",
92+
"alias": "Related Entity ID",
93+
}
94+
DIMENSION_RELATED_ENTITY_NAME = {
95+
"id": "customEvent:related_entity_name",
96+
"alias": "Related Entity Name",
97+
}
98+
DIMENSION_ENTITY_NAME = {
99+
"id": "customEvent:entity_name",
100+
"alias": "Entity Name",
101+
}
102+
DIMENSION_ENTITY_NAME_TAB = {
103+
**DIMENSION_ENTITY_NAME,
104+
"alias": "Entity Name (Tab)",
105+
}
106+
DIMENSION_PAGINATION_DIRECTION = {
107+
"id": "customEvent:pagination_direction",
108+
"alias": "Direction",
109+
}
110+
DIMENSION_SORT_DIRECTION = {
111+
"id": "customEvent:sort_direction",
112+
"alias": "Direction",
113+
}
114+
DIMENSION_COLUMN_NAME = {
115+
"id": "customEvent:column_name",
116+
"alias": "Column Name",
117+
}
118+
DIMENSION_FILTER_NAME = {
119+
"id": "customEvent:filter_name",
120+
"alias": "Filter Name",
121+
}
122+
DIMENSION_FILTER_VALUE = {
123+
"id": "customEvent:filter_value",
124+
"alias": "Filter Value",
125+
}
86126
# The hostname of the clicked link. Based on DIMENSION_CUSTOM_URL and DIMENSION_BUILTIN_URL
87127
SYNTHETIC_DIMENSION_CLICKED_HOSTNAME = {
88128
"id": None,

analytics/analytics_package/analytics/sheets_api.py

Lines changed: 92 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -220,102 +220,104 @@ def fill_worksheet_with_df(
220220
[df_to_insert.columns.values.tolist()] + df_to_insert.fillna("NA").values.tolist(),
221221
**{**DEFAULT_GSPREAD_UPDATE_ARGS, **gspread_update_args}
222222
)
223-
224-
# Format worksheet
225-
# Justify Column Widths
226-
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
227-
text_widths = df.astype(str).columns.map(
228-
lambda column_name: df[column_name].astype(str).str.len().max()
229-
)
230-
header_widths = df.columns.str.len()
231-
buffer_chars = (
232-
DEFAULT_BUFFER_CHARS
233-
if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"])
234-
else sheet_formatting_options_filled["column_widths"]["buffer_chars"]
235-
)
236-
data_column_widths = [
237-
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
238-
for len_tuple in zip(text_widths, header_widths)
239-
]
240-
extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"]
241-
combined_column_widths = data_column_widths + extra_column_widths
242-
column_positions = [
243-
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths)
244-
]
245-
gspread_formatting.set_column_widths(worksheet, zip(column_positions, combined_column_widths))
246-
# Freeze Header
247-
if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]:
248-
gspread_formatting.set_frozen(worksheet, rows=1)
249-
base_format_options = gspread_formatting.CellFormat()
250-
# Bold Header
251-
if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]:
252-
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
253-
# Center Header
254-
if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]:
255-
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
256-
# Handle column specific formatting
257-
for column in column_formatting_options:
258-
if column not in df.columns:
259-
raise KeyError("Formatting column is not in the dataframe")
260-
# Skip if the column is set to default
261-
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.DEFAULT:
262-
continue
263-
# Get the column position
264-
column_position_numeric = df.columns.get_loc(column) + 1
265-
column_range_top = gspread.utils.rowcol_to_a1(1, column_position_numeric)
266-
column_range_bottom = gspread.utils.rowcol_to_a1(df.index.size + 1, column_position_numeric)
267-
column_range = f"{column_range_top}:{column_range_bottom}"
268-
column_worksheet_range = gspread_formatting.GridRange.from_a1_range(column_range, worksheet)
269-
# Get conditional formatting rules
270-
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.PERCENT_COLORED:
271-
green_rule = gspread_formatting.ConditionalFormatRule(
272-
ranges=[column_worksheet_range],
273-
booleanRule=gspread_formatting.BooleanRule(
274-
condition=gspread_formatting.BooleanCondition('NUMBER_GREATER_THAN_EQ', ['0']),
275-
format=gspread_formatting.CellFormat(
276-
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(0,1,0)))
277-
)
278-
)
279-
red_rule = gspread_formatting.ConditionalFormatRule(
280-
ranges=[column_worksheet_range],
281-
booleanRule=gspread_formatting.BooleanRule(
282-
condition=gspread_formatting.BooleanCondition('NUMBER_LESS_THAN_EQ', ['0']),
283-
format=gspread_formatting.CellFormat(
284-
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(1,0,0)))
285-
)
286-
)
287-
# Apply conditional formatting rules
288-
conditional_formatting_rules = gspread_formatting.get_conditional_format_rules(worksheet)
289-
conditional_formatting_rules.append(green_rule)
290-
conditional_formatting_rules.append(red_rule)
291-
conditional_formatting_rules.save()
292-
if column_formatting_options[column] in (COLUMN_FORMAT_OPTIONS.PERCENT_COLORED, COLUMN_FORMAT_OPTIONS.PERCENT_UNCOLORED):
293-
# Apply percent format rule
294-
gspread_formatting.format_cell_range(
295-
worksheet,
296-
column_range,
297-
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
223+
224+
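# Batch formatting updates to increase efficiency: calls made through `batch` are queued and sent together instead of one request per call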
225+
with gspread_formatting.batch_updater(worksheet.spreadsheet) as batch:
226+
# Format worksheet
227+
# Justify Column Widths
228+
if "column_widths" not in sheet_formatting_options_filled or sheet_formatting_options_filled["column_widths"]["justify"]:
229+
text_widths = df.astype(str).columns.map(
230+
lambda column_name: df[column_name].astype(str).str.len().max()
298231
)
299-
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
300-
# Apply date format rule
301-
gspread_formatting.format_cell_range(
302-
worksheet,
303-
column_range,
304-
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
232+
header_widths = df.columns.str.len()
233+
buffer_chars = (
234+
DEFAULT_BUFFER_CHARS
235+
if ("column_widths" not in sheet_formatting_options_filled or "buffer_chars" not in sheet_formatting_options_filled["column_widths"])
236+
else sheet_formatting_options_filled["column_widths"]["buffer_chars"]
305237
)
238+
data_column_widths = [
239+
round((max(len_tuple) + buffer_chars) * FONT_SIZE_PTS * 1/PTS_PIXELS_RATIO)
240+
for len_tuple in zip(text_widths, header_widths)
241+
]
242+
extra_column_widths = [sheet_formatting_options_filled["extra_columns_width"]] * sheet_formatting_options_filled["extra_columns"]
243+
combined_column_widths = data_column_widths + extra_column_widths
244+
column_positions = [
245+
gspread.utils.rowcol_to_a1(1, i + 1)[0] for i, _ in enumerate(combined_column_widths)
246+
]
247+
batch.set_column_widths(worksheet, zip(column_positions, combined_column_widths))
248+
# Freeze Header
249+
if "freeze_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["freeze_header"]:
250+
batch.set_frozen(worksheet, rows=1)
251+
base_format_options = gspread_formatting.CellFormat()
252+
# Bold Header
253+
if "bold_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["bold_header"]:
254+
base_format_options += gspread_formatting.CellFormat(textFormat=gspread_formatting.TextFormat(bold=True))
255+
# Center Header
256+
if "center_header" not in sheet_formatting_options_filled or sheet_formatting_options_filled["center_header"]:
257+
base_format_options += gspread_formatting.CellFormat(horizontalAlignment="CENTER")
258+
# Handle column specific formatting
259+
for column in column_formatting_options:
260+
if column not in df.columns:
261+
raise KeyError("Formatting column is not in the dataframe")
262+
# Skip if the column is set to default
263+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.DEFAULT:
264+
continue
265+
# Get the column position
266+
column_position_numeric = df.columns.get_loc(column) + 1
267+
column_range_top = gspread.utils.rowcol_to_a1(1, column_position_numeric)
268+
column_range_bottom = gspread.utils.rowcol_to_a1(df.index.size + 1, column_position_numeric)
269+
column_range = f"{column_range_top}:{column_range_bottom}"
270+
column_worksheet_range = gspread_formatting.GridRange.from_a1_range(column_range, worksheet)
271+
# Get conditional formatting rules
272+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.PERCENT_COLORED:
273+
green_rule = gspread_formatting.ConditionalFormatRule(
274+
ranges=[column_worksheet_range],
275+
booleanRule=gspread_formatting.BooleanRule(
276+
condition=gspread_formatting.BooleanCondition('NUMBER_GREATER_THAN_EQ', ['0']),
277+
format=gspread_formatting.CellFormat(
278+
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(0,1,0)))
279+
)
280+
)
281+
red_rule = gspread_formatting.ConditionalFormatRule(
282+
ranges=[column_worksheet_range],
283+
booleanRule=gspread_formatting.BooleanRule(
284+
condition=gspread_formatting.BooleanCondition('NUMBER_LESS_THAN_EQ', ['0']),
285+
format=gspread_formatting.CellFormat(
286+
textFormat=gspread_formatting.TextFormat(foregroundColor=gspread_formatting.Color(1,0,0)))
287+
)
288+
)
289+
# Apply conditional formatting rules
290+
conditional_formatting_rules = gspread_formatting.get_conditional_format_rules(worksheet)
291+
conditional_formatting_rules.append(green_rule)
292+
conditional_formatting_rules.append(red_rule)
293+
conditional_formatting_rules.save()
294+
if column_formatting_options[column] in (COLUMN_FORMAT_OPTIONS.PERCENT_COLORED, COLUMN_FORMAT_OPTIONS.PERCENT_UNCOLORED):
295+
# Apply percent format rule
296+
gspread_formatting.format_cell_range(
297+
worksheet,
298+
column_range,
299+
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='PERCENT', pattern='0.0%'))
300+
)
301+
if column_formatting_options[column] == COLUMN_FORMAT_OPTIONS.YEAR_MONTH_DATE:
302+
# Apply date format rule
303+
gspread_formatting.format_cell_range(
304+
worksheet,
305+
column_range,
306+
gspread_formatting.CellFormat(numberFormat=gspread_formatting.NumberFormat(type='DATE', pattern='yyyy-mm'))
307+
)
306308

307-
# Apply base formatting options
308-
gspread_formatting.format_cell_range(
309-
worksheet,
310-
f"A1:{gspread.utils.rowcol_to_a1(1, len(df.columns))}",
311-
base_format_options
312-
)
309+
# Apply base formatting options
310+
batch.format_cell_range(
311+
worksheet,
312+
f"A1:{gspread.utils.rowcol_to_a1(1, len(df.columns))}",
313+
base_format_options
314+
)
313315

314316
# Delete Sheet1 if it has been created by default
315317
if "Sheet1" in [i.title for i in sheet.worksheets()]:
316318
sheet.del_worksheet(sheet.worksheet("Sheet1"))
317319

318-
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, **gspread_update_args):
320+
def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatting_options={}, column_formatting_options={}, gspread_update_args={}):
319321
"""
320322
Fill a sheet with the contents of a dictionary of DataFrames.
321323
The keys of the dictionary are the names of the worksheets, and the values contain the data to be placed in the sheet.
@@ -343,13 +345,14 @@ def fill_spreadsheet_with_df_dict(sheet, df_dict, overlapBehavior, sheet_formatt
343345
sheet, df, worksheet_name, overlapBehavior,
344346
sheet_formatting_options=sheet_formatting_options.get(worksheet_name, {}),
345347
column_formatting_options=column_formatting_options.get(worksheet_name, {}),
346-
**gspread_update_args
348+
**gspread_update_args.get(worksheet_name, {})
347349
)
348350

349351
def update_sheet_raw(sheets_authentication_response, sheet, *updates):
350352
"""
351353
Directly call the Google Sheets api to update the specified sheet with the optional arguments.
352354
"""
355+
# TODO: gspread.Spreadsheet.batch_update can also do this
353356
assert len(updates) > 0
354357
sheets_api = authenticate_google_api(sheets_authentication_response)
355358
sheet_id = sheet.id

0 commit comments

Comments
 (0)