feat: More sophisticated evaluation logic #534

Merged: 22 commits into main from robinholzi/fix/eval-plotting on Jun 23, 2024
Changes from 1 commit

Commits (22):
0b9af85  tmp (robinholzi, Jun 20, 2024)
a974941  strip notebook outputs (robinholzi, Jun 20, 2024)
f121dce  hotfixes (robinholzi, Jun 20, 2024)
823638d  extend notebook to support patching multiple runs (MaxiBoether, Jun 20, 2024)
70bf09d  Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/mo… (MaxiBoether, Jun 20, 2024)
e5babff  aggregate (robinholzi, Jun 20, 2024)
c550b35  wip: merging pipeline dir (MaxiBoether, Jun 20, 2024)
1e38614  tmp (robinholzi, Jun 20, 2024)
bd565a4  Merge remote-tracking branch 'origin/main' into robinholzi/fix/eval-p… (MaxiBoether, Jun 21, 2024)
edc9252  Merge branch 'main' into robinholzi/fix/eval-plotting (robinholzi, Jun 21, 2024)
40134df  Make aggregation tool more robust (robinholzi, Jun 21, 2024)
7d1545b  add button to select composite_model_variant (robinholzi, Jun 21, 2024)
21eccf4  Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/mo… (MaxiBoether, Jun 21, 2024)
e86217a  Improvement to plotting logic (robinholzi, Jun 22, 2024)
245cc6f  Merge branch 'main' into robinholzi/fix/eval-plotting (robinholzi, Jun 22, 2024)
1b502d9  Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/mo… (MaxiBoether, Jun 22, 2024)
56784d8  pipe equivalence adjustments (MaxiBoether, Jun 22, 2024)
1a3a548  UI patches (robinholzi, Jun 22, 2024)
ff38e38  Fix (robinholzi, Jun 22, 2024)
b77e6d8  also allow runs for which we only have a single log (MaxiBoether, Jun 22, 2024)
d05dc40  Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/mo… (MaxiBoether, Jun 22, 2024)
dbfbb80  only merge complete pipelines (MaxiBoether, Jun 22, 2024)
Fix
robinholzi committed Jun 22, 2024
commit ff38e38afffa7275596648a3f1e52ba2ce89a90d
21 changes: 12 additions & 9 deletions analytics/app/pages/plots/cost_vs_eval_metric_agg.py
@@ -41,28 +41,31 @@ def gen_fig_scatter_num_triggers(
 ) -> go.Figure:
     # unpack data
     composite_model_variant = _shared_data[page].composite_model_variant
-    df_logs = _shared_data[page].df_all
-    df_logs_eval_single = _shared_data[page].df_eval_single.copy()  # TODO get rid of this
-    df_logs_eval_single = df_logs_eval_single[
-        (df_logs_eval_single["dataset_id"] == dataset_id)
-        & (df_logs_eval_single["eval_handler"] == eval_handler)
-        & (df_logs_eval_single[composite_model_variant])
+    df_all = _shared_data[page].df_all
+    df_eval_single = _shared_data[page].df_eval_single
+    df_eval_single = df_eval_single[
+        (df_eval_single["dataset_id"] == dataset_id)
+        & (df_eval_single["eval_handler"] == eval_handler)
+        & (df_eval_single[composite_model_variant])
         # & (df_adjusted["metric"] == metric)
     ]

     agg_eval_metric = df_aggregate_eval_metric(
-        df_logs_eval_single,
+        df_eval_single,
         group_by=["pipeline_ref", "metric"],
         in_col="value",
         out_col="metric_value",
         aggregate_func=agg_func_y,
     )

     agg_duration = (
-        df_logs[df_logs["id"].isin(stages)].groupby(["pipeline_ref"]).agg(cost=("duration", agg_func_x)).reset_index()
+        df_all[df_all["id"].isin(stages)].groupby(["pipeline_ref"]).agg(cost=("duration", agg_func_x)).reset_index()
     )

     merged = agg_eval_metric.merge(agg_duration, on="pipeline_ref")
+    assert (
+        agg_eval_metric.shape[0] == merged.shape[0] == agg_duration.shape[0] * len(agg_eval_metric["metric"].unique())
+    )
     fig = px.scatter(
         merged,
         x="cost",
@@ -71,7 +74,7 @@ def gen_fig_scatter_num_triggers(
         facet_col="metric",
         labels={
             "cost": f"{agg_func_x} duration in sec. (proxy for cost)",
-            "metric_value": f"{agg_func_y} {metric}",
+            "metric_value": f"{agg_func_y}",
             "pipeline_ref": "Pipeline",
         },
         category_orders={
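The assertion added after the merge is a join sanity check: agg_duration carries one row per pipeline and agg_eval_metric one row per (pipeline, metric) pair, so the inner join must yield exactly #pipelines × #metrics rows; anything else means a pipeline was silently dropped from one side or duplicated. A minimal sketch of the invariant on hypothetical data (not from this PR):

import pandas as pd

# One row per (pipeline, metric), as produced by df_aggregate_eval_metric.
agg_eval_metric = pd.DataFrame(
    {
        "pipeline_ref": ["p1", "p1", "p2", "p2"],
        "metric": ["Accuracy", "F1", "Accuracy", "F1"],
        "metric_value": [0.91, 0.88, 0.85, 0.80],
    }
)
# One row per pipeline, as produced by the groupby over stage durations.
agg_duration = pd.DataFrame({"pipeline_ref": ["p1", "p2"], "cost": [120.0, 95.0]})

merged = agg_eval_metric.merge(agg_duration, on="pipeline_ref")
# 4 == 4 == 2 * 2: every (pipeline, metric) row found its cost row exactly once.
assert (
    agg_eval_metric.shape[0]
    == merged.shape[0]
    == agg_duration.shape[0] * agg_eval_metric["metric"].nunique()
)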
1 change: 1 addition & 0 deletions analytics/app/pages/plots/num_samples.py
@@ -119,6 +119,7 @@ def section_num_samples(
         df_models=df_models,
         df_eval_requests=df_eval_requests,
     )
+    _shared_data[page].composite_model_variant = composite_model_variant
     _shared_data[page].df_models = df_models
     _shared_data[page].df_eval_requests = df_eval_requests

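For context on this one-liner: the analytics pages cache their dataframes in a module-level _shared_data dict keyed by page name, and the Dash callbacks that re-render figures later read from that cache; persisting composite_model_variant (the radio selection introduced in 7d1545b) is what lets those callbacks filter on the chosen variant column. A rough sketch of the pattern, with illustrative names rather than the repo's exact classes:

from dataclasses import dataclass, field

import pandas as pd


@dataclass
class _PageState:  # illustrative stand-in for the page's shared-data container
    composite_model_variant: str = ""
    df_models: pd.DataFrame = field(default_factory=pd.DataFrame)
    df_eval_requests: pd.DataFrame = field(default_factory=pd.DataFrame)


_shared_data: dict[str, _PageState] = {}


def section_setup(page: str, composite_model_variant: str, df_models: pd.DataFrame) -> None:
    # Everything a later callback needs must be stashed under the page key;
    # callbacks only receive widget values, not these dataframes.
    state = _shared_data.setdefault(page, _PageState())
    state.composite_model_variant = composite_model_variant
    state.df_models = df_models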
34 changes: 19 additions & 15 deletions analytics/app/pages/plots/num_triggers_eval_metric.py
@@ -51,36 +51,34 @@ def gen_fig_scatter_num_triggers(
     """
     # unpack data
     composite_model_variant = _shared_data[page].composite_model_variant
-    df_logs_agg = _shared_data[page].df_agg
-    df_logs_eval_single = _shared_data[page].df_eval_single
-    df_logs_eval_single = df_logs_eval_single[
-        (df_logs_eval_single["dataset_id"] == dataset_id)
-        & (df_logs_eval_single["eval_handler"] == eval_handler)
+    df_agg = _shared_data[page].df_agg
+    df_eval_single = _shared_data[page].df_eval_single
+    df_eval_single = df_eval_single[
+        (df_eval_single["dataset_id"] == dataset_id)
+        & (df_eval_single["eval_handler"] == eval_handler)
         # & (df_adjusted["metric"] == metric)
     ]

     if multi_pipeline_mode or only_active_periods:
         # we only want the pipeline performance (composed of the models active periods stitched together)
-        df_logs_eval_single = df_logs_eval_single[df_logs_eval_single[composite_model_variant]]
+        df_eval_single = df_eval_single[df_eval_single[composite_model_variant]]

     if not multi_pipeline_mode:
-        assert df_logs_eval_single["pipeline_ref"].nunique() == 1
-
         # add the pipeline time series which is the performance of different models stitched together dep.
         # w.r.t which model was active
-        pipeline_composite_model = df_logs_eval_single[df_logs_eval_single[composite_model_variant]]
+        pipeline_composite_model = df_eval_single[df_eval_single[composite_model_variant]]
         pipeline_composite_model["id_model"] = "0-pipeline-composite-model"
-        df_logs_eval_single["id_model"] = df_logs_eval_single["id_model"].astype(str)
-        df_logs_eval_single = pd.concat([df_logs_eval_single, pipeline_composite_model])
+        df_eval_single["id_model"] = df_eval_single["id_model"].astype(str)
+        df_eval_single = pd.concat([df_eval_single, pipeline_composite_model])

     col_map = {"value": "metric_value", "count": "num_triggers"}
-    num_triggers = df_logs_agg[df_logs_agg["id"] == PipelineStage.HANDLE_SINGLE_TRIGGER.name][["pipeline_ref", "count"]]
-    accuracies = df_logs_eval_single
+    num_triggers = df_agg[df_agg["id"] == PipelineStage.HANDLE_SINGLE_TRIGGER.name][["pipeline_ref", "count"]]
+    accuracies = df_eval_single
     labels = {
         "pipeline_ref": "Pipeline",
         "metric": "Metric",
         "num_triggers": "#triggers (proxy for cost)",
-        "metric_value": f"Metric value {'(mean)' if aggregate_metric else ''}",
+        "metric_value": f"Metric value {'(aggregated)' if aggregate_metric else ''}",
     }
     category_orders = {
         "pipeline_ref": list(sorted(accuracies["pipeline_ref"].unique())),
@@ -95,6 +93,11 @@ def section3_scatter_num_triggers(
         aggregate_func="time_weighted_avg" if time_weighted else "mean",
     )
     merged = num_triggers.merge(mean_accuracies, on="pipeline_ref").rename(columns=col_map, inplace=False)
+    assert (
+        mean_accuracies.shape[0]
+        == merged.shape[0]
+        == num_triggers.shape[0] * len(mean_accuracies["metric"].unique())
+    )
     fig = px.scatter(
         merged,
         x="num_triggers",
@@ -140,6 +143,7 @@ def section3_scatter_num_triggers(
         df_agg=df_agg,
         df_eval_single=df_eval_single,
     )
+    _shared_data[page].composite_model_variant = composite_model_variant
     _shared_data[page].df_agg = df_agg
     _shared_data[page].df_eval_single = df_eval_single

@@ -149,7 +153,7 @@ def section3_scatter_num_triggers(
         Input(f"{page}-radio-scatter-number-triggers-dataset-id", "value"),
         Input(f"{page}-radio-scatter-number-triggers-metric", "value"),
         Input(f"{page}-radio-scatter-number-triggers-agg-y", "value"),
-        Input(f"{page}-radio-1d-eval-metric-only-active-model-periods", "value"),
+        Input(f"{page}-radio-scatter-number-triggers-agg-time-weighted", "value"),
         Input(f"{page}-radio-scatter-number-triggers-only-active-model-periods", "value"),
     )
     def update_scatter_num_triggers(
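The corrected callback input wires up the agg-time-weighted radio button, which switches aggregate_func to "time_weighted_avg": an evaluation covering a long interval should weigh proportionally more than one covering a short interval, rather than all evaluations contributing equally. A minimal sketch of such a weighting with hypothetical interval columns (df_aggregate_eval_metric may implement it differently):

import pandas as pd


def time_weighted_avg(df: pd.DataFrame, value_col: str = "value") -> float:
    # Weight each evaluation by the length of the interval it covers.
    weights = (df["interval_end"] - df["interval_start"]).dt.total_seconds()
    return float((df[value_col] * weights).sum() / weights.sum())


df = pd.DataFrame(
    {
        "interval_start": pd.to_datetime(["2024-01-01", "2024-01-08"]),
        "interval_end": pd.to_datetime(["2024-01-08", "2024-01-10"]),
        "value": [0.9, 0.6],
    }
)
# (0.9 * 7 days + 0.6 * 2 days) / 9 days ≈ 0.833, vs. an unweighted mean of 0.75.
print(time_weighted_avg(df))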
2 changes: 2 additions & 0 deletions analytics/app/pages/plots/one_dimensional_comparison.py
@@ -38,6 +38,7 @@ def gen_fig_1d_cost(page: str) -> go.Figure:
         color="id",
         labels={"pipeline_ref": "Pipeline", "duration": "duration in seconds", "id": "Pipeline Stage"},
         title="Stage costs",
+        height=900,
     )


@@ -133,6 +134,7 @@ def section4_1d_boxplots(
         df_all=df_all,
         df_eval_single=df_eval_single,
     )
+    _shared_data[page].composite_model_variant = composite_model_variant
     _shared_data[page].df_all = df_all
     _shared_data[page].df_eval_single = df_eval_single

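The substantive change in the first hunk is pinning the figure height: without an explicit height, Dash sizes the figure to its container, which squashes a plot that stacks many pipeline stages. A stand-in sketch of the effect (px.box and the sample data are assumptions; the diff only shows the call's labels and title):

import pandas as pd
import plotly.express as px

# Hypothetical stage-duration records; the real page derives these from df_all.
df = pd.DataFrame(
    {
        "pipeline_ref": ["p1", "p1", "p1", "p2", "p2", "p2"],
        "id": ["TRAIN", "TRAIN", "EVALUATE", "TRAIN", "TRAIN", "EVALUATE"],
        "duration": [12.0, 14.5, 3.5, 9.1, 8.7, 2.9],
    }
)
fig = px.box(
    df,
    x="pipeline_ref",
    y="duration",
    color="id",
    labels={"pipeline_ref": "Pipeline", "duration": "duration in seconds", "id": "Pipeline Stage"},
    title="Stage costs",
    height=900,  # fixed pixel height instead of whatever the page layout allots
)
fig.show()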
5 changes: 2 additions & 3 deletions analytics/app/pages/state.py
@@ -1,7 +1,6 @@
 from dataclasses import dataclass

 import pandas as pd
-
 from analytics.app.data.load import list_pipelines, load_pipeline_logs
 from analytics.app.data.transform import (
     dfs_models_and_evals,
@@ -56,7 +55,7 @@ def process_pipeline_data(pipeline_id: int) -> ProcessedPipelineData:
     df_parents = pipeline_stage_parents(logs)
     df_add_parents = df_agg.merge(df_parents, left_on="id", right_on="id", how="left")

-    df_logs_models, df_eval_requests, df_logs_eval_single = dfs_models_and_evals(
+    df_logs_models, df_eval_requests, df_eval_single = dfs_models_and_evals(
         logs, df_all["sample_time"].max(), pipeline_ref
     )

@@ -72,5 +71,5 @@ def process_pipeline_data(pipeline_id: int) -> ProcessedPipelineData:
         df_add_parents=df_add_parents,
         df_models=df_logs_models,
         df_eval_requests=df_eval_requests,
-        df_eval_single=df_logs_eval_single,
+        df_eval_single=df_eval_single,
     )
12 changes: 9 additions & 3 deletions analytics/tools/aggregate_runs/pipeline_equivalence.py
@@ -17,9 +17,15 @@ def assert_pipeline_equivalence(logs: list[PipelineLogs]) -> None:
         candidate.config.pipeline.training.device = candidates[0].config.pipeline.training.device
         candidate.config.pipeline.evaluation.device = candidates[0].config.pipeline.evaluation.device

-        if isinstance(candidate.config.pipeline.selection_strategy, CoresetStrategyConfig) and isinstance(candidate.config.pipeline.selection_strategy.downsampling_config, RHOLossDownsamplingConfig):
-            candidate.config.pipeline.selection_strategy.downsampling_config.il_training_config.device = candidates[0].config.pipeline.selection_strategy.downsampling_config.il_training_config.device
-            candidate.config.pipeline.selection_strategy.downsampling_config.il_training_config.seed = candidates[0].config.pipeline.selection_strategy.downsampling_config.il_training_config.seed
+        if isinstance(candidate.config.pipeline.selection_strategy, CoresetStrategyConfig) and isinstance(
+            candidate.config.pipeline.selection_strategy.downsampling_config, RHOLossDownsamplingConfig
+        ):
+            candidate.config.pipeline.selection_strategy.downsampling_config.il_training_config.device = candidates[
+                0
+            ].config.pipeline.selection_strategy.downsampling_config.il_training_config.device
+            candidate.config.pipeline.selection_strategy.downsampling_config.il_training_config.seed = candidates[
+                0
+            ].config.pipeline.selection_strategy.downsampling_config.il_training_config.seed

     assert all(
         [candidate.config == candidates[0].config for candidate in candidates]
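Beyond line-wrapping, the logic this hunk preserves is worth spelling out: before aggregating runs, fields expected to differ between repetitions of the same pipeline (training/evaluation devices and, for RHO-Loss downsampling, the IL training device and seed) are overwritten with the first candidate's values, so the final assert on config equality only fires on substantive differences. A toy version of that normalize-then-compare idea (simplified config class, not Modyn's):

from dataclasses import dataclass, replace


@dataclass(frozen=True)
class RunConfig:  # hypothetical, far smaller than Modyn's pipeline config
    lr: float
    device: str
    seed: int


def assert_equivalent(candidates: list[RunConfig]) -> None:
    # Normalize run-specific fields to the first candidate, then demand
    # exact equality on everything that remains.
    reference = candidates[0]
    normalized = [replace(c, device=reference.device, seed=reference.seed) for c in candidates]
    assert all(c == reference for c in normalized), "runs differ beyond device/seed"


# Same lr, different device and seed: treated as the same pipeline.
assert_equivalent(
    [
        RunConfig(lr=0.1, device="cuda:0", seed=1),
        RunConfig(lr=0.1, device="cuda:1", seed=2),
    ]
)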