Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: More sophisticated evaluation logic #534

Merged
merged 22 commits into from
Jun 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0b9af85
tmp
robinholzi Jun 20, 2024
a974941
strip notebook outputs
robinholzi Jun 20, 2024
f121dce
hotfixes
robinholzi Jun 20, 2024
823638d
extend notebook to support patching multiple runs
MaxiBoether Jun 20, 2024
70bf09d
Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/modyn
MaxiBoether Jun 20, 2024
e5babff
aggregate
robinholzi Jun 20, 2024
c550b35
wip: merging pipeline dir
MaxiBoether Jun 20, 2024
1e38614
tmp
robinholzi Jun 20, 2024
bd565a4
Merge remote-tracking branch 'origin/main' into robinholzi/fix/eval-plotting
MaxiBoether Jun 21, 2024
edc9252
Merge branch 'main' into robinholzi/fix/eval-plotting
robinholzi Jun 21, 2024
40134df
Make aggregation tool more robust
robinholzi Jun 21, 2024
7d1545b
add button to select composite_model_variant
robinholzi Jun 21, 2024
21eccf4
Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/modyn
MaxiBoether Jun 21, 2024
e86217a
Improvement to plotting logic
robinholzi Jun 22, 2024
245cc6f
Merge branch 'main' into robinholzi/fix/eval-plotting
robinholzi Jun 22, 2024
1b502d9
Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/modyn
MaxiBoether Jun 22, 2024
56784d8
pipe equivalence adjustments
MaxiBoether Jun 22, 2024
1a3a548
UI patches
robinholzi Jun 22, 2024
ff38e38
Fix
robinholzi Jun 22, 2024
b77e6d8
also allow runs for which we only have a single log
MaxiBoether Jun 22, 2024
d05dc40
Merge branch 'robinholzi/fix/eval-plotting' of github.com:eth-easl/modyn
MaxiBoether Jun 22, 2024
dbfbb80
only merge complete pipelines
MaxiBoether Jun 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
hotfixes
  • Loading branch information
robinholzi committed Jun 20, 2024
commit f121dce8c2230d0eef9cb2060cabbffed5d4a5ad
1 change: 0 additions & 1 deletion analytics/app/data/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Any, Literal, cast

import pandas as pd

from modyn.supervisor.internal.grpc.enums import PipelineStage
from modyn.supervisor.internal.pipeline_executor.models import PipelineLogs, SingleEvaluationInfo
from modyn.supervisor.internal.utils.time_tools import generate_real_training_end_timestamp
Expand Down
3 changes: 1 addition & 2 deletions analytics/app/pages/plots/eval_heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
from dataclasses import dataclass

import pandas as pd
from analytics.app.data.transform import patch_yearbook_time
from dash import Input, Output, callback, dcc, html
from plotly import graph_objects as go

from analytics.app.data.transform import patch_yearbook_time


@dataclass
class _SharedData:
Expand Down
3 changes: 1 addition & 2 deletions analytics/app/pages/plots/eval_over_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@

import pandas as pd
import plotly.express as px
from analytics.app.data.transform import patch_yearbook_time
from dash import Input, Output, callback, dcc, html
from plotly import graph_objects as go

from analytics.app.data.transform import patch_yearbook_time


@dataclass
class _SharedData:
Expand Down
80 changes: 80 additions & 0 deletions analytics/tools/patch_logfile.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,86 @@
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"models_red = df_models[[\"trigger_id\", \"id_model\", \"train_start\", \"train_end\"]]\n",
"models_red"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_red = eval_requests[[\"trigger_id\", \"training_idx\", \"model_idx\", \"interval_start\", \"interval_end\", \"eval_handler\", \"dataset_id\"]]\n",
"eval_red"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_cross = models_red.merge(eval_red, on=\"trigger_id\").rename(columns={\"train_start\": \"first_timestamp\", \"train_end\": \"last_timestamp\"})\n",
"assert df_cross.shape[0] == eval_red.shape[0]\n",
"df_cross"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adapted logic from handler.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df_cross[\"active_candidate\"] = df_cross[\"last_timestamp\"] < df_cross[\"active_model_trained_before\"]\n",
"\n",
"# # find the maximum model for every EvalCandidate that doesn't violate that constraint\n",
"# max_model_id = (\n",
"# df_cross[df_cross[\"active_candidate\"]]\n",
"# .groupby(\"active_model_trained_before\")[\"id_model\"]\n",
"# .aggregate(max_model_id=\"max\")\n",
"# )\n",
"\n",
"# # combine: a model in the cross product is most recent for a certain interval iff\n",
"# # it has maximum model id for its active_model_trained_before\n",
"# df_active_models = df_cross.merge(max_model_id, on=\"active_model_trained_before\", how=\"left\")\n",
"# df_active_models[\"active_model\"] = df_active_models[\"id_model\"] == df_active_models[\"max_model_id\"]\n",
"\n",
"# # for a given interval, the currently trained model is the model with the smallest id\n",
"# # from all models that have a strictly bigger id than the most recent model. Hence it is the model after the\n",
"# # most recent model.\n",
"# # For that we first build a model -> successor model mapping:\n",
"# model_successor_relation = df_active_models[[\"id_model\"]].drop_duplicates().sort_values(by=\"id_model\")\n",
"# model_successor_relation[\"next_id_model\"] = model_successor_relation[\"id_model\"].shift(-1, fill_value=-1)\n",
"\n",
"# # if there's no active model for the first interval(s), we still need to define the next model as the\n",
"# # trained model\n",
"# model_successor_relation = pd.concat(\n",
"# [\n",
"# model_successor_relation,\n",
"# pd.DataFrame([{\"id_model\": None, \"next_id_model\": df_active_models[\"id_model\"].min()}]),\n",
"# ]\n",
"# )\n",
"\n",
"# df_trained_models = df_active_models.merge(\n",
"# model_successor_relation, how=\"left\", left_on=\"max_model_id\", right_on=\"id_model\", suffixes=(\"\", \"__\")\n",
"# )\n",
"# df_trained_models[\"trained_model\"] = df_trained_models[\"id_model\"] == df_trained_models[\"next_id_model\"]\n"
]
}
],
"metadata": {
Expand Down
7 changes: 7 additions & 0 deletions modyn/config/schema/pipeline/trigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ class TimeTriggerConfig(ModynBaseModel):
description="Interval length for the trigger as an integer followed by a time unit: s, m, h, d, w, y",
pattern=rf"^\d+{REGEX_TIME_UNIT}$",
)
start_timestamp: int | None = Field(
None,
description=(
"The timestamp at which the triggering schedule starts. First trigger will be at start_timestamp + every."
"Use None to start at the first timestamp of the data."
),
)

@cached_property
def every_seconds(self) -> int:
Expand Down
15 changes: 3 additions & 12 deletions modyn/supervisor/internal/eval/handler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from __future__ import annotations

import pandas as pd
from pydantic import BaseModel, Field

from modyn.config.schema.pipeline import EvalHandlerConfig
from modyn.supervisor.internal.eval.strategies.abstract import AbstractEvalStrategy
from modyn.supervisor.internal.utils.time_tools import generate_real_training_end_timestamp
from modyn.utils import dynamic_module_import
from pydantic import BaseModel, Field

eval_strategy_module = dynamic_module_import("modyn.supervisor.internal.eval.strategies")

Expand Down Expand Up @@ -89,15 +87,8 @@ def get_eval_requests_after_pipeline(self, df_trainings: pd.DataFrame) -> list[E
) == set()
df_trainings = df_trainings.copy()

# for sparse datasets we want to use next_training_start-1 as training interval end instead of last_timestamp
# as there could be a long gap between the max(sample_time) in one training batch and the min(sample_time) in
# the next training batch.
# e.g. if we want to train for 1.1.2020-31.12.2020 but only have timestamps on 1.1.2020, last_timestamp
# would be 1.1.2020, but the next training would start on 1.1.2021.
df_trainings["real_last_timestamp"] = generate_real_training_end_timestamp(df_trainings)

training_intervals: list[tuple[int, int]] = [
(row["first_timestamp"], row["real_last_timestamp"]) for _, row in df_trainings.iterrows()
(row["first_timestamp"], row["last_timestamp"]) for _, row in df_trainings.iterrows()
]
eval_intervals = self.eval_strategy.get_eval_intervals(training_intervals)
df_eval_intervals = pd.DataFrame(
Expand All @@ -114,7 +105,7 @@ def get_eval_requests_after_pipeline(self, df_trainings: pd.DataFrame) -> list[E
# Check if a combination is the active. We first compute if model was trained before
# the usage starts last_timestamp (df_trainings) defines the end of the training data;
# active_model_trained_before (df_eval_intervals): defines center of an eval intervals.
df_cross["active_candidate"] = df_cross["real_last_timestamp"] < df_cross["active_model_trained_before"]
df_cross["active_candidate"] = df_cross["last_timestamp"] < df_cross["active_model_trained_before"]

# find the maximum model for every EvalCandidate that doesn't violate that constraint
max_model_id = (
Expand Down
19 changes: 11 additions & 8 deletions modyn/supervisor/internal/triggers/timetrigger.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,23 @@ class TimeTrigger(Trigger):
Clock starts with the first observed datapoint"""

def __init__(self, config: TimeTriggerConfig):
self.trigger_every_s: int = config.every_seconds
self.config = config
self.next_trigger_at: int | None = None

if self.trigger_every_s < 1:
raise ValueError(f"trigger_every must be > 0, but is {self.trigger_every_s}")
if self.config.every_seconds < 1:
raise ValueError(f"trigger_every must be > 0, but is {self.config.every_seconds}")

super().__init__()

def inform(self, new_data: list[tuple[int, int, int]]) -> Generator[int, None, None]:
if self.next_trigger_at is None:
if len(new_data) > 0:
self.next_trigger_at = new_data[0][1] + self.trigger_every_s # new_data is sorted
if self.config.start_timestamp is not None:
self.next_trigger_at = self.config.start_timestamp + self.config.every_seconds
else:
return
if len(new_data) > 0:
self.next_trigger_at = new_data[0][1] + self.config.every_seconds # new_data is sorted
else:
return

max_timestamp = new_data[-1][1] # new_data is sorted
triggering_indices = []
Expand All @@ -42,9 +45,9 @@ def inform(self, new_data: list[tuple[int, int, int]]) -> Generator[int, None, N
# This means that there was a trigger before the first item that we got informed about
# However, there might have been multiple triggers, e.g., if there is one trigger every second
# and 5 seconds have passed since the last item came through
# This is caught by our while loop which increases step by step for `trigger_every_s`.
# This is caught by our while loop which increases step by step for `config.every_seconds`.

triggering_indices.append(idx - 1)
self.next_trigger_at += self.trigger_every_s
self.next_trigger_at += self.config.every_seconds

yield from triggering_indices
Loading