Skip to content

Commit

Permalink
fix(atomic): use iloc to select last value in the dataframe
Browse files Browse the repository at this point in the history
This commit fixes a bug in the scores and concedes functions in atomic vaep,
where the intention is to grab the last element in the dataframe and assign that
value to the end of the shifted dataframe.

See also #718
Fixes #749
  • Loading branch information
probberechts committed Jun 24, 2024
1 parent 5b2fa1f commit 433513f
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 108 deletions.
216 changes: 108 additions & 108 deletions socceraction/atomic/vaep/labels.py
Original file line number Diff line number Diff line change
@@ -1,108 +1,108 @@
"""Implements the label tranformers of the Atomic-VAEP framework."""

import pandas as pd
from pandera.typing import DataFrame

import socceraction.atomic.spadl.config as atomicspadl
from socceraction.atomic.spadl import AtomicSPADLSchema


def scores(actions: DataFrame[AtomicSPADLSchema], nr_actions: int = 10) -> pd.DataFrame:
    """Determine whether the team possessing the ball scored a goal within the next x actions.

    An action is labelled True when it is itself a goal, or when one of the
    look-ahead actions is a goal by the same team or an own goal by the
    other team.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.
    nr_actions : int, default=10  # noqa: DAR103
        Size of the look-ahead window. The window holds the current action
        and the ``nr_actions - 1`` actions that follow it.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'scores' and a row for each action set to
        True if a goal was scored by the team possessing the ball within the
        next x actions; otherwise False.
    """
    goals = actions["type_id"] == atomicspadl.actiontypes.index("goal")
    owngoals = actions["type_id"] == atomicspadl.actiontypes.index("owngoal")
    team_id = actions["team_id"]

    # The current action counts when it is a goal itself.
    res = goals

    # An empty game has no last row to pad with and nothing to look ahead to.
    if len(actions) > 0:
        # Positional access: the index of `actions` is not guaranteed to be a
        # 0..n-1 range, so a label lookup of `len(actions) - 1` may fail or
        # pick the wrong row.
        last_goal = goals.iloc[-1]
        last_owngoal = owngoals.iloc[-1]
        last_team = team_id.iloc[-1]

        for i in range(1, nr_actions):
            # Look i actions ahead. Rows shifted in from beyond the end of
            # the game repeat the final action rather than becoming NaN,
            # which also keeps the original dtypes.
            goal_ahead = goals.shift(-i, fill_value=last_goal)
            owngoal_ahead = owngoals.shift(-i, fill_value=last_owngoal)
            team_ahead = team_id.shift(-i, fill_value=last_team)

            gi = goal_ahead & (team_ahead == team_id)
            ogi = owngoal_ahead & (team_ahead != team_id)
            res = res | gi | ogi

    # `to_frame(name=...)` labels the column whatever the Series is called;
    # `pd.DataFrame(named_series, columns=[...])` would select a non-existent
    # column and return NaN when the loop above does not run.
    return res.to_frame(name="scores")


def concedes(actions: DataFrame[AtomicSPADLSchema], nr_actions: int = 10) -> pd.DataFrame:
    """Determine whether the team possessing the ball conceded a goal within the next x actions.

    An action is labelled True when it is itself an own goal, or when one of
    the look-ahead actions is a goal by the other team or an own goal by the
    same team.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.
    nr_actions : int, default=10  # noqa: DAR103
        Size of the look-ahead window. The window holds the current action
        and the ``nr_actions - 1`` actions that follow it.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'concedes' and a row for each action set to
        True if a goal was conceded by the team possessing the ball within the
        next x actions; otherwise False.
    """
    goals = actions["type_id"] == atomicspadl.actiontypes.index("goal")
    owngoals = actions["type_id"] == atomicspadl.actiontypes.index("owngoal")
    team_id = actions["team_id"]

    # The current action counts when it is an own goal itself.
    res = owngoals

    # An empty game has no last row to pad with and nothing to look ahead to.
    if len(actions) > 0:
        # Positional access: the index of `actions` is not guaranteed to be a
        # 0..n-1 range, so a label lookup of `len(actions) - 1` may fail or
        # pick the wrong row.
        last_goal = goals.iloc[-1]
        last_owngoal = owngoals.iloc[-1]
        last_team = team_id.iloc[-1]

        for i in range(1, nr_actions):
            # Look i actions ahead. Rows shifted in from beyond the end of
            # the game repeat the final action rather than becoming NaN,
            # which also keeps the original dtypes.
            goal_ahead = goals.shift(-i, fill_value=last_goal)
            owngoal_ahead = owngoals.shift(-i, fill_value=last_owngoal)
            team_ahead = team_id.shift(-i, fill_value=last_team)

            gi = goal_ahead & (team_ahead != team_id)
            ogi = owngoal_ahead & (team_ahead == team_id)
            res = res | gi | ogi

    # `to_frame(name=...)` labels the column whatever the Series is called;
    # `pd.DataFrame(named_series, columns=[...])` would select a non-existent
    # column and return NaN when the loop above does not run.
    return res.to_frame(name="concedes")


def goal_from_shot(actions: DataFrame[AtomicSPADLSchema]) -> pd.DataFrame:
    """Determine whether a goal was scored from the current action.

    An action is labelled True when it is a shot and the action directly
    after it is a goal. This label can be used to train an xG model.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'goal' and a row for each action set to
        True if a goal was scored from the current action; otherwise False.
    """
    shot_id = atomicspadl.actiontypes.index("shot")
    goal_id = atomicspadl.actiontypes.index("goal")

    type_ids = actions["type_id"]
    is_shot = type_ids == shot_id
    # The last row has no successor: the shifted value is NaN, which never
    # equals the goal id, so that row is always False.
    followed_by_goal = type_ids.shift(-1) == goal_id

    return (is_shot & followed_by_goal).to_frame(name="goal")
"""Implements the label tranformers of the Atomic-VAEP framework."""

import pandas as pd
from pandera.typing import DataFrame

import socceraction.atomic.spadl.config as atomicspadl
from socceraction.atomic.spadl import AtomicSPADLSchema


def scores(actions: DataFrame[AtomicSPADLSchema], nr_actions: int = 10) -> pd.DataFrame:
    """Determine whether the team possessing the ball scored a goal within the next x actions.

    An action is labelled True when it is itself a goal, or when one of the
    look-ahead actions is a goal by the same team or an own goal by the
    other team.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.
    nr_actions : int, default=10  # noqa: DAR103
        Size of the look-ahead window. The window holds the current action
        and the ``nr_actions - 1`` actions that follow it.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'scores' and a row for each action set to
        True if a goal was scored by the team possessing the ball within the
        next x actions; otherwise False.
    """
    is_goal = actions["type_id"] == atomicspadl.actiontypes.index("goal")
    is_owngoal = actions["type_id"] == atomicspadl.actiontypes.index("owngoal")
    team = actions["team_id"]

    # The current action counts when it is a goal itself.
    res = is_goal

    # Without rows there is no final action to pad with (`iloc[-1]` would
    # raise IndexError) and nothing to look ahead to.
    if len(actions) > 0:
        # Values of the final action, selected by position so that any index
        # on `actions` works.
        final_goal = is_goal.iloc[-1]
        final_owngoal = is_owngoal.iloc[-1]
        final_team = team.iloc[-1]

        for i in range(1, nr_actions):
            # Look i actions ahead. `fill_value` pads the rows that run past
            # the end of the game with the final action, so the shifted
            # series keep their dtype instead of being upcast to hold NaN.
            goal_ahead = is_goal.shift(-i, fill_value=final_goal)
            owngoal_ahead = is_owngoal.shift(-i, fill_value=final_owngoal)
            team_ahead = team.shift(-i, fill_value=final_team)

            gi = goal_ahead & (team_ahead == team)
            ogi = owngoal_ahead & (team_ahead != team)
            res = res | gi | ogi

    # `pd.DataFrame(series, columns=["scores"])` treats a named Series as a
    # one-column mapping and would return an all-NaN 'scores' column when the
    # loop does not run (nr_actions <= 1). `to_frame` always renames.
    return res.to_frame(name="scores")


def concedes(actions: DataFrame[AtomicSPADLSchema], nr_actions: int = 10) -> pd.DataFrame:
    """Determine whether the team possessing the ball conceded a goal within the next x actions.

    An action is labelled True when it is itself an own goal, or when one of
    the look-ahead actions is a goal by the other team or an own goal by the
    same team.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.
    nr_actions : int, default=10  # noqa: DAR103
        Size of the look-ahead window. The window holds the current action
        and the ``nr_actions - 1`` actions that follow it.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'concedes' and a row for each action set to
        True if a goal was conceded by the team possessing the ball within the
        next x actions; otherwise False.
    """
    is_goal = actions["type_id"] == atomicspadl.actiontypes.index("goal")
    is_owngoal = actions["type_id"] == atomicspadl.actiontypes.index("owngoal")
    team = actions["team_id"]

    # The current action counts when it is an own goal itself.
    res = is_owngoal

    # Without rows there is no final action to pad with (`iloc[-1]` would
    # raise IndexError) and nothing to look ahead to.
    if len(actions) > 0:
        # Values of the final action, selected by position so that any index
        # on `actions` works.
        final_goal = is_goal.iloc[-1]
        final_owngoal = is_owngoal.iloc[-1]
        final_team = team.iloc[-1]

        for i in range(1, nr_actions):
            # Look i actions ahead. `fill_value` pads the rows that run past
            # the end of the game with the final action, so the shifted
            # series keep their dtype instead of being upcast to hold NaN.
            goal_ahead = is_goal.shift(-i, fill_value=final_goal)
            owngoal_ahead = is_owngoal.shift(-i, fill_value=final_owngoal)
            team_ahead = team.shift(-i, fill_value=final_team)

            gi = goal_ahead & (team_ahead != team)
            ogi = owngoal_ahead & (team_ahead == team)
            res = res | gi | ogi

    # `pd.DataFrame(series, columns=["concedes"])` treats a named Series as a
    # one-column mapping and would return an all-NaN 'concedes' column when
    # the loop does not run (nr_actions <= 1). `to_frame` always renames.
    return res.to_frame(name="concedes")


def goal_from_shot(actions: DataFrame[AtomicSPADLSchema]) -> pd.DataFrame:
    """Determine whether a goal was scored from the current action.

    An action is labelled True when it is a shot and the action directly
    after it is a goal. This label can be used to train an xG model.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'goal' and a row for each action set to
        True if a goal was scored from the current action; otherwise False.
    """
    shot_type = atomicspadl.actiontypes.index("shot")
    goal_type = atomicspadl.actiontypes.index("goal")

    action_types = actions["type_id"]
    # Type of the action that follows each row; NaN for the final row, which
    # therefore can never be labelled True.
    next_types = action_types.shift(-1)

    label = (action_types == shot_type) & (next_types == goal_type)
    return label.to_frame(name="goal")
18 changes: 18 additions & 0 deletions tests/atomic/test_atomic_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import socceraction.atomic.spadl.utils as spu
import socceraction.atomic.vaep.labels as lab
from pandera.typing import DataFrame
from socceraction.atomic.spadl import AtomicSPADLSchema


def test_scores(atomic_spadl_actions: DataFrame[AtomicSPADLSchema]) -> None:
    """Check that ``scores`` returns exactly one label row per atomic action."""
    named_actions = spu.add_names(atomic_spadl_actions)
    labels = lab.scores(named_actions, nr_actions=10)
    assert len(labels) == len(named_actions)


def test_conceds(atomic_spadl_actions: DataFrame[AtomicSPADLSchema]) -> None:
    """Check that ``concedes`` returns exactly one label row per atomic action."""
    named_actions = spu.add_names(atomic_spadl_actions)
    labels = lab.concedes(named_actions, nr_actions=10)
    assert len(labels) == len(named_actions)

0 comments on commit 433513f

Please sign in to comment.