Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Trend, Outlier, Bias, and Nelson Rules Detection in _alarms.py #56

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
Implemented DataFrame splitting tool and its unit test
  • Loading branch information
zRafaF committed Oct 15, 2024
commit 72be3cc0430c5feec687c12909c3afea9ced9a51
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,5 @@ dmypy.json
# Pyre type checker
.pyre/

# Temporary files
temp.py
38 changes: 37 additions & 1 deletion bibmon/_bibmon_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,4 +739,40 @@ def find_df_transitions(
transitions.append(i)
previous_event = df[label].iloc[i]

return transitions
return transitions

###############################################################################

def split_df_percentages(df: pd.DataFrame, percentages: list[float]) -> list[pd.DataFrame]:
    """
    Splits a DataFrame into consecutive, non-overlapping DataFrames
    according to the given percentages. The percentages must sum to 1
    (within floating-point tolerance).

    For example, if percentages = [0.6, 0.2, 0.2], the function returns
    a list with three DataFrames: the first with 60% of the rows, the
    second with the next 20% and the third with the following 20%.

    Warning: the size of each piece is truncated to an integer number
    of rows, so when the split cannot be done exactly the trailing rows
    of ``df`` are not included in any of the returned DataFrames.

    Parameters
    ----------
    df: pandas.DataFrame
        Data to be split. Rows are taken by position, in order.
    percentages: list of floats
        Fractions of the data assigned to each split, in order.

    Returns
    ----------
    : list of pandas.DataFrames
        List with the split DataFrames, one per entry in percentages.

    Raises
    ----------
    ValueError
        If the percentages do not sum to 1.
    """

    # Imported locally so this function does not depend on the
    # module-level import block.
    import math

    # Compare with a tolerance: an exact ``!= 1`` check can reject valid
    # inputs because binary floats do not always add up to exactly 1.0
    # (e.g. ten times 0.1).
    if not math.isclose(sum(percentages), 1.0):
        raise ValueError("The sum of the percentages must be 1.")

    n_rows = len(df)
    split_dfs = []
    start = 0

    for fraction in percentages:
        # int() truncates, so each piece never exceeds its share.
        end = start + int(fraction * n_rows)
        split_dfs.append(df.iloc[start:end])
        start = end

    return split_dfs
19 changes: 18 additions & 1 deletion test/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,21 @@ def test_find_df_transitions():

transitions = bibmon._bibmon_tools.find_df_transitions(data, 1, "number", "tag101")

assert transitions == [99, 101, 102, 103, 104, 106, 107, 108, 243]
assert transitions == [99, 101, 102, 103, 104, 106, 107, 108, 243]

def test_split_df_percentages():
    """A 60/20/20 split of the real data set yields the expected row counts."""
    real_data = bibmon.load_real_data()
    parts = bibmon._bibmon_tools.split_df_percentages(
        real_data,
        [0.6, 0.2, 0.2],
    )

    # Row counts of each piece, in the same order as the percentages.
    assert len(parts[0].index) == 1901
    assert len(parts[1].index) == 633
    assert len(parts[2].index) == 633

def test_split_df_percentages_error():
    """Percentages that do not sum to 1 must be rejected with ValueError."""
    data = bibmon.load_real_data()

    try:
        bibmon._bibmon_tools.split_df_percentages(data, [0.6, 0.2])
    except ValueError:
        # Expected outcome: invalid percentages are rejected.
        pass
    else:
        # Without this branch the test would pass silently when no
        # exception is raised, and so could never detect a regression.
        raise AssertionError(
            "split_df_percentages did not raise ValueError for "
            "percentages that do not sum to 1."
        )