Skip to content

Commit

Permalink
rdrf #2452 apply bc fix to all patient data in sgc
Browse files Browse the repository at this point in the history
  • Loading branch information
id2359 committed Apr 14, 2023
1 parent e2a0e3c commit 5f3a59b
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 68 deletions.
21 changes: 13 additions & 8 deletions rdrf/dashboards/components/sgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rdrf.models.definition.models import CommonDataElement
from ..components.common import BaseGraphic
from ..utils import get_range, get_base
from ..utils import sanity_check
from ..data import combine_data
from ..data import has_static_followups
from ..data import get_static_followups_handler
Expand Down Expand Up @@ -44,15 +45,12 @@ def get_graphic(self):
self.better = None # an indicator showing whether up is better
self.mode = "single" if self.patient else "all"
data = self.data
from dashboards.utils import dump

dump_file = f"sgc-initial-{self.title}.csv"
dump(dump_file, data)

if has_static_followups(self.registry):
sfu_handler = get_static_followups_handler(self.registry)
sfu_handler.fix_ordering_of_static_followups(data)
data = sfu_handler.fix_ordering_of_static_followups(data)

sanity_check("in get_graphic", data)
scores_map = {}
self.group_info = {}
self.rev_group = {}
Expand Down Expand Up @@ -416,7 +414,15 @@ def calculate_average_scores_over_time(self, data, score_names):
# this only makes sense if this chart is passed
# all patients scores
aggregations_map = {score_name: "mean" for score_name in score_names}
from dashboards.utils import dump

sanity_check("in calc avg scores", data)

dump(f"sgc-{self.title}-preavg.csv", data)

df = data.groupby(SEQ).agg(aggregations_map).reset_index()
dump(f"sgc-{self.title}-postavg.csv", df)

return df

def calculate_score_counts_over_time(self, data, score_names):
Expand Down Expand Up @@ -516,8 +522,7 @@ def get_range_value(self, fields):

def load_all_patients_data(self):
if not self.all_patients_data:
from rdrf.models.definition.models import Registry
from dashboards.data import get_data

registry = Registry.objects.get()
self.all_patients_data = get_data(registry)
self.all_patients_data = get_data(self.registry)
sanity_check("in load all", self.all_patients_data)
59 changes: 0 additions & 59 deletions rdrf/dashboards/components/tl.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,72 +286,13 @@ def _get_table_data(self) -> pd.DataFrame:
display_field = field + "_display"
df[display_field] = df[field].map(lambda v: get_display_value(field, v))

# for BC need to ensure the static followups are in the correct order
if has_static_followups(self.registry):
sfuh = get_static_followups_handler(self.registry)
df = sfuh.fix_ordering_of_static_followups(df)
else:
logger.debug("no static followups!")

return df

def _fix_ordering_of_static_followups(self, df):
    """Renumber ``SEQ`` for baseline and static followup rows, then re-sort.

    Each row of ``df`` is inspected by its ``FORM`` value:

    * a row for the configured baseline form gets ``SEQ`` set to 0;
    * a row for a configured followup form whose ``seq`` is not ``"+"`` is
      passed to ``_fixup_static_followup``.

    If at least one row was handled, sequence names are reassigned through
    ``assign_seq_names`` (using each form's ``display_name``) and the frame
    is sorted by ``SEQ``.

    Returns the resulting dataframe; when nothing matched, the frame that
    was passed in is returned.
    """
    logger.debug("fixing static followups:")

    # Followups flagged with "+" are excluded from the fixed renumbering.
    static_followups = []
    for followup in self.static_followups["followups"]:
        if followup["seq"] != "+":
            static_followups.append(followup["name"])

    baseline_form = self.static_followups["baseline"]
    logger.debug(f"static followups = {static_followups}")

    changed = False
    for index, row in df.iterrows():
        if row["FORM"] == baseline_form:
            old_seq = row["SEQ"]
            df.at[index, "SEQ"] = 0
            logger.debug(f"""static baseline fix: {row["FORM"]} {old_seq} -> 0""")
        elif row["FORM"] in static_followups:
            logger.debug(f"""fixing row for static followup {row["FORM"]}""")
            self._fixup_static_followup(df, index, row)
        else:
            # Not a baseline or static followup form: leave the row alone.
            continue
        changed = True

    if not changed:
        return df

    from rdrf.models.definition.models import RegistryForm

    def static_get_seq_name(seq, form):
        # Resolve the label from the form's display name; forms that are
        # neither the baseline nor a listed followup fall through to None.
        if form == self.static_followups["baseline"]:
            return RegistryForm.objects.get(name=form).display_name
        for form_dict in self.static_followups["followups"]:
            if form_dict["name"] == form:
                return RegistryForm.objects.get(name=form).display_name

    return assign_seq_names(df, static_get_seq_name).sort_values(by="SEQ")

def _fixup_static_followup(self, df, index, row):
    """Write the configured sequence number for ``row``'s form into ``df``.

    ``self.static_followups`` is read as a dict whose ``"followups"`` entry
    is a list of dicts carrying ``"seq"`` and ``"name"`` keys, e.g.::

        [{"seq": 1, "name": "FollowUpPROMS6months"},
         {"seq": 2, "name": "FUpPROMSYr1"},
         {"seq": 3, "name": "FUpPROMSYr2"},
         {"seq": "+", "name": "FUpPROMS3_10Years"}]

    For every entry whose ``"name"`` equals ``row["FORM"]``, that entry's
    ``"seq"`` is stored at ``df.at[index, "SEQ"]`` and the change is logged
    at debug level. ``df`` is modified in place and nothing is returned; a
    form that is not listed leaves ``df`` untouched.
    """
    target_form = row["FORM"]
    for entry in self.static_followups["followups"]:
        if entry["name"] != target_form:
            continue
        static_seq = entry["seq"]
        old_seq = row["SEQ"]
        df.at[index, "SEQ"] = static_seq
        logger.debug(
            f"""static fu fix: {row["FORM"]} {old_seq} -> {static_seq}"""
        )

def _add_colour_column(self, cde, df):
    """Add a ``<cde>_colour`` column derived from the ``cde`` column.

    Each value of ``df[cde]`` is passed through ``get_colour`` and the
    results are stored in a column named ``cde + "_colour"``. The frame is
    modified in place and also returned.
    """
    target_column = cde + "_colour"
    # Element-wise mapping, see
    # https://pandas.pydata.org/docs/reference/api/pandas.Series.map.html
    colours = df[cde].map(get_colour)
    df[target_column] = colours
    return df
5 changes: 5 additions & 0 deletions rdrf/dashboards/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
needs_all=False,
):
self.registry = registry
self.has_static_followups = has_static_followups(self.registry)
self.state = None
self.config_model = config_model
self.baseline_form = None
Expand Down Expand Up @@ -93,6 +94,10 @@ def __init__(

if not self.no_data:
self._order_by_collection_date(self.df)
if self.has_static_followups:
sfu_handler = get_static_followups_handler(self.registry)
self.df = sfu_handler.fix_ordering_of_static_followups(self.df)

c = datetime.now()
logger.info(f"time taken to load/generate df = {(c-a).total_seconds()} seconds")

Expand Down
1 change: 0 additions & 1 deletion rdrf/dashboards/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ def create_dataframe(self, scenario):

def check_baseline(self, df):
num_baselines = 0

for index, row in df.iterrows():
if row["TYPE"] == "baseline":
self.assertEqual(row["SEQ"], 0, "baseline row does not have SEQ 0")
Expand Down
19 changes: 19 additions & 0 deletions rdrf/dashboards/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,22 @@ def assign_seq_names(df, func=None):
)

return df


class DataFrameError(Exception):
    """Raised by ``sanity_check`` when a row breaks the baseline/followup SEQ rules."""


def sanity_check(where, df):
    """Verify that baseline and followup rows carry consistent ``SEQ`` values.

    Rows are examined in dataframe order and the first violation aborts the
    check.

    Args:
        where: Free-text label for the call site; it prefixes the error
            message.
        df: pandas DataFrame providing ``SEQ``, ``TYPE`` and ``FORM``
            columns.

    Raises:
        DataFrameError: If a row whose ``TYPE`` is ``"baseline"`` has a
            ``SEQ`` greater than 0, or a row whose ``TYPE`` is
            ``"followup"`` has a ``SEQ`` equal to 0.
    """
    # A frame without rows has nothing to verify (and may lack the columns).
    if len(df) == 0:
        return

    # Walk the three columns in lockstep instead of using iterrows(), which
    # constructs a full Series for every row; this check runs over
    # whole-registry data, so the per-row overhead adds up.
    for seq, form_type, form in zip(df["SEQ"], df["TYPE"], df["FORM"]):
        if form_type == "baseline" and seq > 0:
            raise DataFrameError(
                f"{where} baseline should have seq 0: {form} has seq = {seq}"
            )
        if form_type == "followup" and seq == 0:
            raise DataFrameError(
                f"{where} followup should have seq > 0: {form} has seq = 0"
            )

0 comments on commit 5f3a59b

Please sign in to comment.