Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pySEQ/SEQopts.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class SEQopts:
indicator_baseline: str = "_bas"
indicator_squared: str = "_sq"
km_curves: bool = False
multinomial: bool = False # - this can maybe be removed since statsmodels seems to be handling it?
ncores: int = multiprocessing.cpu_count()
numerator: Optional[str] = None
parallel: bool = False
Expand Down
3 changes: 3 additions & 0 deletions pySEQ/analysis/_outcome_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ def _outcome_fit(self,
)
)
df_pd = df.to_pandas()
df_pd[self.treatment_col] = df_pd[self.treatment_col].astype("category")
tx_bas = f"{self.treatment_col}{self.indicator_baseline}"
df_pd[tx_bas] = df_pd[tx_bas].astype("category")
for col in self.fixed_cols:
if col in df_pd.columns:
df_pd[col] = df_pd[col].astype("category")
Expand Down
2 changes: 1 addition & 1 deletion pySEQ/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def load_data(name: str = "SEQdata") -> pl.DataFrame:
if name == "SEQdata":
data_path = loc.joinpath("SEQdata.csv")
elif name == "SEQdata_multitreatment":
data_path = loc.joinpath("SEQdata_nultitreatment.csv")
data_path = loc.joinpath("SEQdata_multitreatment.csv")
else:
data_path = loc.joinpath("SEQdata_LTFU.csv")
return pl.read_csv(data_path)
Expand Down
6 changes: 6 additions & 0 deletions pySEQ/error/_param_checker.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from ..helpers import _pad

def _param_checker(self):
if self.subgroup_colname is not None and self.subgroup_colname not in self.fixed_cols:
raise ValueError("subgroup_colname must be included in fixed_cols.")
Expand All @@ -22,5 +24,9 @@ def _param_checker(self):
if self.weighted and self.method == "ITT" and self.cense_colname is None:
raise ValueError("For weighted ITT analyses, cense_colname must be provided.")

if self.excused:
_, self.excused_colnames = _pad(self.treatment_level, self.excused_colnames)
_, self.weight_eligible_colnames = _pad(self.treatment_level, self.weight_eligible_colnames)

return

3 changes: 2 additions & 1 deletion pySEQ/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
from ._bootstrap import bootstrap_loop
from ._format_time import _format_time
from ._predict_model import _predict_model
from ._prepare_data import _prepare_data
from ._prepare_data import _prepare_data
from ._pad import _pad
7 changes: 7 additions & 0 deletions pySEQ/helpers/_pad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
def _pad(a, b):
len_a, len_b = len(a), len(b)
if len_a < len_b:
a = a + [None] * (len_b - len_a)
elif len_b < len_a:
b = b + [None] * (len_a - len_b)
return a, b
8 changes: 4 additions & 4 deletions pySEQ/weighting/_weight_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def _fit_numerator(self, WDT):
formula = f"{predictor}~{self.numerator}"
tx_bas = f"{self.treatment_col}{self.indicator_baseline}" if self.excused else "tx_lag"
fits = []
for i in self.treatment_level:
for i, level in enumerate(self.treatment_level):
if self.excused and self.excused_colnames[i] is not None:
DT_subset = WDT[WDT[self.excused_colnames[i]] == 0]
else:
DT_subset = WDT
if self.weight_lag_condition:
DT_subset = DT_subset[DT_subset[tx_bas] == i]
DT_subset = DT_subset[DT_subset[tx_bas] == level]
if self.weight_eligible_colnames[i] is not None:
DT_subset = DT_subset[DT_subset[self.weight_eligible_colnames[i]] == 1]

Expand All @@ -56,13 +56,13 @@ def _fit_denominator(self, WDT):
predictor = "switch" if self.excused and not self.weight_preexpansion else self.treatment_col
formula = f"{predictor}~{self.denominator}"
fits = []
for i in self.treatment_level:
for i, level in enumerate(self.treatment_level):
if self.excused and self.excused_colnames[i] is not None:
DT_subset = WDT[WDT[self.excused_colnames[i]] == 0]
else:
DT_subset = WDT
if self.weight_lag_condition:
DT_subset = DT_subset[DT_subset["tx_lag"] == i]
DT_subset = DT_subset[DT_subset["tx_lag"] == level]
if not self.weight_preexpansion and not self.excused:
DT_subset = DT_subset[DT_subset['followup'] != 0]
if self.weight_eligible_colnames[i] is not None:
Expand Down
86 changes: 66 additions & 20 deletions tests/test_coefficients.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ def test_ITT_coefs():
s.expand()
s.fit()
matrix = s.outcome_model[0].summary2().tables[1]['Coef.'].to_list()
assert matrix == [-6.828506035553367, 0.12717241010543864, 0.1893500309004178,
0.03371515698762837, -0.00014691202235021713, 0.044566165558946304,
0.000578777043905276, 0.0032906669395291782, -0.013392420492057825,
0.20072409918428197]
assert matrix == [-6.828506035553407, 0.18935003090041902, 0.12717241010542563,
0.033715156987629266, -0.00014691202235029346, 0.044566165558944326,
0.0005787770439053261, 0.0032906669395295026, -0.01339242049205771,
0.20072409918428052]

def test_PreE_dose_response_coefs():
data = load_data("SEQdata")
Expand Down Expand Up @@ -91,9 +91,9 @@ def test_PreE_censoring_coefs():
s.expand()
s.fit()
matrix = s.outcome_model[0].summary2().tables[1]['Coef.'].to_list()
assert matrix == [-4.818288687908983, 0.06202831678835523, 0.5116656068909778,
0.025489681857267917, 0.00018215948440049318, -0.014019017637919164,
0.0011102389266667307]
assert matrix == [-4.818288687908951, 0.511665606890965, 0.062028316788368384,
0.025489681857269905, 0.00018215948440046585, -0.014019017637918164,
0.001110238926667272]

def test_PostE_censoring_coefs():
data = load_data("SEQdata")
Expand All @@ -113,11 +113,10 @@ def test_PostE_censoring_coefs():
s.expand()
s.fit()
matrix = s.outcome_model[0].summary2().tables[1]["Coef."].to_list()
assert matrix == [-7.911317932628025, 0.08903087485404401, 0.4909219070145824,
0.026160806382874355, 0.0001907814850356967, 0.04445697224986894,
0.0007051968052006822, 0.00431623909529477, 0.013762799304812941,
0.3196331024454667]
return print(matrix)
assert matrix == [-7.9113179326280445, 0.49092190701455873, 0.08903087485402544,
0.026160806382879903, 0.00019078148503570062, 0.04445697224987294,
0.0007051968052005897, 0.004316239095295115, 0.013762799304812959,
0.3196331024454665]

def test_PreE_censoring_excused_coefs():
data = load_data("SEQdata")
Expand Down Expand Up @@ -165,10 +164,10 @@ def test_PostE_censoring_excused_coefs():
s.fit()
matrix = s.outcome_model[0].summary2().tables[1]["Coef."].to_list()
# Doesn't converge on test data (have to input max weight)!
assert matrix == [-7.126398786875212, 0.13345454814736768, 0.2632047482928211,
0.03967181206032499, -0.0003308944679339907, 0.03763545026332593,
0.0007588725152627008, 0.0036793093608787847, -0.022372677571544725,
0.24418426175207003]
assert matrix == [-7.126398786875262, 0.2632047482928519, 0.13345454814736696,
0.03967181206032395, -0.00033089446793392585, 0.03763545026332514,
0.0007588725152627089, 0.0036793093608788923, -0.022372677571544992,
0.2441842617520696]

def test_PreE_LTFU_ITT():
data = load_data("SEQdata_LTFU")
Expand All @@ -190,10 +189,10 @@ def test_PreE_LTFU_ITT():
s.expand()
s.fit()
matrix = s.outcome_model[0].summary2().tables[1]["Coef."].to_list()
assert matrix == [-21.640523091572796, -0.19006360662228572, 0.0685235184372898,
0.028750950193838918, -0.0005762057433736666, 0.28554312978583757,
-0.001373044229623057, 0.006589141394458155, -0.44898959259422394,
1.3875089788036237]
assert matrix == [-21.640523091572675, 0.06852351843717125, -0.19006360662233904,
0.02875095019383619, -0.0005762057433737245, 0.28554312978583674,
-0.001373044229622937, 0.00658914139445824, -0.44898959259422067,
1.387508978803619]

def test_PostE_LTFU_ITT():
data = load_data("SEQdata_LTFU")
Expand All @@ -218,3 +217,50 @@ def test_PostE_LTFU_ITT():
0.028750950193838918, -0.0005762057433736666, 0.28554312978583757,
-0.001373044229623057, 0.006589141394458155, -0.44898959259422394,
1.3875089788036237]

def test_ITT_multinomial():
    """Check ITT outcome-model coefficients on the multi-treatment dataset.

    Runs an ITT analysis with ``treatment_level=[1, 2]`` and compares the
    coefficients of the first outcome model against pinned reference values.
    """
    # Local import keeps this test self-contained regardless of the module's
    # top-level imports.
    import math

    data = load_data("SEQdata_multitreatment")

    s = SEQuential(
        data,
        id_col="ID",
        time_col="time",
        eligible_col="eligible",
        treatment_col="tx_init",
        outcome_col="outcome",
        time_varying_cols=["N", "L", "P"],
        fixed_cols=["sex"],
        method="ITT",
        parameters=SEQopts(treatment_level=[1, 2]),
    )
    s.expand()
    s.fit()
    matrix = s.outcome_model[0].summary2().tables[1]["Coef."].to_list()

    expected = [
        -47.505262164163625, 1.76628017234151, 22.79205044396338,
        0.14473536056627245, -0.003725499516376173, 0.2893070991930884,
        -0.004266608123938117, 0.05574429164512122, 0.7847862691929901,
        1.4703411759229423,
    ]
    # Fitted coefficients come out of an iterative optimiser; exact float
    # equality breaks on last-digit differences between platforms or library
    # versions, so compare with a relative tolerance instead.
    assert len(matrix) == len(expected)
    for position, (got, want) in enumerate(zip(matrix, expected)):
        assert math.isclose(got, want, rel_tol=1e-8), (
            f"coefficient {position}: got {got!r}, expected {want!r}"
        )

def test_weighted_multinomial():
    """Check weighted censoring coefficients on the multi-treatment dataset.

    Runs a ``method="censoring"`` analysis with pre-expansion weights and
    ``treatment_level=[1, 2]``, then compares the coefficients of the first
    outcome model against pinned reference values.
    """
    # Local import keeps this test self-contained regardless of the module's
    # top-level imports.
    import math

    data = load_data("SEQdata_multitreatment")

    s = SEQuential(
        data,
        id_col="ID",
        time_col="time",
        eligible_col="eligible",
        treatment_col="tx_init",
        outcome_col="outcome",
        time_varying_cols=["N", "L", "P"],
        fixed_cols=["sex"],
        method="censoring",
        parameters=SEQopts(
            weighted=True,
            weight_preexpansion=True,
            treatment_level=[1, 2],
        ),
    )
    s.expand()
    s.fit()
    matrix = s.outcome_model[0].summary2().tables[1]["Coef."].to_list()

    expected = [
        -111.35419661939163, -12.571187230338328, 9.234157699403015,
        -0.6336774763031923, 0.016754692338530056, 5.8240772329087225,
        -0.08598454090661659,
    ]
    # Fitted coefficients come out of an iterative optimiser; exact float
    # equality breaks on last-digit differences between platforms or library
    # versions, so compare with a relative tolerance instead.
    assert len(matrix) == len(expected)
    for position, (got, want) in enumerate(zip(matrix, expected)):
        assert math.isclose(got, want, rel_tol=1e-8), (
            f"coefficient {position}: got {got!r}, expected {want!r}"
        )