Commit 2138ec0: Quantile Regression updates
Parent: 42c6f5c

File tree: 3 files changed (+1268, −235 lines)


Models/Code/quantileRegression/qr_eval2.ipynb

Lines changed: 1111 additions & 174 deletions
Large diffs are not rendered by default.

Models/Code/quantileRegression/qr_simulation.py

Lines changed: 59 additions & 8 deletions
@@ -112,17 +112,16 @@ def _check_x(
 
 
 def _feature_transform(
-    x: np.ndarray, feature_names: list
+    x: np.ndarray, feature_names: list, convert_theta: bool = True
 ) -> Tuple[np.ndarray, np.ndarray]:
-    # Convert theta
-    idx = feature_names.index("theta")
-    x[:, idx] = np.mod(x[:, idx], 2 * np.pi)
+    if convert_theta:
+        # Convert theta
+        idx = feature_names.index("theta")
+        x[:, idx] = np.mod(x[:, idx], 2 * np.pi)
 
     # x to constant
     keep_feats = ["theta", "theta_d", "x_d", "force_in", "alpha"]
     return x, np.array([i for i, f in enumerate(feature_names) if f in keep_feats])
-    # idx = feature_names.index("x")
-    # return x, np.delete(np.arange(x.shape[1]), idx)
 
 
 class QuantileRegressionSimulator:
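
The new convert_theta flag makes the angle wrap optional rather than unconditional. A minimal standalone sketch of what that wrap does, assuming a hypothetical two-column feature array with columns ["theta", "theta_d"]:

    import numpy as np

    feature_names = ["theta", "theta_d"]  # hypothetical example features
    x = np.array([[7.0, 0.1], [-1.0, 0.2]])

    # np.mod maps any angle into [0, 2*pi): 7.0 -> ~0.717, -1.0 -> ~5.283
    idx = feature_names.index("theta")
    x[:, idx] = np.mod(x[:, idx], 2 * np.pi)

Skipping the wrap (convert_theta=False) leaves theta on an unbounded scale, which may be desirable when the pole accumulates full rotations.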
@@ -155,6 +154,7 @@ def __init__(
         alpha_dist_params: dict | None = None,
         random_state: Union[int, None] = None,
         smoother: str = "butterdiff",
+        convert_theta: bool = True,
     ):
         self.nrow = x.shape[0]
         self.n_feat = 2
@@ -244,6 +244,7 @@ def __init__(
         self.freq = freq
         self.smoother = smoother
         self.model_params = model_params
+        self.convert_theta = convert_theta
 
     def train(self) -> List[lgb.Booster]:
         self.models = []
@@ -253,7 +254,9 @@ def train(self) -> List[lgb.Booster]:
             y = self.datasets[i].get_label()
 
             # feature transform
-            X, f_idx = _feature_transform(X, self.feature_names_ + ["alpha"])
+            X, f_idx = _feature_transform(
+                X, self.feature_names_ + ["alpha"], convert_theta=self.convert_theta
+            )
 
             # Train data
             dtrain = lgb.Dataset(X[:, f_idx], label=y, free_raw_data=False)
@@ -299,7 +302,9 @@
         )
 
         # Feature transform
-        x, f_idx = _feature_transform(x, self.feature_names_ + ["alpha"])
+        x, f_idx = _feature_transform(
+            x, self.feature_names_ + ["alpha"], convert_theta=self.convert_theta
+        )
 
         # Make 2nd derivative predictions
         for i in range(self.n_feat):
@@ -317,3 +322,49 @@
             )
 
         return preds
+
+    def predict_single(self, X: pd.DataFrame, levels: list[int]) -> pd.DataFrame:
+        # Levels to quantiles
+        alpha = [(1 - lev / 100) / 2 for lev in levels]
+        q = alpha + [1 - a for a in alpha]
+        q_names = [f"{pref}_{lev}" for pref in ["lower", "upper"] for lev in levels]
+
+        # Convert X
+        x = _check_x(X, self.feature_names_)
+
+        # Make predictions
+        res_list = []
+        for q_, q_name in zip(q, q_names):
+            x_ = np.hstack((x, np.repeat(q_, x.shape[0]).reshape(-1, 1)))
+
+            # feature transform
+            x_, f_idx = _feature_transform(
+                x_, self.feature_names_ + ["alpha"], convert_theta=self.convert_theta
+            )
+
+            for i, model in enumerate(self.models):
+                var = ["theta", "x"][i]
+                tmp = pd.DataFrame()
+
+                # Make 2nd derivative predictions
+                p = model.predict(x_[:, f_idx])
+                tmp["pred"] = p
+                tmp["variable"] = f"{var}_d"
+                tmp["quantile"] = q_name
+                tmp["t"] = X.index
+                res_list.append(tmp.copy())
+
+                # Make/propagate first derivative predictions?
+                tmp["pred"] = self.dt * X[f"{var}_d"].to_numpy()
+                tmp["variable"] = var
+                tmp["quantile"] = q_name
+                tmp["t"] = X.index
+                res_list.append(tmp)
+        all_preds = pd.concat(res_list)
+
+        # Pivot wide
+        all_preds = pd.pivot_table(
+            all_preds, index=["variable", "t"], columns="quantile", values="pred"
+        ).reset_index()
+
+        return all_preds
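
predict_single maps each central interval level to its two tail quantiles before querying the per-quantile boosters. A worked example of that mapping, using the levels configured in run_qr_test.py below:

    levels = [50, 80, 95]
    alpha = [(1 - lev / 100) / 2 for lev in levels]  # [0.25, 0.10, 0.025]
    q = alpha + [1 - a for a in alpha]               # [0.25, 0.10, 0.025, 0.75, 0.90, 0.975]
    q_names = [f"{pref}_{lev}" for pref in ["lower", "upper"] for lev in levels]
    # ['lower_50', 'lower_80', 'lower_95', 'upper_50', 'upper_80', 'upper_95']

After the pivot, the returned frame is wide: one row per (variable, t) pair with one prediction column per q_name.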

Models/Code/quantileRegression/run_qr_test.py

Lines changed: 98 additions & 53 deletions
@@ -13,14 +13,15 @@
 
 outpath = Path.home() / "Box/NASA_Figures/data"
 inpath = Path.cwd() / "../../../Data/cartpoleData"
+evalpath = Path.cwd() / "../../../Results/evaluation/predictions"
 
 model = QuantileRegressionSimulator
 
 params: dict[str, Any] = {
-    "name": "qr_exp2",  # Custom unique name used for saving predictions, parameters
+    "name": "qr_exp21",  # Custom unique name used for saving predictions, parameters
     "model_name": model.__name__,
     "model_params": {
-        "m_factor": 100,
+        "m_factor": 10,
         "freq": 1 / 4,
         "alpha_dist": "beta",
         "alpha_dist_params": {
@@ -29,26 +30,34 @@
         },
         "dt": 0.01,
         "model_params": {
-            "num_iterations": 500,
-            "learning_rate": 1e-2,
+            "num_iterations": 1000,
+            "learning_rate": 1e-3,
         },
-        "smooth_derv_est": False,
-        "smoothing_samples": 100,
-        "smoothing_perc": 0.95,
+        "smooth_derv_est": True,
+        "smoothing_samples": None,
+        "smoothing_perc": 1.0,
         "smoother": "meandiff",
+        "convert_theta": True,
     },
     # Which datasets
-    "datasets": ["det"],  # det, low_noise, high_noise only options
+    "datasets": [
+        "det",
+        "low_noise",
+        "high_noise",
+    ],  # det, low_noise, high_noise only options
     # Validation parameters
     # Remainder of train always validated (unless train_seconds == 400)
     # Others must be specified
     "valid_train": True,
-    "valid_valid": False,
-    "valid_test": False,
-    "train_seconds": 400,  # Use first __ seconds of data to train, rest for val
+    "valid_valid": True,
+    "valid_test": True,
+    "train_seconds": 100,  # Use first __ seconds of data to train
+    "val_train_start": 400,  # Start val after __ seconds, same as train_seconds if None
+    "val_train_seconds": 0,  # Use __ seconds of remaining data to val, None = all
     "n_sims": 100,
     "levels": [50, 80, 95],
     "var_names": ["theta", "x", "theta_d", "x_d"],
+    "eval_modes": ["single", "multi"],
     "random_state": 6,
 }
 
@@ -65,20 +74,35 @@
6574
if resp.lower()[0] == "n":
6675
sys.exit()
6776

68-
# Propogate param
77+
# Propogate param, set params
6978
params["model_params"]["random_state"] = params["random_state"]
7079

71-
# Read in data
80+
# Loop through datasets
81+
all_sim_data = []
7282
for dname in params["datasets"]:
7383
# Get all relevent datasets
7484
valid_sets, valid_starts = {}, {}
7585
data = pd.read_csv(inpath / f"{dname}_train.csv", index_col="t")
7686
train = data.loc[: params["train_seconds"]].copy()
7787

7888
# Get validation sets
79-
if len(train) != len(data):
80-
valid_sets["val_train"] = data.loc[params["train_seconds"] :].copy()
81-
skiprows = round(params["train_seconds"] * params["model_params"]["dt"])
89+
if params["val_train_seconds"] is None:
90+
params["val_train_seconds"] = 500 - params["train_seconds"]
91+
92+
if params["val_train_seconds"] > 0:
93+
sp = (
94+
params["train_seconds"]
95+
if params["val_train_start"] is None
96+
else params["val_train_start"]
97+
)
98+
99+
# Save validation data
100+
valid_sets["val_train"] = data.loc[
101+
sp : (sp + params["val_train_seconds"])
102+
].copy()
103+
104+
# Get correct starting point
105+
skiprows = round(sp / params["model_params"]["dt"])
82106
valid_starts["val_train"] = (
83107
pd.read_csv(
84108
inpath / "det_train.csv",
@@ -99,10 +123,8 @@
             .to_numpy()
         )
     if params["valid_valid"]:
-        valid_sets["valid"] = pd.read_csv(
-            inpath / f"{dname}_val.csv", index_col="t"
-        )
-        valid_starts["valid"] = (
+        valid_sets["val"] = pd.read_csv(inpath / f"{dname}_val.csv", index_col="t")
+        valid_starts["val"] = (
             pd.read_csv(
                 inpath / "det_val.csv",
                 nrows=1,
@@ -124,7 +146,7 @@
         )
 
     # Add valid starts to parameters
-    params["valid_starts"] = valid_starts
+    params["valid_starts"] = {k: list(v) for k, v in valid_starts.items()}
 
     # Train model
     print("Training Model")
@@ -141,47 +163,70 @@
     # Simulate over validation segments
     sim_data_list = []
     for name, val_data in valid_sets.items():
-        # Simulate trajectories
-        sims = curr_model.simulate_paths(
-            valid_starts[name],
-            force=val_data.force_in.to_numpy(),
-            n=params["n_sims"],
-            steps=val_data.shape[0],
-        )  # nsims x nsteps x 4
-
-        for i, var in enumerate(params["var_names"]):
-            # Calculate quantiles
-            sim_df = pd.DataFrame(
-                np.quantile(sims[..., i], axis=0, q=q).T,
-                columns=q_names,
-                index=val_data.index,
-            )
-            sim_df["mean"] = sims[..., i].mean(axis=0)
-            sim_df["actual"] = val_data[var]
-            sim_df["name"] = name
-            sim_df["variable"] = var
-            sim_df["t"] = sim_df.index
-            sim_df = sim_df.reset_index(drop=True)
-            sim_data_list.append(sim_df)
+        for eval_mode in params["eval_modes"]:
+            if eval_mode == "multi":
+                # Simulate trajectories
+                sims = curr_model.simulate_paths(
+                    valid_starts[name],
+                    force=val_data.force_in.to_numpy(),
+                    n=params["n_sims"],
+                    steps=val_data.shape[0],
+                )  # nsims x nsteps x 4
+
+                for i, var in enumerate(params["var_names"]):
+                    # Calculate quantiles
+                    sim_df = pd.DataFrame(
+                        np.quantile(sims[..., i], axis=0, q=q).T,
+                        columns=q_names,
+                        index=val_data.index,
+                    )
+                    sim_df["mean"] = sims[..., i].mean(axis=0)
+                    sim_df["actual"] = val_data[var]
+                    sim_df["name"] = name
+                    sim_df["variable"] = var
+                    sim_df["t"] = sim_df.index
+                    sim_df["eval_mode"] = eval_mode
+                    sim_df = sim_df.reset_index(drop=True)
+                    sim_data_list.append(sim_df)
+            elif eval_mode == "single":
+                sims = curr_model.predict_single(val_data, levels=params["levels"])
+                for var in params["var_names"]:
+                    sim_df = sims[sims.variable == var].copy()
+                    sim_df.index = val_data.index
+                    sim_df["actual"] = val_data[var]
+                    sim_df["name"] = name
+                    sim_df["t"] = sim_df.index
+                    sim_df["eval_mode"] = eval_mode
+                    sim_df = sim_df.reset_index(drop=True)
+                    sim_data_list.append(sim_df)
 
     sim_data = pd.concat(sim_data_list)
     sim_data["noise"] = dname
 
     print("Saving predictions, parameters, and model")
 
-    # Save predictions
-    sim_data.to_csv(
-        outpath / f"validation/predictions/{params['name']}.csv", index=False
-    )
-
-    # Save parameters
-    serializable_params = make_serializable(params | {"noise": dname})
-    with open(outpath / f"validation/parameters/{params['name']}.json", "w") as f:
-        f.write(json.dumps(serializable_params, indent=4))
+    # Store predictions
+    all_sim_data.append(sim_data)
 
     # Save model
     del curr_model.datasets
     with open(
-        outpath / f"validation/model_objects/{params['name']}.pkl", "wb"
+        outpath / f"validation/model_objects/{params['name']}_{dname}.pkl", "wb"
    ) as outp:
         pickle.dump(curr_model, outp, pickle.HIGHEST_PROTOCOL)
+
+# Save predictions
+all_data = pd.concat(all_sim_data)
+all_data.to_csv(
+    outpath / f"validation/predictions/{params['name']}.csv", index=False
+)
+
+# Save to experiment eval directory as well
+all_data[
+    (all_data.t - all_data.groupby("name")["t"].transform("min")) <= 10
+].to_csv(evalpath / f"{params['name']}.csv", index=False)
+
+# Save parameters
+serializable_params = make_serializable(params)
+with open(outpath / f"validation/parameters/{params['name']}.json", "w") as f:
+    f.write(json.dumps(serializable_params, indent=4))
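
The eval-directory export trims each validation segment to its first 10 seconds. A toy demonstration of the groupby-transform mask, using hypothetical values:

    import pandas as pd

    df = pd.DataFrame({"name": ["val", "val", "val"], "t": [400.0, 405.0, 420.0]})
    mask = (df.t - df.groupby("name")["t"].transform("min")) <= 10
    df[mask]  # keeps t = 400.0 and 405.0; 420.0 is more than 10 s past the segment start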
