 13 |  13 |
 14 |  14 | outpath = Path.home() / "Box/NASA_Figures/data"
 15 |  15 | inpath = Path.cwd() / "../../../Data/cartpoleData"
    |  16 | +evalpath = Path.cwd() / "../../../Results/evaluation/predictions"
 16 |  17 |
 17 |  18 | model = QuantileRegressionSimulator
 18 |  19 |
 19 |  20 | params: dict[str, Any] = {
 20 |     | -    "name": "qr_exp2",  # Custom unique name used for saving predictions, parameters
    |  21 | +    "name": "qr_exp21",  # Custom unique name used for saving predictions, parameters
 21 |  22 |     "model_name": model.__name__,
 22 |  23 |     "model_params": {
 23 |     | -        "m_factor": 100,
    |  24 | +        "m_factor": 10,
 24 |  25 |         "freq": 1 / 4,
 25 |  26 |         "alpha_dist": "beta",
 26 |  27 |         "alpha_dist_params": {

 29 |  30 |         },
 30 |  31 |         "dt": 0.01,
 31 |  32 |         "model_params": {
 32 |     | -            "num_iterations": 500,
 33 |     | -            "learning_rate": 1e-2,
    |  33 | +            "num_iterations": 1000,
    |  34 | +            "learning_rate": 1e-3,
 34 |  35 |         },
 35 |     | -        "smooth_derv_est": False,
 36 |     | -        "smoothing_samples": 100,
 37 |     | -        "smoothing_perc": 0.95,
    |  36 | +        "smooth_derv_est": True,
    |  37 | +        "smoothing_samples": None,
    |  38 | +        "smoothing_perc": 1.0,
 38 |  39 |         "smoother": "meandiff",
    |  40 | +        "convert_theta": True,
 39 |  41 |     },
 40 |  42 |     # Which datasets
 41 |     | -    "datasets": ["det"],  # det, low_noise, high_noise only options
    |  43 | +    "datasets": [
    |  44 | +        "det",
    |  45 | +        "low_noise",
    |  46 | +        "high_noise",
    |  47 | +    ],  # det, low_noise, high_noise only options
 42 |  48 |     # Validation parameters
 43 |  49 |     # Remainder of train always validated (unless train_seconds == 400)
 44 |  50 |     # Others must be specified
 45 |  51 |     "valid_train": True,
 46 |     | -    "valid_valid": False,
 47 |     | -    "valid_test": False,
 48 |     | -    "train_seconds": 400,  # Use first __ seconds of data to train, rest for val
    |  52 | +    "valid_valid": True,
    |  53 | +    "valid_test": True,
    |  54 | +    "train_seconds": 100,  # Use first __ seconds of data to train
    |  55 | +    "val_train_start": 400,  # Start val after __ seconds; same as train_seconds if None
    |  56 | +    "val_train_seconds": 0,  # Use __ seconds of remaining data to val; None = all
 49 |  57 |     "n_sims": 100,
 50 |  58 |     "levels": [50, 80, 95],
 51 |  59 |     "var_names": ["theta", "x", "theta_d", "x_d"],
    |  60 | +    "eval_modes": ["single", "multi"],
 52 |  61 |     "random_state": 6,
 53 |  62 | }
 54 |  63 |
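The quantile grid `q` and column labels `q_names` consumed further down in this diff are defined in the elided lines (old 55-64 / new 64-73). Their exact definitions are not shown, so the following is only a sketch consistent with `params["levels"]` and with how `q` and `q_names` are used below; the naming scheme is an assumption:

    # Hypothetical reconstruction -- the real definitions live in the elided lines.
    # Each central interval at `level` percent needs the (1 -/+ level/100)/2 quantiles.
    q = sorted(
        0.5 + s * level / 200 for level in params["levels"] for s in (-1, 1)
    )  # -> [0.025, 0.1, 0.25, 0.75, 0.9, 0.975] for levels [50, 80, 95]
    q_names = [f"q_{qi}" for qi in q]  # column labels; naming scheme assumed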
 65 |  74 | if resp.lower()[0] == "n":
 66 |  75 |     sys.exit()
 67 |  76 |
 68 |     | -# Propogate param
    |  77 | +# Propagate params
 69 |  78 | params["model_params"]["random_state"] = params["random_state"]
 70 |  79 |
 71 |     | -# Read in data
    |  80 | +# Loop through datasets
    |  81 | +all_sim_data = []
 72 |  82 | for dname in params["datasets"]:
 73 |  83 |     # Get all relevant datasets
 74 |  84 |     valid_sets, valid_starts = {}, {}
 75 |  85 |     data = pd.read_csv(inpath / f"{dname}_train.csv", index_col="t")
 76 |  86 |     train = data.loc[: params["train_seconds"]].copy()
 77 |  87 |
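One subtlety in the `train = data.loc[...]` slice above: with `t` as the index, `.loc` slicing is label-based and inclusive of the stop value, so the row at exactly `t == train_seconds` lands in the training set. A minimal demonstration:

    import pandas as pd

    s = pd.Series([0, 1, 2], index=[99.99, 100.00, 100.01])
    s.loc[:100]  # includes the row at 100.00; label slices keep the stop label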
 78 |  88 |     # Get validation sets
 79 |     | -    if len(train) != len(data):
 80 |     | -        valid_sets["val_train"] = data.loc[params["train_seconds"] :].copy()
 81 |     | -        skiprows = round(params["train_seconds"] * params["model_params"]["dt"])
    |  89 | +    if params["val_train_seconds"] is None:
    |  90 | +        params["val_train_seconds"] = 500 - params["train_seconds"]
    |  91 | +
    |  92 | +    if params["val_train_seconds"] > 0:
    |  93 | +        sp = (
    |  94 | +            params["train_seconds"]
    |  95 | +            if params["val_train_start"] is None
    |  96 | +            else params["val_train_start"]
    |  97 | +        )
    |  98 | +
    |  99 | +        # Save validation data
    | 100 | +        valid_sets["val_train"] = data.loc[
    | 101 | +            sp : (sp + params["val_train_seconds"])
    | 102 | +        ].copy()
    | 103 | +
    | 104 | +        # Get correct starting point
    | 105 | +        skiprows = round(sp / params["model_params"]["dt"])
 82 | 106 |         valid_starts["val_train"] = (
 83 | 107 |             pd.read_csv(
 84 | 108 |                 inpath / "det_train.csv",

 99 | 123 |             .to_numpy()
100 | 124 |         )
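Plugging the parameter values from this diff into the new windowing logic (train_seconds=100, val_train_start=400, dt=0.01) gives:

    sp = 400                      # val_train_start is not None, so it is used as-is
    skiprows = round(400 / 0.01)  # = 40000 rows skipped to reach t = 400 s
    # Note: with val_train_seconds == 0 as configured above, the `> 0` guard
    # skips building the val_train segment entirely; setting it to None would
    # expand it to 500 - train_seconds = 400 seconds.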
101 | 125 |     if params["valid_valid"]:
102 |     | -        valid_sets["valid"] = pd.read_csv(
103 |     | -            inpath / f"{dname}_val.csv", index_col="t"
104 |     | -        )
105 |     | -        valid_starts["valid"] = (
    | 126 | +        valid_sets["val"] = pd.read_csv(inpath / f"{dname}_val.csv", index_col="t")
    | 127 | +        valid_starts["val"] = (
106 | 128 |             pd.read_csv(
107 | 129 |                 inpath / "det_val.csv",
108 | 130 |                 nrows=1,

124 | 146 |         )
125 | 147 |
126 | 148 |     # Add valid starts to parameters
127 |     | -    params["valid_starts"] = valid_starts
    | 149 | +    params["valid_starts"] = {k: list(v) for k, v in valid_starts.items()}
128 | 150 |
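The `list(v)` conversion above matters because the `valid_starts` values come from `.to_numpy()`, and `json.dumps` (used when saving parameters at the end of the script) rejects ndarrays:

    import json
    import numpy as np

    arr = np.array([0.1, 0.0, -0.2, 0.3])
    # json.dumps({"val_train": arr})      # TypeError: ndarray is not JSON serializable
    json.dumps({"val_train": list(arr)})  # works: np.float64 subclasses Python float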
129 | 151 |     # Train model
130 | 152 |     print("Training Model")
141 | 163 |     # Simulate over validation segments
142 | 164 |     sim_data_list = []
143 | 165 |     for name, val_data in valid_sets.items():
144 |     | -        # Simulate trajectories
145 |     | -        sims = curr_model.simulate_paths(
146 |     | -            valid_starts[name],
147 |     | -            force=val_data.force_in.to_numpy(),
148 |     | -            n=params["n_sims"],
149 |     | -            steps=val_data.shape[0],
150 |     | -        )  # nsims x nsteps x 4
151 |     | -
152 |     | -        for i, var in enumerate(params["var_names"]):
153 |     | -            # Caculate quantiles
154 |     | -            sim_df = pd.DataFrame(
155 |     | -                np.quantile(sims[..., i], axis=0, q=q).T,
156 |     | -                columns=q_names,
157 |     | -                index=val_data.index,
158 |     | -            )
159 |     | -            sim_df["mean"] = sims[..., i].mean(axis=0)
160 |     | -            sim_df["actual"] = val_data[var]
161 |     | -            sim_df["name"] = name
162 |     | -            sim_df["variable"] = var
163 |     | -            sim_df["t"] = sim_df.index
164 |     | -            sim_df = sim_df.reset_index(drop=True)
165 |     | -            sim_data_list.append(sim_df)
    | 166 | +        for eval_mode in params["eval_modes"]:
    | 167 | +            if eval_mode == "multi":
    | 168 | +                # Simulate trajectories
    | 169 | +                sims = curr_model.simulate_paths(
    | 170 | +                    valid_starts[name],
    | 171 | +                    force=val_data.force_in.to_numpy(),
    | 172 | +                    n=params["n_sims"],
    | 173 | +                    steps=val_data.shape[0],
    | 174 | +                )  # nsims x nsteps x 4
    | 175 | +
    | 176 | +                for i, var in enumerate(params["var_names"]):
    | 177 | +                    # Calculate quantiles
    | 178 | +                    sim_df = pd.DataFrame(
    | 179 | +                        np.quantile(sims[..., i], axis=0, q=q).T,
    | 180 | +                        columns=q_names,
    | 181 | +                        index=val_data.index,
    | 182 | +                    )
    | 183 | +                    sim_df["mean"] = sims[..., i].mean(axis=0)
    | 184 | +                    sim_df["actual"] = val_data[var]
    | 185 | +                    sim_df["name"] = name
    | 186 | +                    sim_df["variable"] = var
    | 187 | +                    sim_df["t"] = sim_df.index
    | 188 | +                    sim_df["eval_mode"] = eval_mode
    | 189 | +                    sim_df = sim_df.reset_index(drop=True)
    | 190 | +                    sim_data_list.append(sim_df)
    | 191 | +            elif eval_mode == "single":
    | 192 | +                sims = curr_model.predict_single(val_data, levels=params["levels"])
    | 193 | +                for var in params["var_names"]:
    | 194 | +                    sim_df = sims[sims.variable == var].copy()
    | 195 | +                    sim_df.index = val_data.index
    | 196 | +                    sim_df["actual"] = val_data[var]
    | 197 | +                    sim_df["name"] = name
    | 198 | +                    sim_df["t"] = sim_df.index
    | 199 | +                    sim_df["eval_mode"] = eval_mode
    | 200 | +                    sim_df = sim_df.reset_index(drop=True)
    | 201 | +                    sim_data_list.append(sim_df)
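Shape bookkeeping for the "multi" branch, with toy sizes (the output layout of `simulate_paths` is taken from the `# nsims x nsteps x 4` comment above):

    import numpy as np

    sims = np.random.rand(100, 50, 4)  # n_sims x n_steps x n_vars
    qs = np.quantile(sims[..., 0], q=[0.025, 0.5, 0.975], axis=0)
    print(qs.shape)    # (3, 50): one row per quantile level
    print(qs.T.shape)  # (50, 3): transposed so rows align with val_data.index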
166 | 202 |
167 | 203 |     sim_data = pd.concat(sim_data_list)
168 | 204 |     sim_data["noise"] = dname
169 | 205 |
170 | 206 |     print("Saving predictions, parameters, and model")
171 | 207 |
172 |     | -    # Save predictions
173 |     | -    sim_data.to_csv(
174 |     | -        outpath / f"validation/predictions/{params['name']}.csv", index=False
175 |     | -    )
176 |     | -
177 |     | -    # Save parameters
178 |     | -    serializable_params = make_serializable(params | {"noise": dname})
179 |     | -    with open(outpath / f"validation/parameters/{params['name']}.json", "w") as f:
180 |     | -        f.write(json.dumps(serializable_params, indent=4))
    | 208 | +    # Store predictions
    | 209 | +    all_sim_data.append(sim_data)
181 | 210 |
182 | 211 |     # Save model
183 | 212 |     del curr_model.datasets
184 | 213 |     with open(
185 |     | -        outpath / f"validation/model_objects/{params['name']}.pkl", "wb"
    | 214 | +        outpath / f"validation/model_objects/{params['name']}_{dname}.pkl", "wb"
186 | 215 |     ) as outp:
187 | 216 |         pickle.dump(curr_model, outp, pickle.HIGHEST_PROTOCOL)
    | 217 | +
    | 218 | +# Save predictions
    | 219 | +all_data = pd.concat(all_sim_data)
    | 220 | +all_data.to_csv(
    | 221 | +    outpath / f"validation/predictions/{params['name']}.csv", index=False
    | 222 | +)
    | 223 | +
    | 224 | +# Save to experiment eval directory as well
    | 225 | +all_data[
    | 226 | +    (all_data.t - all_data.groupby("name")["t"].transform("min")) <= 10
    | 227 | +].to_csv(evalpath / f"{params['name']}.csv", index=False)
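The boolean mask above keeps only the first 10 seconds of each validation segment, measured from that segment's own starting time. For example:

    import pandas as pd

    df = pd.DataFrame(
        {"name": ["val_train", "val_train", "val"], "t": [400.0, 415.0, 0.0]}
    )
    mask = (df.t - df.groupby("name")["t"].transform("min")) <= 10
    print(df[mask])  # keeps (val_train, 400.0) and (val, 0.0); drops t == 415.0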
    | 228 | +
    | 229 | +# Save parameters
    | 230 | +serializable_params = make_serializable(params)
    | 231 | +with open(outpath / f"validation/parameters/{params['name']}.json", "w") as f:
    | 232 | +    f.write(json.dumps(serializable_params, indent=4))
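`make_serializable` is a project helper that is not part of this diff; the sketch below is a hypothetical illustration of the kind of coercion it would need to perform for `json.dumps` to accept params, not the repo's actual implementation:

    def make_serializable(obj):
        """Recursively coerce a params-style dict into JSON-safe types (sketch)."""
        if isinstance(obj, dict):
            return {k: make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [make_serializable(v) for v in obj]
        if hasattr(obj, "item"):  # numpy scalar -> matching Python scalar
            return obj.item()
        if isinstance(obj, (str, int, float, bool)) or obj is None:
            return obj
        return str(obj)  # fallback, e.g. pathlib.Path objects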