@@ -105,33 +105,33 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
105
105
# Search for an existing run key on disk. An individual model might have
106
106
# a timeout and hence was not written to disk
107
107
for i , (run_key , value ) in enumerate (estimator .run_history .data .items ()):
108
- if i == 0 :
109
- # Ignore dummy run
110
- continue
111
108
if 'SUCCESS' not in str (value .status ):
112
109
continue
113
110
114
111
run_key_model_run_dir = estimator ._backend .get_numrun_directory (
115
- estimator .seed , run_key .config_id , run_key .budget )
112
+ estimator .seed , run_key .config_id + 1 , run_key .budget )
116
113
if os .path .exists (run_key_model_run_dir ):
114
+ # The run key's config_id is different from the num_run;
115
+ # more specifically, num_run = config_id + 1 (the +1 is for the dummy run)
116
+ successful_num_run = run_key .config_id + 1
117
117
break
118
118
119
119
if resampling_strategy == HoldoutValTypes .holdout_validation :
120
120
model_file = os .path .join (run_key_model_run_dir ,
121
- f"{ estimator .seed } .{ run_key . config_id } .{ run_key .budget } .model" )
121
+ f"{ estimator .seed } .{ successful_num_run } .{ run_key .budget } .model" )
122
122
assert os .path .exists (model_file ), model_file
123
123
model = estimator ._backend .load_model_by_seed_and_id_and_budget (
124
- estimator .seed , run_key . config_id , run_key .budget )
124
+ estimator .seed , successful_num_run , run_key .budget )
125
125
assert isinstance (model .named_steps ['network' ].get_network (), torch .nn .Module )
126
126
elif resampling_strategy == CrossValTypes .k_fold_cross_validation :
127
127
model_file = os .path .join (
128
128
run_key_model_run_dir ,
129
- f"{ estimator .seed } .{ run_key . config_id } .{ run_key .budget } .cv_model"
129
+ f"{ estimator .seed } .{ successful_num_run } .{ run_key .budget } .cv_model"
130
130
)
131
131
assert os .path .exists (model_file ), model_file
132
132
133
133
model = estimator ._backend .load_cv_model_by_seed_and_id_and_budget (
134
- estimator .seed , run_key . config_id , run_key .budget )
134
+ estimator .seed , successful_num_run , run_key .budget )
135
135
assert isinstance (model , VotingClassifier )
136
136
assert len (model .estimators_ ) == 3
137
137
assert isinstance (model .estimators_ [0 ].named_steps ['network' ].get_network (),
@@ -142,7 +142,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
142
142
# Make sure that predictions on the test data are printed and make sense
143
143
test_prediction = os .path .join (run_key_model_run_dir ,
144
144
estimator ._backend .get_prediction_filename (
145
- 'test' , estimator .seed , run_key . config_id ,
145
+ 'test' , estimator .seed , successful_num_run ,
146
146
run_key .budget ))
147
147
assert os .path .exists (test_prediction ), test_prediction
148
148
assert np .shape (np .load (test_prediction , allow_pickle = True ))[0 ] == np .shape (X_test )[0 ]
@@ -152,7 +152,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
152
152
ensemble_prediction = os .path .join (run_key_model_run_dir ,
153
153
estimator ._backend .get_prediction_filename (
154
154
'ensemble' ,
155
- estimator .seed , run_key . config_id ,
155
+ estimator .seed , successful_num_run ,
156
156
run_key .budget ))
157
157
assert os .path .exists (ensemble_prediction ), ensemble_prediction
158
158
assert np .shape (np .load (ensemble_prediction , allow_pickle = True ))[0 ] == np .shape (
@@ -213,10 +213,16 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
213
213
X_train , X_test , y_train , y_test = sklearn .model_selection .train_test_split (
214
214
X , y , random_state = 1 )
215
215
216
+ include = None
217
+ # For Python versions below 3.7, the learned entity embedding
218
+ # cannot be stored on disk (only on CI)
219
+ if sys .version_info < (3 , 7 ):
220
+ include = {'network_embedding' : ['NoEmbedding' ]}
216
221
# Search for a good configuration
217
222
estimator = TabularRegressionTask (
218
223
backend = backend ,
219
224
resampling_strategy = resampling_strategy ,
225
+ include_components = include
220
226
)
221
227
222
228
estimator .search (
@@ -267,32 +273,32 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
267
273
# Search for an existing run key on disk. An individual model might have
268
274
# a timeout and hence was not written to disk
269
275
for i , (run_key , value ) in enumerate (estimator .run_history .data .items ()):
270
- if i == 0 :
271
- # Ignore dummy run
272
- continue
273
276
if 'SUCCESS' not in str (value .status ):
274
277
continue
275
278
276
279
run_key_model_run_dir = estimator ._backend .get_numrun_directory (
277
- estimator .seed , run_key .config_id , run_key .budget )
280
+ estimator .seed , run_key .config_id + 1 , run_key .budget )
278
281
if os .path .exists (run_key_model_run_dir ):
282
+ # The run key's config_id is different from the num_run;
283
+ # more specifically, num_run = config_id + 1 (the +1 is for the dummy run)
284
+ successful_num_run = run_key .config_id + 1
279
285
break
280
286
281
287
if resampling_strategy == HoldoutValTypes .holdout_validation :
282
288
model_file = os .path .join (run_key_model_run_dir ,
283
- f"{ estimator .seed } .{ run_key . config_id } .{ run_key .budget } .model" )
289
+ f"{ estimator .seed } .{ successful_num_run } .{ run_key .budget } .model" )
284
290
assert os .path .exists (model_file ), model_file
285
291
model = estimator ._backend .load_model_by_seed_and_id_and_budget (
286
- estimator .seed , run_key . config_id , run_key .budget )
292
+ estimator .seed , successful_num_run , run_key .budget )
287
293
assert isinstance (model .named_steps ['network' ].get_network (), torch .nn .Module )
288
294
elif resampling_strategy == CrossValTypes .k_fold_cross_validation :
289
295
model_file = os .path .join (
290
296
run_key_model_run_dir ,
291
- f"{ estimator .seed } .{ run_key . config_id } .{ run_key .budget } .cv_model"
297
+ f"{ estimator .seed } .{ successful_num_run } .{ run_key .budget } .cv_model"
292
298
)
293
299
assert os .path .exists (model_file ), model_file
294
300
model = estimator ._backend .load_cv_model_by_seed_and_id_and_budget (
295
- estimator .seed , run_key . config_id , run_key .budget )
301
+ estimator .seed , successful_num_run , run_key .budget )
296
302
assert isinstance (model , VotingRegressor )
297
303
assert len (model .estimators_ ) == 3
298
304
assert isinstance (model .estimators_ [0 ].named_steps ['network' ].get_network (),
@@ -303,7 +309,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
303
309
# Make sure that predictions on the test data are printed and make sense
304
310
test_prediction = os .path .join (run_key_model_run_dir ,
305
311
estimator ._backend .get_prediction_filename (
306
- 'test' , estimator .seed , run_key . config_id ,
312
+ 'test' , estimator .seed , successful_num_run ,
307
313
run_key .budget ))
308
314
assert os .path .exists (test_prediction ), test_prediction
309
315
assert np .shape (np .load (test_prediction , allow_pickle = True ))[0 ] == np .shape (X_test )[0 ]
@@ -313,7 +319,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
313
319
ensemble_prediction = os .path .join (run_key_model_run_dir ,
314
320
estimator ._backend .get_prediction_filename (
315
321
'ensemble' ,
316
- estimator .seed , run_key . config_id ,
322
+ estimator .seed , successful_num_run ,
317
323
run_key .budget ))
318
324
assert os .path .exists (ensemble_prediction ), ensemble_prediction
319
325
assert np .shape (np .load (ensemble_prediction , allow_pickle = True ))[0 ] == np .shape (
0 commit comments