99
1010from mlscorecheck .aggregated import (
1111 Evaluation ,
12+ Experiment ,
1213 compare_scores ,
1314 generate_dataset ,
1415 generate_evaluation ,
4445random_seeds = list (range (5 ))
4546
4647
47- def test_evaluate_timeout ():
48+ def test_evaluate_timeout () -> None :
4849 """
4950 Testing the evaluate_timeout function
5051 """
@@ -54,22 +55,28 @@ class Mock: # pylint: disable=too-few-public-methods
5455 Mock lp_problem class
5556 """
5657
57- def __init__ (self ):
58+ def __init__ (self ) -> None :
5859 """
5960 Constructor of the mock class
6061 """
6162 self .status = 0
6263
6364 mock = Mock ()
6465
66+ # Build real arguments for evaluate_timeout: it needs an Experiment (not an Evaluation), scores, eps and a score subset
67+ dummy_evaluation_dict = generate_evaluation (random_state = 42 )
68+ dummy_experiment = Experiment (evaluations = [dummy_evaluation_dict ], aggregation = "som" )
69+ dummy_scores : dict = {"acc" : 0.5 }
70+ dummy_subset : list [str ] = ["acc" ]
71+
6572 with warnings .catch_warnings (record = True ) as warn :
66- evaluate_timeout (mock , None , None , None , None )
73+ evaluate_timeout (mock , dummy_experiment , dummy_scores , 0.1 , dummy_subset )
6774 assert len (warn ) == 1
6875
6976
7077@pytest .mark .parametrize ("random_seed" , random_seeds )
7178@pytest .mark .parametrize ("aggregation" , ["mos" , "som" ])
72- def test_instantiation (random_seed : int , aggregation : str ):
79+ def test_instantiation (random_seed : int , aggregation : str ) -> None :
7380 """
7481 Testing the instantiation of evaluations
7582
@@ -95,7 +102,7 @@ def test_instantiation(random_seed: int, aggregation: str):
95102
96103@pytest .mark .parametrize ("random_seed" , random_seeds )
97104@pytest .mark .parametrize ("aggregation" , ["mos" , "som" ])
98- def test_sample_figures (random_seed : int , aggregation : str ):
105+ def test_sample_figures (random_seed : int , aggregation : str ) -> None :
99106 """
100107 Testing the sampling of figures
101108
@@ -119,8 +126,8 @@ def test_sample_figures(random_seed: int, aggregation: str):
119126@pytest .mark .parametrize ("aggregation" , ["mos" , "som" ])
120127@pytest .mark .parametrize ("rounding_decimals" , [2 , 3 , 4 ])
121128def test_linear_programming_success (
122- subset : list , random_seed : int , aggregation : str , rounding_decimals : int
123- ):
129+ subset : list [ str ] , random_seed : int , aggregation : str , rounding_decimals : int
130+ ) -> None :
124131 """
125132 Testing the linear programming functionalities
126133
@@ -163,8 +170,8 @@ def test_linear_programming_success(
163170@pytest .mark .parametrize ("aggregation" , ["mos" , "som" ])
164171@pytest .mark .parametrize ("rounding_decimals" , [2 , 3 , 4 ])
165172def test_linear_programming_evaluation_generation_success (
166- subset : list , random_seed : int , aggregation : str , rounding_decimals : int
167- ):
173+ subset : list [ str ] , random_seed : int , aggregation : str , rounding_decimals : int
174+ ) -> None :
168175 """
169176 Testing the linear programming functionalities by generating the evaluation
170177
@@ -175,9 +182,15 @@ def test_linear_programming_evaluation_generation_success(
175182 rounding_decimals (int): the number of decimals to round to
176183 """
177184
178- evaluation = generate_evaluation (random_state = random_seed , aggregation = aggregation )
185+ evaluation_dict = generate_evaluation (random_state = random_seed , aggregation = aggregation )
186+ assert isinstance (evaluation_dict , dict ), "generate_evaluation should return dict when return_scores=False"
179187
180- evaluation = Evaluation (** evaluation )
188+ evaluation = Evaluation (
189+ dataset = evaluation_dict ["dataset" ],
190+ folding = evaluation_dict ["folding" ],
191+ aggregation = evaluation_dict ["aggregation" ],
192+ fold_score_bounds = evaluation_dict .get ("fold_score_bounds" ),
193+ )
181194
182195 evaluation .sample_figures (random_state = random_seed )
183196
@@ -203,7 +216,7 @@ def test_linear_programming_evaluation_generation_success(
203216@pytest .mark .parametrize ("aggregation" , ["mos" , "som" ])
204217def test_linear_programming_evaluation_generation_failure (
205218 random_seed : int , aggregation : str
206- ):
219+ ) -> None :
207220 """
208221 Testing the linear programming functionalities by generating the evaluation
209222
@@ -212,9 +225,15 @@ def test_linear_programming_evaluation_generation_failure(
212225 aggregation (str): the aggregation to use ('mos'/'som')
213226 """
214227
215- evaluation = generate_evaluation (random_state = random_seed , aggregation = aggregation )
228+ evaluation_dict = generate_evaluation (random_state = random_seed , aggregation = aggregation )
229+ assert isinstance (evaluation_dict , dict ), "generate_evaluation should return dict when return_scores=False"
216230
217- evaluation = Evaluation (** evaluation )
231+ evaluation = Evaluation (
232+ dataset = evaluation_dict ["dataset" ],
233+ folding = evaluation_dict ["folding" ],
234+ aggregation = evaluation_dict ["aggregation" ],
235+ fold_score_bounds = evaluation_dict .get ("fold_score_bounds" ),
236+ )
218237
219238 evaluation .sample_figures (random_state = random_seed )
220239
@@ -229,7 +248,7 @@ def test_linear_programming_evaluation_generation_failure(
229248
230249@pytest .mark .parametrize ("random_seed" , random_seeds )
231250@pytest .mark .parametrize ("aggregation" , ["mos" , "som" ])
232- def test_get_fold_score_bounds (random_seed : int , aggregation : str ):
251+ def test_get_fold_score_bounds (random_seed : int , aggregation : str ) -> None :
233252 """
234253 Testing the extraction of fold score bounds
235254
@@ -238,9 +257,15 @@ def test_get_fold_score_bounds(random_seed: int, aggregation: str):
238257 aggregation (str): the aggregation to use ('mos'/'som')
239258 """
240259
241- evaluation = generate_evaluation (random_state = random_seed , aggregation = aggregation )
260+ evaluation_dict = generate_evaluation (random_state = random_seed , aggregation = aggregation )
261+ assert isinstance (evaluation_dict , dict ), "generate_evaluation should return dict when return_scores=False"
242262
243- evaluation = Evaluation (** evaluation )
263+ evaluation = Evaluation (
264+ dataset = evaluation_dict ["dataset" ],
265+ folding = evaluation_dict ["folding" ],
266+ aggregation = evaluation_dict ["aggregation" ],
267+ fold_score_bounds = evaluation_dict .get ("fold_score_bounds" ),
268+ )
244269 evaluation .sample_figures ().calculate_scores ()
245270
246271 score_bounds = get_fold_score_bounds (evaluation , feasible = True )
@@ -255,8 +280,8 @@ def test_get_fold_score_bounds(random_seed: int, aggregation: str):
255280@pytest .mark .parametrize ("aggregation" , ["mos" ])
256281@pytest .mark .parametrize ("rounding_decimals" , [3 , 4 ])
257282def test_linear_programming_success_bounds (
258- subset : list , random_seed : int , aggregation : str , rounding_decimals : int
259- ):
283+ subset : list [ str ] , random_seed : int , aggregation : str , rounding_decimals : int
284+ ) -> None :
260285 """
261286 Testing the linear programming functionalities by generating the evaluation
262287 with bounds
@@ -287,16 +312,22 @@ def test_linear_programming_success_bounds(
287312
288313 assert lp_program .status in (0 , 1 )
289314
290- evaluate_timeout (lp_program , skeleton , scores , 10 ** (- rounding_decimals ), subset )
315+ # Direct evaluation instead of evaluate_timeout since we have an Evaluation, not Experiment
316+ if lp_program .status == 1 :
317+ populated = skeleton .populate (lp_program )
318+ assert compare_scores (
319+ scores , populated .calculate_scores (), 10 ** (- rounding_decimals ), subset
320+ )
321+ assert populated .check_bounds ()["bounds_flag" ] is True
291322
292323
293324@pytest .mark .parametrize ("subset" , two_combs + three_combs + four_combs )
294325@pytest .mark .parametrize ("random_seed" , random_seeds )
295326@pytest .mark .parametrize ("aggregation" , ["mos" ])
296327@pytest .mark .parametrize ("rounding_decimals" , [3 , 4 ])
297328def test_linear_programming_failure_bounds (
298- subset : list , random_seed : int , aggregation : str , rounding_decimals : int
299- ):
329+ subset : list [ str ] , random_seed : int , aggregation : str , rounding_decimals : int
330+ ) -> None :
300331 """
301332 Testing the linear programming functionalities by generating the evaluation
302333 with bounds
@@ -327,16 +358,23 @@ def test_linear_programming_failure_bounds(
327358
328359 assert lp_program .status in (- 1 , 0 )
329360
330- evaluate_timeout (lp_program , skeleton , scores , 10 ** (- rounding_decimals ), subset )
361+ # evaluate_timeout is no longer called here: it needs an Experiment, and this test has an Evaluation
362+ # For infeasible problems there is nothing further to evaluate, so the status assertion above is the whole check
331363
332364
333- def test_others ():
365+ def test_others () -> None :
334366 """
335367 Testing other functionalities
336368 """
337369
338- evaluation = generate_evaluation (aggregation = "som" ,
370+ evaluation_dict = generate_evaluation (aggregation = "som" ,
339371 feasible_fold_score_bounds = True ,
340372 random_state = 5 )
373+ assert isinstance (evaluation_dict , dict ), "generate_evaluation should return dict when return_scores=False"
341374 with pytest .raises (ValueError ):
342- Evaluation (** evaluation )
375+ Evaluation (
376+ dataset = evaluation_dict ["dataset" ],
377+ folding = evaluation_dict ["folding" ],
378+ aggregation = evaluation_dict ["aggregation" ],
379+ fold_score_bounds = evaluation_dict .get ("fold_score_bounds" ),
380+ )
0 commit comments