21
21
import re
22
22
23
23
from nnunetv2 .paths import nnUNet_raw
24
+ from multiprocessing import Pool
24
25
25
26
26
27
def get_identifiers_from_splitted_dataset_folder (folder : str , file_ending : str ):
@@ -33,7 +34,12 @@ def get_identifiers_from_splitted_dataset_folder(folder: str, file_ending: str):
33
34
return files
34
35
35
36
36
- def create_lists_from_splitted_dataset_folder (folder : str , file_ending : str , identifiers : List [str ] = None ) -> List [
37
def create_paths_fn(folder, files, file_ending, f):
    """Return the full paths of every file in `files` belonging to case identifier `f`.

    A file belongs to `f` when its name is exactly '<f>_XXXX<file_ending>' with
    XXXX a four-digit channel index (e.g. 'case1_0000.nii.gz').
    """
    pattern = re.compile(re.escape(f) + r"_\d\d\d\d" + re.escape(file_ending))
    matching = [fname for fname in files if pattern.fullmatch(fname)]
    return [join(folder, fname) for fname in matching]
41
+
42
def create_lists_from_splitted_dataset_folder(folder: str, file_ending: str, identifiers: List[str] = None,
                                              num_processes: int = 12) -> List[List[str]]:
    """
    For each case identifier, collect the full paths of its channel files
    ('<identifier>_XXXX<file_ending>') found in `folder`.

    does not rely on dataset.json

    :param folder: directory containing the splitted dataset files
    :param file_ending: file suffix including the dot, e.g. '.nii.gz'
    :param identifiers: case identifiers to collect files for; if None they are
        derived from the folder contents
    :param num_processes: worker processes used to match files in parallel;
        values <= 1 run serially in this process
    :return: one list of file paths per identifier, in identifier order
    """
    if identifiers is None:
        identifiers = get_identifiers_from_splitted_dataset_folder(folder, file_ending)
    files = subfiles(folder, suffix=file_ending, join=False, sort=True)

    # Fix: the old code unconditionally spawned a Pool, so num_processes <= 0
    # raised ValueError and trivial workloads paid pool startup + pickling cost.
    # Run serially when a pool cannot help. Also dropped the dead
    # 'list_of_lists = []' init that starmap immediately overwrote.
    if num_processes <= 1 or len(identifiers) <= 1:
        return [create_paths_fn(folder, files, file_ending, f) for f in identifiers]

    params_list = [(folder, files, file_ending, f) for f in identifiers]
    with Pool(processes=num_processes) as pool:
        list_of_lists = pool.starmap(create_paths_fn, params_list)

    return list_of_lists
49
57
50
58
0 commit comments