|
2 | 2 | import shutil |
3 | 3 | import random |
4 | 4 |
|
5 | | -from settings.settings import ( |
6 | | - COPY_DESTINATION_FOLDER, |
7 | | - COPY_SOURCE_FOLDER, |
8 | | - COPY_PERCENTAGE_TO_COPY, |
9 | | - COPY_RANDOM_MODE, |
10 | | - COPY_FIXED_NUMBER_TO_COPY |
11 | | -) |
| 5 | +from settings.settings import COPY_SOURCE_FOLDER, DATA_PATH, COPY_FIXED_NUMBER_TO_COPY, TRAIN_PERCENTAGE |
12 | 6 |
|
13 | 7 |
|
def copy_files(
    source_folder,
    dataset_folder,
    fixed_number,
    random_mode=True,
    train_fraction=None
):
    """Copy a sample of files into ``train`` and ``valid`` subfolders.

    Up to ``fixed_number`` regular files directly inside ``source_folder``
    (subdirectories are ignored) are selected and split in two: the first
    part is copied to ``<dataset_folder>/train`` and the remainder to
    ``<dataset_folder>/valid``. Both destination folders are created when
    they do not exist yet.

    Args:
        source_folder: Directory whose files are sampled.
        dataset_folder: Directory that holds the ``train`` and ``valid``
            subfolders.
        fixed_number: Maximum number of files to copy. When it exceeds the
            number of available files, every file is copied.
        random_mode: When true, files are picked with ``random.sample``;
            otherwise the first ``fixed_number`` names in sorted order are
            used.
        train_fraction: Share of the selected files that goes to ``train``,
            multiplied directly by the number of selected files (so 0.8
            means 80 %). Defaults to ``TRAIN_PERCENTAGE`` from the settings
            module. The resulting count is clamped to the valid range.

    Raises:
        ValueError: If ``fixed_number`` is negative.
    """
    # A negative count would make files[:fixed_number] silently drop files
    # from the end instead of limiting the selection.
    if fixed_number < 0:
        raise ValueError("fixed_number must be non-negative!")

    if train_fraction is None:
        train_fraction = TRAIN_PERCENTAGE

    # os.listdir returns names in arbitrary order; sorting keeps the
    # non-random selection reproducible.
    files = sorted(
        file_name for file_name in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, file_name))
    )

    total_to_copy = min(fixed_number, len(files))

    if random_mode:
        chosen_files = random.sample(files, total_to_copy)
    else:
        chosen_files = files[:total_to_copy]

    # Splitting the chosen files for train and validation. The count is
    # clamped so an out-of-range fraction cannot produce a negative index
    # or a negative reported validation count.
    num_train = max(0, min(total_to_copy, int(train_fraction * total_to_copy)))
    train_files = chosen_files[:num_train]
    valid_files = chosen_files[num_train:]

    # Define train and valid destination folders
    train_destination_folder = os.path.join(dataset_folder, 'train')
    valid_destination_folder = os.path.join(dataset_folder, 'valid')

    # copy2 treats a missing destination as a *file* path, so without these
    # directories every copy would overwrite one file named 'train'/'valid'.
    os.makedirs(train_destination_folder, exist_ok=True)
    os.makedirs(valid_destination_folder, exist_ok=True)

    for file_name in train_files:
        shutil.copy2(os.path.join(source_folder, file_name), train_destination_folder)
    for file_name in valid_files:
        shutil.copy2(os.path.join(source_folder, file_name), valid_destination_folder)

    print(f"{len(train_files)} files have been copied from {source_folder} to {train_destination_folder}.")
    print(f"{len(valid_files)} files have been copied from {source_folder} to {valid_destination_folder}.")
41 | 41 |
|
42 | 42 |
|
if __name__ == '__main__':
    # Script entry point: sample files from the configured source folder
    # and split them into the train/valid folders under DATA_PATH, always
    # using random selection.
    copy_files(
        source_folder=COPY_SOURCE_FOLDER,
        dataset_folder=DATA_PATH,
        fixed_number=COPY_FIXED_NUMBER_TO_COPY,
        random_mode=True,
    )
0 commit comments