Skip to content

Commit df48d93

Browse files
committed
Update data splitting script
1 parent 95991f7 commit df48d93

File tree

2 files changed

+10
-12
lines changed

2 files changed

+10
-12
lines changed

data_processing/split_dataset.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ def copy_files(ind, raw_d_dir, processed_d_dir, data_split, suffix=""):
2727
filename = f"{audio_prefix}{ind}{suffix}.wav"
2828
original_file_path = path.join(raw_d_dir, "speech", filename)
2929

30+
3031
if os.path.isfile(original_file_path):
3132
target_file_path = path.join(processed_d_dir, data_split, "inputs", filename)
3233
shutil.copy(original_file_path, target_file_path)
@@ -100,18 +101,15 @@ def _files_to_pandas_dataframe(extracted_dir, set_name, idx_range):
100101

101102
files = []
102103
for idx in idx_range:
103-
try:
104-
# original files
105-
input_file = path.abspath(path.join(extracted_dir, set_name, "inputs", audio_prefix + str(idx) + ".wav"))
106-
label_file = path.abspath(path.join(extracted_dir, set_name, "labels", motion_prefix + str(idx) + ".npz"))
107-
wav_size = path.getsize(input_file)
108-
files.append((input_file, wav_size, label_file))
109-
except OSError:
110-
continue
111-
112-
print(idx, end=' ')
113-
114-
return pandas.DataFrame(data=files, columns=["wav_filename", "wav_filesize", "bvh_filename"])
104+
# original files
105+
input_file = path.abspath(path.join(extracted_dir, set_name, "inputs", audio_prefix + str(idx).zfill(2) + ".wav"))
106+
label_file = path.abspath(path.join(extracted_dir, set_name, "labels", motion_prefix + str(idx).zfill(2) + ".npz"))
107+
if os.path.isfile(input_file):
108+
files.append((input_file, label_file))
109+
110+
print(idx, end=' ')
111+
112+
return pandas.DataFrame(data=files, columns=["wav_filename", "bvh_filename"])
115113

116114

117115
def check_dataset_directories(raw_data_dir):

data_processing/tools.pyc

-8.81 KB
Binary file not shown.

0 commit comments

Comments
 (0)