-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdelete_short_files.py.py
42 lines (26 loc) · 1022 Bytes
/
delete_short_files.py.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
import numpy as np
import os
# Can be changed to train, test or val
dataset_folder = 'dataset/val'


def is_short_utterance(utterance):
    """Return True when *utterance* has at most one whitespace-separated word.

    The value is passed through ``str()`` first, so a missing cell (NaN) is
    seen as the single token ``'nan'`` and therefore counts as short.
    """
    return len(str(utterance).split()) <= 1


def remove_short_utterances(document_df, dataset_folder):
    """Delete the audio of short utterances and return the remaining rows.

    For every row whose ``Utterance`` is short (see ``is_short_utterance``)
    the file ``<dataset_folder>/<fileName>.wav`` is removed if it exists, and
    the row is left out of the result whether or not the file was found.

    Args:
        document_df: DataFrame with ``Utterance`` and ``fileName`` columns.
        dataset_folder: Folder that holds the ``.wav`` files.

    Returns:
        A new DataFrame without the short rows, re-indexed from 0. The
        DataFrame passed in is not modified.
    """
    short_labels = []
    rows = zip(
        document_df.index,
        document_df['Utterance'],
        document_df['fileName'],
    )
    for label, utterance, file_stem in rows:
        if not is_short_utterance(utterance):
            continue
        # Each label is recorded once and dropped once after the loop;
        # dropping the same label a second time raises KeyError.
        short_labels.append(label)
        file_name = f"{dataset_folder}/{file_stem}.wav"
        if os.path.exists(file_name):
            os.remove(file_name)
            print("Deleted")
        else:
            print("Path does not exists!")
    return document_df.drop(index=short_labels).reset_index(drop=True)


def main():
    """Filter the split's spreadsheet, delete short clips, report the counts."""
    document_df = pd.read_excel('excel_files/val_excel.xlsx')
    unprocessed_length = len(document_df)
    document_df = remove_short_utterances(document_df, dataset_folder)
    document_df.to_excel("Final_Result.xlsx")
    print("Length before preprocessing = ", unprocessed_length)
    print("Length after preprocessing = ", len(document_df))
    print("Difference = ", (unprocessed_length - len(document_df)))


if __name__ == "__main__":
    main()