9
9
# Load the pretrained pyannote speaker-diarization pipeline once at import time
# so every diarize_audio() call reuses the same instance. AUTH_TOKEN is defined
# elsewhere in this file.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization",
    use_auth_token=AUTH_TOKEN,
)
11
11
12
-
13
def _save_merged_segment(audio, sr, out_dir, index, speaker, start_frames, end_frames, min_sec, max_sec):
    """Write one merged same-speaker segment unless its length is filtered out.

    Args:
        audio: Sample array as returned by ``wavfile.read``.
        sr: Sample rate in frames per second.
        out_dir: Directory the ``.wav`` file is written to.
        index: Number used as the zero-padded file-name prefix.
        speaker: Speaker label used as the file-name suffix.
        start_frames: First frame of the segment (inclusive).
        end_frames: Last frame of the segment (exclusive).
        min_sec: Skip the segment when it is shorter than this; ``None`` disables.
        max_sec: Skip the segment when it is longer than this; ``None`` disables.

    Returns:
        True when the file was written, False when the segment was skipped.
    """
    duration = (end_frames - start_frames) / sr
    # Report the times of the segment that is being dropped, in seconds.
    start_sec = start_frames / sr
    end_sec = end_frames / sr
    if min_sec is not None and duration < min_sec:
        print(f"skipping {start_sec:.1f}s stop={end_sec:.1f} because it is too short")
        return False
    if max_sec is not None and duration > max_sec:
        print(f"skipping {start_sec:.1f}s stop={end_sec:.1f} because it is too long")
        return False

    wavfile.write(os.path.join(out_dir, f"{index:04}-{speaker}.wav"), sr, audio[start_frames:end_frames])
    return True


def diarize_audio(audio_path, out_dir=None, num_speakers=None, keep_turn=False, min_sec=0.5, max_sec=None):
    """Split an audio file into per-speaker ``.wav`` clips using diarization.

    The file is read with ``wavfile.read`` and passed to the module-level
    ``pipeline``; each resulting speaker turn is cut out of the sample array
    and written as ``<index>-<speaker>.wav`` inside ``out_dir``.

    Args:
        audio_path: Path to the input audio file.
        out_dir: Output directory, created if missing. When ``None``, the
            label of the first speaker turn is used as the directory name.
        num_speakers: Forwarded to the pipeline as ``num_speakers``.
        keep_turn: When true, consecutive turns of the same speaker are merged
            into one clip (numbered from 1); otherwise every turn is written
            as its own clip (numbered from 0).
        min_sec: Merged clips shorter than this many seconds are skipped;
            ``None`` disables the check. Only applied when ``keep_turn`` is true.
        max_sec: Merged clips longer than this many seconds are skipped;
            ``None`` disables the check. Only applied when ``keep_turn`` is true.
    """
    sr, audio = wavfile.read(audio_path)
    diarization = pipeline(audio_path, num_speakers=num_speakers)

    # A caller-supplied directory must exist before the first write.
    if out_dir is not None:
        os.makedirs(out_dir, exist_ok=True)

    start_frames, end_frames = None, None
    last_spk = None
    i = 0
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        spk = speaker
        if out_dir is None:
            out_dir = spk
            os.makedirs(out_dir, exist_ok=True)

        print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker: {spk}")

        # Turn boundaries are in seconds; convert to sample indices.
        turn_start = int(sr * turn.start)
        turn_end = int(sr * turn.end)

        if not keep_turn:
            wavfile.write(os.path.join(out_dir, f"{i:04}-{spk}.wav"), sr, audio[turn_start:turn_end])
            i += 1
            continue

        if last_spk is None:
            # First turn opens the first merged segment.
            last_spk, start_frames, end_frames = spk, turn_start, turn_end
        elif spk == last_spk:
            # Same speaker keeps talking: extend the open segment.
            end_frames = turn_end
        else:
            # Speaker changed: emit the finished segment, then always open a
            # new one so a skipped clip cannot leave stale state behind.
            i += 1
            _save_merged_segment(audio, sr, out_dir, i, last_spk, start_frames, end_frames, min_sec, max_sec)
            last_spk, start_frames, end_frames = spk, turn_start, turn_end

    # The final merged segment has no following speaker change to trigger it.
    if keep_turn and last_spk is not None:
        i += 1
        _save_merged_segment(audio, sr, out_dir, i, last_spk, start_frames, end_frames, min_sec, max_sec)
43
-
51
+
44
52
45
53
if __name__ == "__main__":
    import argparse

    # The first positional argument of ArgumentParser is `prog`, so the
    # summary text has to be passed as `description`.
    parser = argparse.ArgumentParser(description="Diarize audio file")
    parser.add_argument("audio_path", type=str, help="Path to audio file")
    # Optional so that args.out_dir always exists; None lets diarize_audio
    # pick its own default directory.
    parser.add_argument("--out-dir", type=str, default=None, help="Path to output directory")
    parser.add_argument("--min-sec", type=float, default=0.5, help="Skip merged clips shorter than this many seconds")
    parser.add_argument("--max-sec", type=float, default=None, help="Skip merged clips longer than this many seconds")
    parser.add_argument("--num_speakers", type=int, default=2, help="Number of speakers")
    args = parser.parse_args()

    diarize_audio(
        args.audio_path,
        out_dir=args.out_dir,
        num_speakers=args.num_speakers,
        keep_turn=True,
        min_sec=args.min_sec,
        max_sec=args.max_sec,
    )
0 commit comments