Skip to content

Commit

Permalink
v2 lets goo
Browse files Browse the repository at this point in the history
  • Loading branch information
m-bain committed Mar 31, 2023
1 parent bc27760 commit 189aeac
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 19 deletions.
5 changes: 1 addition & 4 deletions whisperx/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,7 @@ def align(
for sdx, srow in segments_arr.iterrows():

seg_idx = int(srow["segment-idx"])
try:
sub_start = int(srow["subsegment-idx-start"])
except:
import pdb; pdb.set_trace()
sub_start = int(srow["subsegment-idx-start"])
sub_end = int(srow["subsegment-idx-end"])

seg = transcript[seg_idx]
Expand Down
16 changes: 1 addition & 15 deletions whisperx/vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,18 +301,4 @@ def merge_chunks(segments, chunk_size):
"end": curr_end,
"segments": seg_idxs,
})
return merged_segments


if __name__ == "__main__":
# Ad-hoc scratch script, executed only when this module is run directly.
# It loads a space-separated ".sad" file, merges consecutive rows whose gap
# is at most N, writes the merged rows to a ".lab" file, and then drops into
# the debugger.
# NOTE(review): columns 0 and 1 look like segment start/end times and N like
# a gap in seconds -- neither is established by this code; confirm.
import pandas as pd
input_fp = "tt298650_sync.wav"
# header=None: the file has no header row, so columns are labelled 0, 1, ...
df = pd.read_csv(f"/work/maxbain/tmp/{input_fp}.sad", sep=" ", header=None)
# Row count before merging.
print(len(df))
# Gap threshold: a row whose gap to the previous row exceeds N starts a new group.
N = 0.15
# Gap = this row's column 0 minus the previous row's column 1
# (NaN for the first row, which has no predecessor).
g = df[0].sub(df[1].shift())
# File name up to the first '.', used to build the output path.
input_base = input_fp.split('.')[0]
# g.gt(N).cumsum() increments the group id at every row whose gap exceeds N
# (NaN compares False, so the first row lands in group 0); each group is then
# collapsed to the min of column 0 and the max of column 1.
df = df.groupby(g.gt(N).cumsum()).agg({0:'min', 1:'max'})
# Write the merged rows space-separated, without header or index.
df.to_csv(f"/work/maxbain/tmp/{input_base}.lab", header=None, index=False, sep=" ")
print(df)
# Leftover debugging breakpoint: stops in pdb after the file has been written.
import pdb; pdb.set_trace()
return merged_segments

0 comments on commit 189aeac

Please sign in to comment.