Skip to content

4 playground windowing #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions code/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from preprocessing.AudioPreprocessor import AudioPreprocessor

def main():
    """Preprocess ``./audio.wav`` into windowed frames and print them."""
    print(AudioPreprocessor.load_preprocessed_frames("./audio.wav"))


# Only run the demo when executed as a script, not when imported.
if __name__ == "__main__":
    main()
137 changes: 137 additions & 0 deletions code/playground/windowing/AudioPreprocessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import numpy as np
import librosa
import librosa.display
import noisereduce as nr

class AudioPreprocessor:
    """Static helpers that turn a raw audio signal into windowed frames.

    ``load_preprocessed_frames`` chains the steps in this order:
    noise reduction -> silence removal -> framing -> windowing.
    """

    @staticmethod
    def int_to_float(array, type=np.float32):
        """
        Convert an integer array to floats normalised to the range [-1, 1].

        Parameters
        ----------
        array: np.array
            Input samples. Returned unchanged if it already has dtype
            ``type`` or any other floating point dtype.
        type: np.float32
            Target floating point dtype (the name shadows the builtin but is
            part of the public signature).

        Returns
        -------
        result : np.array
            Samples divided by their peak absolute value; all zeros if the
            input is silent or empty.
        """
        if array.dtype == type:
            return array

        if array.dtype not in [np.float16, np.float32, np.float64]:
            # Convert to float *before* taking the absolute value: np.abs on
            # the most negative integer (e.g. -32768 for int16) overflows and
            # stays negative, which would corrupt the peak.
            as_float = array.astype(np.float64)
            peak = np.max(np.abs(as_float)) if as_float.size else 0.0
            if peak != 0:
                as_float /= peak
            array = as_float.astype(type)

        return array

    @staticmethod
    def float_to_int(array, type=np.int16, divide_max_abs=True):
        """
        Convert a floating point array to integers scaled to full range.

        Parameters
        ----------
        array: np.array
            Input samples. Returned unchanged if it already has dtype
            ``type`` or is int16 / int32 / int64. Never modified in place.
        type: np.int16
            Target integer dtype.
        divide_max_abs: bool
            If True, normalise by the peak absolute value first so the
            loudest sample maps to ``np.iinfo(type).max``. If False, the
            input is assumed to lie in [-1, 1]; values outside are clipped
            to the representable range instead of wrapping around.

        Returns
        -------
        result : np.array
            Scaled samples, truncated towards zero.
        """
        if array.dtype == type:
            return array

        if array.dtype not in [np.int16, np.int32, np.int64]:
            limits = np.iinfo(type)
            peak = np.max(np.abs(array)) if array.size else 0
            if divide_max_abs and peak != 0:
                scaled = array / peak * limits.max
            else:
                # Silent / empty input or normalisation disabled.
                scaled = array * limits.max
            array = np.clip(scaled, limits.min, limits.max).astype(type)

        return array

    @staticmethod
    def remove_silence(y, threshold=0.005, min_silence_samples=200,
                       keep_samples=50):
        """
        Cut long silent stretches out of a 1-D signal.

        A sample is silent when ``abs(sample) < threshold``. Every run of
        more than ``min_silence_samples`` consecutive silent samples is
        removed, except for ``keep_samples`` samples kept at each end of the
        run so transitions are not cut abruptly.

        Parameters
        ----------
        y: np.array
            1-D signal.
        threshold: float
            Amplitude below which a sample counts as silent.
        min_silence_samples: int
            Run length (in samples, not milliseconds) that must be exceeded
            before a run is shortened.
        keep_samples: int
            Number of silent samples preserved at each end of a removed run.

        Returns
        -------
        result : np.array
            Copy of ``y`` without the removed samples.
        """
        y = np.asarray(y)
        silent = np.abs(y) < threshold

        # Pad with False so every run has both a rising and a falling edge;
        # edges then alternate start, end, start, end, ...
        padded = np.concatenate(([False], silent, [False]))
        edges = np.flatnonzero(padded[1:] != padded[:-1])
        run_starts, run_ends = edges[::2], edges[1::2]

        keep = np.ones(y.shape[0], dtype=bool)
        for start, end in zip(run_starts, run_ends):
            if end - start > min_silence_samples:
                # Empty slice (no-op) if the margins cover the whole run.
                keep[start + keep_samples:end - keep_samples] = False

        return y[keep]

    @staticmethod
    def remove_noise(y, sr):
        """
        Reduce background noise with ``noisereduce``.

        Parameters
        ----------
        y: np.array
            Signal.
        sr: int
            Sampling rate of ``y``.

        Returns
        -------
        result : np.array
            Output of ``nr.reduce_noise``.
        """
        # prop_decrease=0.8 removes only 80% of the estimated noise, which
        # sounds better than full removal (1.0).
        return nr.reduce_noise(y=y, sr=sr, prop_decrease=0.8)

    @staticmethod
    def create_frames(y, frame_size, overlap):
        """
        Split a signal into equally sized, optionally overlapping frames.

        Parameters
        ----------
        y: np.array
            Signal; frames are taken along the first axis.
        frame_size: int
            Number of samples per frame.
        overlap: int
            Number of samples shared by consecutive frames.

        Returns
        -------
        result : list of np.array
            Full frames only (a trailing partial frame is dropped). Empty if
            the arguments are invalid (``frame_size <= 0``, ``overlap < 0``
            or ``overlap >= frame_size``) or the signal is shorter than one
            frame.
        """
        if overlap >= frame_size or frame_size <= 0 or overlap < 0:
            return []

        hop = frame_size - overlap
        # "+ 1" keeps the last frame when it ends exactly at the end of the
        # signal.
        last_start = y.shape[0] - frame_size + 1
        return [y[start:start + frame_size]
                for start in range(0, last_start, hop)]

    @staticmethod
    def window_frames(frames, window_function=np.hanning):
        """
        Multiply every frame by a window of matching length.

        Parameters
        ----------
        frames: iterable of np.array
            Frames to window.
        window_function: callable
            Called with the frame length; must return the window samples.

        Returns
        -------
        result : list of np.array
            Windowed frames, in input order.
        """
        return [frame * window_function(frame.shape[0]) for frame in frames]

    @staticmethod
    def load_preprocessed_frames(filepath=None, y=None, sr=None,
                                 frame_size=1000, overlap=100):
        """
        Run the full preprocessing pipeline and return windowed frames.

        Parameters
        ----------
        filepath: str
            Audio file loaded with ``librosa.load`` when ``y`` or ``sr`` is
            missing.
        y: np.array
            Already loaded signal (used only together with ``sr``).
        sr: int
            Sampling rate of ``y``.
        frame_size: int
            Samples per frame.
        overlap: int
            Samples shared by consecutive frames.

        Returns
        -------
        result : list of np.array
            Windowed frames of the denoised, silence-trimmed signal.

        Raises
        ------
        ValueError
            If neither ``filepath`` nor both ``y`` and ``sr`` are given.
        """
        if filepath is None and (y is None or sr is None):
            raise ValueError("Either filepath or y and sr must be given.")

        if y is None or sr is None:
            y, sr = librosa.load(filepath)

        y = AudioPreprocessor.remove_noise(y=y, sr=sr)
        y = AudioPreprocessor.remove_silence(y=y)

        frames = AudioPreprocessor.create_frames(
            y=y, frame_size=frame_size, overlap=overlap
        )
        return AudioPreprocessor.window_frames(frames=frames)

def main():
    """Preprocess ``./audio.wav`` into windowed frames and print them."""
    print(AudioPreprocessor.load_preprocessed_frames("./audio.wav"))


# Only run the demo when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Binary file not shown.
47 changes: 47 additions & 0 deletions code/playground/windowing/fileHandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from tracemalloc import start
import librosa
import numpy as np
import matplotlib.pyplot as plt

class FileHandler:
    """Load an audio file and expose simple frame-level analysis helpers."""

    def __init__(self, filepath):
        """Load ``filepath`` with ``librosa.load`` and print its duration.

        Sets ``y`` (the samples), ``sampling_rate`` and ``total_time``
        (number of samples divided by the sampling rate).
        """
        self.y, self.sampling_rate = librosa.load(filepath)
        self.total_time = self.y.size / self.sampling_rate

        print(self.total_time)

    def get_sampling_rate(self):
        """Return the sampling rate the file was loaded with."""
        return self.sampling_rate

    def get_frame(self, frame_time, start_frame):
        """Return one frame of the signal and its nominal length.

        Parameters
        ----------
        frame_time: float
            Frame duration; multiplied by the sampling rate to get the
            frame length in samples.
        start_frame: int
            Index of the first sample of the frame.

        Returns
        -------
        result : tuple of (np.array, int)
            The slice of ``y`` and the nominal number of samples. The slice
            is shorter than the nominal length when it runs past the end of
            the signal.
        """
        frame_frames = int(self.sampling_rate * frame_time)
        return self.y[start_frame:(start_frame + frame_frames)], frame_frames

    def view(self):
        """Plot the whole signal against its sample index."""
        # arange puts every sample exactly on its index.
        plt.plot(np.arange(self.y.size), self.y)
        plt.show()

    def autocorrelate(self, frame_size, start_frame=3200):
        """Compare FFT-based and librosa autocorrelation of one frame.

        Prints and plots both variants for inspection.

        Parameters
        ----------
        frame_size: float
            Frame duration; multiplied by the sampling rate to get the
            frame length in samples.
        start_frame: int
            Index of the first sample of the analysed frame.

        Returns
        -------
        result : np.array
            ``librosa.autocorrelate`` of the Hann-windowed frame.
        """
        frame_y, _ = self.get_frame(frame_size, start_frame)
        # Use the real slice length so a frame truncated at the end of the
        # signal still matches its window and x-axis.
        n_samples = frame_y.size

        Fr = np.fft.fft(frame_y)
        S = Fr * np.conjugate(Fr)
        print(Fr)

        # Compute each autocorrelation once and reuse it below.
        fft_autocorr = np.fft.ifft(S)
        print(abs(fft_autocorr)[:10])
        print(abs(fft_autocorr).size)

        direct_autocorr = librosa.autocorrelate(frame_y)
        print(direct_autocorr[:10])
        print(direct_autocorr.size)

        sample_axis = np.arange(n_samples)
        plt.plot(sample_axis, frame_y)
        plt.show()
        # Plot the real part explicitly; the ifft result is complex.
        plt.plot(sample_axis, fft_autocorr.real)
        plt.plot(sample_axis, direct_autocorr)
        plt.show()
        return librosa.autocorrelate(frame_y * np.hanning(n_samples))

    def get_lpc(self, frame_time, order, start_frame=3200):
        """Return ``librosa.lpc`` of one Hann-windowed frame.

        Parameters
        ----------
        frame_time: float
            Frame duration; multiplied by the sampling rate to get the
            frame length in samples.
        order: int
            Passed to ``librosa.lpc`` as ``order``.
        start_frame: int
            Index of the first sample of the analysed frame.

        Returns
        -------
        result : np.array
            Output of ``librosa.lpc``.
        """
        frame_y, _ = self.get_frame(frame_time, start_frame)

        # Size the window from the real slice so a truncated frame does not
        # raise a shape mismatch.
        return librosa.lpc(frame_y * np.hanning(frame_y.size), order=order)
Loading