From d748e90e6ac89c6feaeb61f1eefa3ffcea1856b5 Mon Sep 17 00:00:00 2001 From: xander Date: Thu, 14 May 2020 13:36:39 +0200 Subject: [PATCH] Added support for sounddevice --- README.md | 5 + run_FFT_analyzer.py | 2 +- src/stream_analyzer.py | 20 ++- ...eam_reader.py => stream_reader_pyaudio.py} | 2 +- src/stream_reader_sounddevice.py | 137 ++++++++++++++++++ src/utils.py | 2 +- 6 files changed, 159 insertions(+), 9 deletions(-) rename src/{stream_reader.py => stream_reader_pyaudio.py} (99%) create mode 100644 src/stream_reader_sounddevice.py diff --git a/README.md b/README.md index 478b7df..d0dc8ea 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,11 @@ Tested with: * pyaudio --> Version: 0.2.11 * scipy --> Version: 1.4.1 +As an alternative to pyaudio, you can use [Sounddevice](https://python-sounddevice.readthedocs.io/en/0.3.15/installation.html), which might be more compatible with Windows/Mac +* Just run `python3 -m pip install sounddevice` +* Tested on Ubuntu 18.04 with sounddevice version 0.3.15 +* The code to switch between the two sound interfaces is in the `__init__` function of the Stream_Analyzer class + **ToDo:** * Implement realtime beat detection / melody extraction on top of FFT features (eg using Harmonic/Percussive decomposition) * The pygame.transform operations sometimes cause weird visual artifacts (boxes) for some resolution settings --> fix?? 
diff --git a/run_FFT_analyzer.py b/run_FFT_analyzer.py index 935ff63..f751ea5 100644 --- a/run_FFT_analyzer.py +++ b/run_FFT_analyzer.py @@ -4,7 +4,7 @@ ear = Stream_Analyzer( device = None, # Manually play with this (int) if you don't see anything rate = None, # Audio samplerate, None uses the default source settings - FFT_window_size_ms = 60, # Window size used for the FFT transform + FFT_window_size_ms = 40, # Window size used for the FFT transform updates_per_second = 1000, # How often to read the audio stream for new data smoothing_length_ms = 50, # Apply some temporal smoothing to reduce noisy features n_frequency_bins = 200, # The FFT features are grouped in bins diff --git a/src/stream_analyzer.py b/src/stream_analyzer.py index bf1bacd..2b49dda 100644 --- a/src/stream_analyzer.py +++ b/src/stream_analyzer.py @@ -3,7 +3,6 @@ from collections import deque from scipy.signal import savgol_filter -from src.stream_reader import Stream_Reader from src.fft import getFFT from src.utils import * @@ -36,11 +35,20 @@ def __init__(self, self.verbose = verbose self.visualize = visualize - self.stream_reader = Stream_Reader( - device = device, - rate = rate, - updates_per_second = updates_per_second, - verbose = verbose) + try: + from src.stream_reader_pyaudio import Stream_Reader + self.stream_reader = Stream_Reader( + device = device, + rate = rate, + updates_per_second = updates_per_second, + verbose = verbose) + except: + from src.stream_reader_sounddevice import Stream_Reader + self.stream_reader = Stream_Reader( + device = device, + rate = rate, + updates_per_second = updates_per_second, + verbose = verbose) self.rate = self.stream_reader.rate diff --git a/src/stream_reader.py b/src/stream_reader_pyaudio.py similarity index 99% rename from src/stream_reader.py rename to src/stream_reader_pyaudio.py index 93ce06a..6f5c067 100644 --- a/src/stream_reader.py +++ b/src/stream_reader_pyaudio.py @@ -28,7 +28,7 @@ def __init__(self, self.verbose = verbose self.pa = 
pyaudio.PyAudio() - #Temporary variables #hacks + #Temporary variables #hacks! self.update_window_n_frames = 1024 #Don't remove this, needed for device testing! self.data_buffer = None
diff --git a/src/stream_reader_sounddevice.py b/src/stream_reader_sounddevice.py
new file mode 100644
index 0000000..8e9a238
--- /dev/null
+++ b/src/stream_reader_sounddevice.py
@@ -0,0 +1,162 @@
+import numpy as np
+import time, sys, math
+from collections import deque
+import sounddevice as sd
+
+from src.utils import *
+
+class Stream_Reader:
+    """
+    The Stream_Reader continuously reads data from a selected sound source using sounddevice
+
+    Arguments:
+
+        device: int or None:    Select which audio stream to read. None uses the default input device.
+        rate: float or None:    Sample rate to use. Defaults to something supported.
+        updates_per_second: int: Only used to size the data windows when the block size
+                                 preferred by the device could not be measured.
+        FFT_window_size:        Not used by this reader.
+        verbose: bool:          Print stream status flags and keep callback timing statistics.
+
+    """
+
+    def __init__(self,
+        device = None,
+        rate = None,
+        updates_per_second = 1000,
+        FFT_window_size = None,
+        verbose = False):
+
+        print("Available audio devices:")
+        device_dict = sd.query_devices()
+        print(device_dict)
+
+        try:
+            sd.check_input_settings(device=device, channels=1, dtype=np.float32, extra_settings=None, samplerate=rate)
+        except Exception as err:
+            # Deliberately best-effort: whatever the backend rejects, fall back to its defaults.
+            # Unlike a bare except, this lets KeyboardInterrupt / SystemExit propagate.
+            print("Input sound settings for device %s and samplerate %s Hz not supported, using defaults..." %(str(device), str(rate)))
+            print("Reason: %s" %str(err))
+            rate = None
+            device = None
+
+        self.rate = rate
+        if rate is not None:
+            sd.default.samplerate = rate
+
+        self.device = device
+        if device is not None:
+            sd.default.device = device
+
+        self.verbose = verbose
+        self.data_buffer = None
+
+        # Initialised unconditionally so that switching verbose on later cannot break the callback
+        self.data_capture_delays = deque(maxlen=20)
+        self.num_data_captures = 0
+        self.new_data = False
+        self.info = ''
+
+        self.update_window_n_frames = self._measure_block_size(updates_per_second)
+
+        self.stream = sd.InputStream(
+            samplerate=self.rate,
+            blocksize=self.update_window_n_frames,
+            device=self.device, # same device the block size was measured on
+            channels=1,
+            dtype=np.float32,
+            latency='low',
+            extra_settings=None,
+            callback=self.non_blocking_stream_read)
+
+        # Read back the settings the stream was actually opened with
+        self.rate = self.stream.samplerate
+        self.device = self.stream.device
+
+        self.updates_per_second = self.rate / self.update_window_n_frames
+        self.device_latency = device_dict[self.device]['default_low_input_latency']
+
+        print("\n##################################################################################################")
+        print("\nDefaulted to using first working mic, Running on mic %s with properties:" %str(self.device))
+        print(device_dict[self.device])
+        print('Which has a latency of %.2f ms' %(1000*self.device_latency))
+        print("\n##################################################################################################")
+        print('Recording audio at %d Hz\nUsing (non-overlapping) data-windows of %d samples (updating at %.2ffps)'
+            %(self.rate, self.update_window_n_frames, self.updates_per_second))
+
+    def _measure_block_size(self, updates_per_second):
+        """
+        Return the number of frames per data window.
+
+        This part is a bit hacky, need better solution for this:
+        the optimal block size is determined by streaming some test audio with blocksize=0
+        and recording which block lengths the callback receives. When no block arrives in time,
+        the window is sized as rate / updates_per_second (rounded up to an even number) instead.
+        """
+        self.optimal_data_lengths = []
+        with sd.InputStream(samplerate=self.rate,
+                            blocksize=0,
+                            device=self.device,
+                            channels=1,
+                            dtype=np.float32,
+                            latency='low',
+                            callback=self.test_stream_read):
+            time.sleep(0.2)
+
+        observed_lengths = self.optimal_data_lengths
+        del self.optimal_data_lengths
+        if observed_lengths:
+            return max(observed_lengths)
+
+        # max() of an empty list would raise ValueError, so estimate the window size instead
+        rate = 44100 if self.rate is None else self.rate
+        n_frames = max(1, int(math.ceil(rate / max(updates_per_second, 1))))
+        return n_frames + (n_frames % 2)
+
+    def non_blocking_stream_read(self, indata, frames, time_info, status):
+        """
+        Stream callback: appends the first channel of each incoming block to the data buffer
+        """
+        start = time.time()
+        if self.verbose and status:
+            print(status)
+
+        if self.data_buffer is not None:
+            self.data_buffer.append_data(indata[:,0])
+            self.new_data = True
+
+        if self.verbose:
+            self.num_data_captures += 1
+            self.data_capture_delays.append(time.time() - start)
+
+    def test_stream_read(self, indata, frames, time_info, status):
+        '''
+        Dummy function to determine what blocksize the stream is using
+        '''
+        self.optimal_data_lengths.append(len(indata[:,0]))
+
+    def stream_start(self, data_windows_to_buffer = None):
+        """
+        Allocate the data buffer and start the audio stream
+
+        data_windows_to_buffer: int or None: Number of data windows to keep, None buffers 0.5 second of audio
+        """
+        if data_windows_to_buffer is None:
+            # By default, buffer 0.5 second of audio (but never less than one window)
+            data_windows_to_buffer = max(1, int(self.updates_per_second / 2))
+        self.data_windows_to_buffer = data_windows_to_buffer
+
+        self.data_buffer = numpy_data_buffer(self.data_windows_to_buffer, self.update_window_n_frames)
+
+        print("\n--🎙 -- Starting live audio stream...\n")
+        self.stream.start()
+        self.stream_start_time = time.time()
+
+    def terminate(self):
+        """
+        Stop the audio stream and release the underlying audio resources
+        """
+        print("👋 Sending stream termination command...")
+        self.stream.stop()
+        self.stream.close()
\ No newline at end of file diff --git a/src/utils.py b/src/utils.py index ff60994..7d622b4 100644 --- a/src/utils.py +++ b/src/utils.py @@ -55,7 +55,7 @@ class numpy_data_buffer: A fast, circular FIFO buffer in numpy with minimal memory interactions by using an array of index pointers """ - def __init__(self, n_windows, samples_per_window, dtype = np.int32, start_value = 0, data_dimensions = 1): + def __init__(self, n_windows, samples_per_window, dtype = np.float32, start_value = 0, data_dimensions = 1): self.n_windows = n_windows self.data_dimensions = data_dimensions self.samples_per_window = samples_per_window