Skip to content

Commit

Permalink
Added support for sounddevice
Browse files Browse the repository at this point in the history
  • Loading branch information
aiXander committed May 14, 2020
1 parent aeb4c81 commit d748e90
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 9 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ Tested with:
* pyaudio --> Version: 0.2.11
* scipy --> Version: 1.4.1

As an alternative to pyaudio, you can use [Sounddevice](https://python-sounddevice.readthedocs.io/en/0.3.15/installation.html), which might be more compatible with Windows/Mac:
* just run `python3 -m pip install sounddevice`
* Tested on Ubuntu 18.04 with sounddevice version 0.3.15
* The code to switch between the two sound interfaces is in the `__init__` function of the Stream_Analyzer class

**ToDo:**
* Implement realtime beat detection / melody extraction on top of FFT features (eg using Harmonic/Percussive decomposition)
* The pygame.transform operations sometimes cause weird visual artifacts (boxes) for some resolution settings --> fix??
Expand Down
2 changes: 1 addition & 1 deletion run_FFT_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
ear = Stream_Analyzer(
device = None, # Manually play with this (int) if you don't see anything
rate = None, # Audio samplerate, None uses the default source settings
FFT_window_size_ms = 60, # Window size used for the FFT transform
FFT_window_size_ms = 40, # Window size used for the FFT transform
updates_per_second = 1000, # How often to read the audio stream for new data
smoothing_length_ms = 50, # Apply some temporal smoothing to reduce noisy features
n_frequency_bins = 200, # The FFT features are grouped in bins
Expand Down
20 changes: 14 additions & 6 deletions src/stream_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from collections import deque
from scipy.signal import savgol_filter

from src.stream_reader import Stream_Reader
from src.fft import getFFT
from src.utils import *

Expand Down Expand Up @@ -36,11 +35,20 @@ def __init__(self,
self.verbose = verbose
self.visualize = visualize

self.stream_reader = Stream_Reader(
device = device,
rate = rate,
updates_per_second = updates_per_second,
verbose = verbose)
try:
from src.stream_reader_pyaudio import Stream_Reader
self.stream_reader = Stream_Reader(
device = device,
rate = rate,
updates_per_second = updates_per_second,
verbose = verbose)
except:
from src.stream_reader_sounddevice import Stream_Reader
self.stream_reader = Stream_Reader(
device = device,
rate = rate,
updates_per_second = updates_per_second,
verbose = verbose)

self.rate = self.stream_reader.rate

Expand Down
2 changes: 1 addition & 1 deletion src/stream_reader.py → src/stream_reader_pyaudio.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self,
self.verbose = verbose
self.pa = pyaudio.PyAudio()

#Temporary variables #hacks
#Temporary variables #hacks!
self.update_window_n_frames = 1024 #Don't remove this, needed for device testing!
self.data_buffer = None

Expand Down
137 changes: 137 additions & 0 deletions src/stream_reader_sounddevice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import numpy as np
import time, sys, math
from collections import deque
import sounddevice as sd

from src.utils import *

class Stream_Reader:
    """
    The Stream_Reader continuously reads data from a selected sound source using sounddevice

    Arguments:
        device: int or None:     Select which audio stream to read. None uses the default device.
        rate: float or None:     Sample rate to use. None uses the default of the device.
        updates_per_second: int: Only used to size the read window when the block size
                                 could not be measured from the device itself.
        FFT_window_size:         Currently unused by this reader.
        verbose: bool:           Keep timing statistics of the audio callback.
    """

    def __init__(self,
        device = None,
        rate = None,
        updates_per_second = 1000,
        FFT_window_size = None,
        verbose = False):

        print("Available audio devices:")
        device_dict = sd.query_devices()
        print(device_dict)

        # Validate the requested settings; fall back to the defaults when they are rejected.
        # `Exception` (instead of a bare except) lets KeyboardInterrupt / SystemExit through.
        try:
            sd.check_input_settings(device=device, channels=1, dtype=np.float32, extra_settings=None, samplerate=rate)
        except Exception:
            print("Input sound settings for device %s and samplerate %s Hz not supported, using defaults..." %(str(device), str(rate)))
            rate = None
            device = None

        self.rate = rate
        if rate is not None:
            sd.default.samplerate = rate

        self.device = device
        if device is not None:
            sd.default.device = device

        self.verbose = verbose
        self.data_buffer = None

        # This part is a bit hacky, need better solution for this:
        # Determine what the optimal buffer shape is by streaming some test audio
        # (blocksize=0 lets the backend pick the block size it prefers).
        self.optimal_data_lengths = []
        with sd.InputStream(samplerate=self.rate,
                            blocksize=0,
                            device=self.device,
                            channels=1,
                            dtype=np.float32,
                            latency='low',
                            callback=self.test_stream_read):
            time.sleep(0.2)

        if self.optimal_data_lengths:
            self.update_window_n_frames = max(self.optimal_data_lengths)
        else:
            # No callback fired during the probe: derive the window from the
            # requested update rate instead of crashing on max() of an empty list.
            self.update_window_n_frames = self._fallback_window_n_frames(self.rate, updates_per_second)
        del self.optimal_data_lengths

        # Open the real stream on the same device that was probed above.
        self.stream = sd.InputStream(
            samplerate=self.rate,
            blocksize=self.update_window_n_frames,
            device=self.device,
            channels=1,
            dtype=np.float32,
            latency='low',
            extra_settings=None,
            callback=self.non_blocking_stream_read)

        # Read back what the stream actually resolved the (possibly None) settings to.
        self.rate = self.stream.samplerate
        self.device = self.stream.device

        self.updates_per_second = self.rate / self.update_window_n_frames
        self.info = ''
        self.data_capture_delays = deque(maxlen=20)
        self.new_data = False
        if self.verbose:
            self.num_data_captures = 0

        self.device_latency = device_dict[self.device]['default_low_input_latency']

        print("\n##################################################################################################")
        print("\nDefaulted to using first working mic, Running on mic %s with properties:" %str(self.device))
        print(device_dict[self.device])
        print('Which has a latency of %.2f ms' %(1000*self.device_latency))
        print("\n##################################################################################################")
        print('Recording audio at %d Hz\nUsing (non-overlapping) data-windows of %d samples (updating at %.2ffps)'
            %(self.rate, self.update_window_n_frames, self.updates_per_second))

    @staticmethod
    def _fallback_window_n_frames(rate, updates_per_second):
        '''
        Window size (in frames, rounded up to an even number) used when the
        block size could not be measured; assumes 44100 Hz if rate is None.
        '''
        if rate is None:
            rate = 44100
        if not updates_per_second or updates_per_second <= 0:
            updates_per_second = 1000
        n_frames = int(math.ceil(float(rate) / updates_per_second))
        return max(2, n_frames + (n_frames % 2))

    def non_blocking_stream_read(self, indata, frames, time_info, status):
        '''
        Stream callback: appends the first channel of every incoming block to
        the data buffer (once stream_start() has created it) and flags new data.
        '''
        if self.verbose:
            start = time.time()
            if status:
                print(status)

        if self.data_buffer is not None:
            self.data_buffer.append_data(indata[:,0])
            self.new_data = True

        if self.verbose:
            self.num_data_captures += 1
            self.data_capture_delays.append(time.time() - start)

        return

    def test_stream_read(self, indata, frames, time_info, status):
        '''
        Dummy function to determine what blocksize the stream is using
        '''
        self.optimal_data_lengths.append(len(indata[:,0]))
        return

    def stream_start(self, data_windows_to_buffer = None):
        '''
        Allocate the data buffer and start the audio stream.
        data_windows_to_buffer: number of windows to keep, None buffers 0.5 second.
        '''
        if data_windows_to_buffer is None:
            self.data_windows_to_buffer = int(self.updates_per_second / 2) #By default, buffer 0.5 second of audio
        else:
            self.data_windows_to_buffer = data_windows_to_buffer

        self.data_buffer = numpy_data_buffer(self.data_windows_to_buffer, self.update_window_n_frames)

        print("\n--🎙 -- Starting live audio stream...\n")
        self.stream.start()
        self.stream_start_time = time.time()

    def terminate(self):
        '''
        Stop the audio stream and close it so the audio backend releases it.
        '''
        print("👋 Sending stream termination command...")
        self.stream.stop()
        self.stream.close()
2 changes: 1 addition & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class numpy_data_buffer:
A fast, circular FIFO buffer in numpy with minimal memory interactions by using an array of index pointers
"""

def __init__(self, n_windows, samples_per_window, dtype = np.int32, start_value = 0, data_dimensions = 1):
def __init__(self, n_windows, samples_per_window, dtype = np.float32, start_value = 0, data_dimensions = 1):
self.n_windows = n_windows
self.data_dimensions = data_dimensions
self.samples_per_window = samples_per_window
Expand Down

0 comments on commit d748e90

Please sign in to comment.