Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add stem activations generate script #49

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions medleydb/annotate/generate_activations_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from __future__ import division
import scipy.signal
import numpy as np
import librosa
import medleydb
import os
import argparse


def create_activation_annotation(
    mtrack,
    win_len=4096,
    lpf_cutoff=0.075,
    theta=0.15,
    binarize=False
):
    """Compute per-stem activation confidence curves for a multitrack.

    Each stem's audio is loaded as mono at 44.1 kHz, turned into a frame-wise
    energy envelope by ``track_activation``, normalized against the summed
    envelope of all stems, low-pass filtered and squashed with a logistic
    function centered on ``theta``.

    Parameters
    ----------
    mtrack : object
        Multitrack whose ``stems`` mapping holds objects exposing a
        ``file_path`` attribute.
    win_len : int
        Analysis window length in samples; the hop is ``win_len // 2``.
    lpf_cutoff : float
        Critical frequency handed to ``scipy.signal.butter`` for the
        second-order low-pass smoothing filter.
    theta : float
        Midpoint of the logistic function (the value mapped to 0.5).
    binarize : bool
        If True, threshold the confidence values at 0.5 and return 0/1.

    Returns
    -------
    numpy.ndarray
        Array with one row per frame. Column 0 is the frame time in seconds,
        the remaining columns hold one activation curve per stem, in the
        iteration order of ``mtrack.stems``.

    Raises
    ------
    ValueError
        If ``mtrack`` has no stems.
    """
    # Every stem is resampled to this rate on load, so the time axis can use
    # it directly instead of relying on a variable leaked from the loop.
    sr = 44100

    # NOTE(review): columns follow the iteration order of mtrack.stems, while
    # write_activations_to_csv labels them from mtrack.stem_activations_idx;
    # confirm that both orders agree.
    stems = list(mtrack.stems.values())
    if not stems:
        raise ValueError("mtrack has no stems to compute activations for")

    # MATLAB equivalent to @hanning(win_len)
    win = scipy.signal.windows.hann(win_len + 2)[1:-1]

    envelopes = []
    for track in stems:
        audio, _ = librosa.load(track.file_path, sr=sr, mono=True)
        envelopes.append(track_activation(audio, win_len, win))

    # list to numpy array (one row per stem)
    H = np.array(envelopes)

    # normalization (to overall energy and # of sources)
    E0 = np.sum(H, axis=0)
    peak = np.max(E0)
    if peak > 0:
        # Skipped for all-silent input, where dividing would produce NaN.
        H = len(stems) * H / peak

    # binary thresholding for low overall energy events
    H[:, E0 < 0.01] = 0.0

    # LP filter (zero-phase, applied along the time axis)
    b, a = scipy.signal.butter(2, lpf_cutoff, 'low')
    H = scipy.signal.filtfilt(b, a, H, axis=1)

    # logistic function to semi-binarize the output; confidence value
    H = 1 - 1 / (1 + np.exp(20 * (H - theta)))

    # binarize output
    if binarize:
        H_out = (H > 0.5).astype(float)
    else:
        H_out = H

    # add time column
    time = librosa.core.frames_to_time(
        np.arange(H.shape[1]), sr=sr, hop_length=win_len // 2
    )

    # stack time column to matrix and return frames as rows
    return np.vstack((time, H_out)).T


def track_activation(wave, win_len, win):
    """Return the frame-wise energy envelope of a mono signal.

    The signal is zero-padded at the front by ``win_len - hop_len`` samples
    and at the end up to a multiple of ``win_len``, cut into frames of
    ``win_len`` samples with a hop of ``win_len // 2``, half-wave rectified,
    compressed with a square root, weighted by ``win`` and averaged per frame.

    Parameters
    ----------
    wave : numpy.ndarray
        One-dimensional audio signal.
    win_len : int
        Frame length in samples.
    win : numpy.ndarray
        Window of length ``win_len`` applied to every frame.

    Returns
    -------
    numpy.ndarray
        One envelope value per frame.
    """
    hop_len = win_len // 2

    # pre padding
    wave = np.pad(
        wave,
        pad_width=(win_len - hop_len, 0),
        mode='constant',
        constant_values=0
    )

    # post padding up to the next multiple of win_len; `size` is passed by
    # keyword because newer librosa releases no longer accept it positionally
    wave = librosa.util.fix_length(
        wave, size=int(win_len * np.ceil(len(wave) / win_len))
    )

    # cut into frames
    wavmat = librosa.util.frame(
        wave,
        frame_length=win_len,
        hop_length=hop_len
    )

    # Envelope follower
    wavmat = hwr(wavmat) ** 0.5  # half-wave rectification + compression

    # transpose so the window broadcasts along each frame's samples
    return np.mean((wavmat.T * win), axis=1)


def hwr(x):
    """Half-wave rectify ``x``: keep positive values, map negatives to zero.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Input value(s).

    Returns
    -------
    scalar or numpy.ndarray
        ``(x + |x|) / 2``, computed element-wise with the shape of ``x``.
    """
    return (x + np.abs(x)) / 2


def write_activations_to_csv(mtrack, activations, debug=False):
    """Write an activation matrix to a comma-separated ``.lab`` file.

    The file is created in ``mtrack.annotation_dir`` and named
    ``<track_id>_ACTIVATION_CONF.lab`` (or ``..._debug.lab`` when ``debug``
    is true). Its first line is the header ``time,S01,S02,...`` built from
    ``mtrack.stem_activations_idx``; values are written with four decimals.

    Parameters
    ----------
    mtrack : object
        Provides ``track_id``, ``annotation_dir`` and
        ``stem_activations_idx``.
    activations : numpy.ndarray
        Two-dimensional array, one row per output line.
    debug : bool
        If True, use the debug file name.
    """
    if debug:
        activation_fname = "%s_ACTIVATION_CONF_debug.lab" % mtrack.track_id
    else:
        activation_fname = "%s_ACTIVATION_CONF.lab" % mtrack.track_id

    activations_fpath = os.path.join(mtrack.annotation_dir, activation_fname)
    stem_str = ",".join(
        ["S%02d" % stem_idx for stem_idx in mtrack.stem_activations_idx]
    )
    np.savetxt(
        activations_fpath,
        activations,
        header='time,' + stem_str,
        delimiter=',',
        fmt='%.4f',
        # savetxt prefixes the header with '# ' by default, which would turn
        # the first column name into '# time'.
        comments=''
    )


def main(args):
    """Generate activation annotations for one track.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``track_id``, ``write_output`` and ``debug``. The
        activations are only written to disk when ``write_output`` is true;
        ``debug`` selects the debug output file name.
    """
    mtrack = medleydb.MultiTrack(args.track_id)
    activations = create_activation_annotation(mtrack)
    if args.write_output:
        write_activations_to_csv(mtrack, activations, args.debug)


if __name__ == "__main__":
    def _str2bool(text):
        """Parse a command-line boolean such as 'true', '0' or 'no'.

        ``type=bool`` cannot be used for this: ``bool("False")`` is True, so
        every non-empty value would switch the option on.
        """
        lowered = text.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y"):
            return True
        if lowered in ("", "0", "false", "f", "no", "n"):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got %r" % text
        )

    parser = argparse.ArgumentParser(
        description="Generate stem activation annotations for a "
                    "MedleyDB track."
    )
    # nargs="?" makes the positional optional so its default can take effect.
    parser.add_argument("track_id",
                        type=str,
                        nargs="?",
                        default="LizNelson_Rainfall",
                        help="MedleyDB track id. Ex. LizNelson_Rainfall")
    parser.add_argument("--write_output",
                        type=_str2bool,
                        default=True,
                        help="If true, write the output to a file")
    parser.add_argument("--debug",
                        type=_str2bool,
                        default=True,
                        help="If true, use debug filename output")
    main(parser.parse_args())
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
'pytest',
'pytest-cov',
'pytest-pep8',
'scipy',
'librosa',
],
'docs': [
'sphinx==1.2.3', # autodoc was broken in 1.3.1
Expand Down
31 changes: 16 additions & 15 deletions tests/test_generate_melody_annotations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Tests for generate_melody_annotations script"""
import unittest
import os
import numpy as np
from medleydb import MultiTrack
from medleydb.annotate import generate_melody_annotations as G
Expand Down Expand Up @@ -40,6 +39,7 @@ def test_get_blank_melody_sequence(self):
])
self.assertTrue(array_almost_equal(actual, expected))


class TestSecToIdx(unittest.TestCase):

def test_defaults1(self):
Expand Down Expand Up @@ -72,14 +72,14 @@ def test_hop2(self):
expected = 4
self.assertEqual(actual, expected)


class TestAddSequenceToMelody(unittest.TestCase):

def setUp(self):
self.times = [0.0, HOP/FS, 2.0*HOP/FS, 3.0*HOP/FS, 4.0*HOP/FS]
self.dur = 0.02902494331 # seconds

def test_add_sequence_to_melody1(self):

f0_sequence = [
[self.times[0], 0.0],
[self.times[1], 0.0],
Expand All @@ -94,7 +94,9 @@ def test_add_sequence_to_melody1(self):
[self.times[3], 0.0],
[self.times[4], 0.0]
])
actual = G.add_sequence_to_melody(self.dur, f0_sequence, melody_sequence)
actual = G.add_sequence_to_melody(
self.dur, f0_sequence, melody_sequence
)
expected = np.array([
[self.times[0], 0.0],
[self.times[1], 0.0],
Expand All @@ -107,7 +109,7 @@ def test_add_sequence_to_melody1(self):
self.assertTrue(array_almost_equal(actual, expected))

def test_add_sequence_to_melody2(self):

f0_sequence = [
[self.times[0], 3.0],
[self.times[2], 1.7],
Expand All @@ -120,7 +122,9 @@ def test_add_sequence_to_melody2(self):
[self.times[3], 0.0],
[self.times[4], 0.0]
])
actual = G.add_sequence_to_melody(self.dur, f0_sequence, melody_sequence)
actual = G.add_sequence_to_melody(
self.dur, f0_sequence, melody_sequence
)
expected = np.array([
[self.times[0], 3.0],
[self.times[1], 0.0],
Expand All @@ -133,7 +137,7 @@ def test_add_sequence_to_melody2(self):
self.assertTrue(array_almost_equal(actual, expected))

def test_add_sequence_to_melody3(self):

f0_sequence = [
[self.times[0], 3.0],
[self.times[2], 1.7],
Expand Down Expand Up @@ -161,7 +165,7 @@ def test_add_sequence_to_melody3(self):
self.assertTrue(array_almost_equal(actual, expected))

def test_add_sequence_to_melody4(self):

f0_sequence = [
[self.times[0], 3.0],
[self.times[2], 1.7],
Expand All @@ -188,8 +192,8 @@ def test_add_sequence_to_melody4(self):
print(expected)
self.assertTrue(array_almost_equal(actual, expected))

def test_add_sequence_to_melody4(self):
def test_add_sequence_to_melody5(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch!


f0_sequence = [
[self.times[0], 3.0],
[self.times[2], 1.7],
Expand All @@ -216,8 +220,8 @@ def test_add_sequence_to_melody4(self):
print(expected)
self.assertTrue(array_almost_equal(actual, expected))

def test_add_sequence_to_melody4(self):
def test_add_sequence_to_melody6(self):

f0_sequence = [
[self.times[0], 3.0],
[self.times[2], 1.7],
Expand All @@ -244,6 +248,7 @@ def test_add_sequence_to_melody4(self):
print(expected)
self.assertTrue(array_almost_equal(actual, expected))


class TestCreateMelodyAnnotations(unittest.TestCase):

def setUp(self):
Expand Down Expand Up @@ -271,7 +276,3 @@ def test_melody3(self):
print(actual[0:2])
print(expected[0:2])
self.assertTrue(array_almost_equal(actual, expected))




2 changes: 1 addition & 1 deletion tests/test_multitrack.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def test_activation_conf_from_stem1(self):
def test_activation_conf_from_stem2(self):
actual = self.mtrack2.activation_conf_from_stem(4)
expected = None
self.assertEqual(actual, expected)
self.assertEqual(actual, expected)


class TestTrack(unittest.TestCase):
Expand Down