Skip to content

Commit

Permalink
Merge pull request deezer#444 from deezer/pad_waveform
Browse files Browse the repository at this point in the history
Added padding at the beginning of the waveform to avoid a TensorFlow STFT reconstruction error
  • Loading branch information
mmoussallam authored Jul 24, 2020
2 parents 4744ffb + 3fcc4ea commit ca5cdd7
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 35 deletions.
1 change: 1 addition & 0 deletions spleeter/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def _create_evaluate_parser(parser_factory):
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
parser.add_argument('--mus_dir', **OPT_MUSDB)
parser.add_argument('-m', '--mwf', **OPT_MWF)
parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
return parser


Expand Down
2 changes: 1 addition & 1 deletion spleeter/commands/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def _separate_evaluation_dataset(arguments, musdb_root_directory, params):
bitrate='128k',
MWF=arguments.MWF,
verbose=arguments.verbose,
stft_backend="auto"),
stft_backend=arguments.stft_backend),
params)
return audio_output_directory

Expand Down
11 changes: 9 additions & 2 deletions spleeter/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,16 @@ def _build_stft_feature(self):
spec_name = self.spectrogram_name

if stft_name not in self._features:
# pad input with a frame of zeros
waveform = tf.concat([
tf.zeros((self._frame_length, self._n_channels)),
self._features['waveform']
],
0
)
stft_feature = tf.transpose(
stft(
tf.transpose(self._features['waveform']),
tf.transpose(waveform),
self._frame_length,
self._frame_step,
window_fn=lambda frame_length, dtype: (
Expand Down Expand Up @@ -341,7 +348,7 @@ def _inverse_stft(self, stft_t, time_crop=None):
reshaped = tf.transpose(inversed)
if time_crop is None:
time_crop = tf.shape(self._features['waveform'])[0]
return reshaped[:time_crop, :]
return reshaped[self._frame_length:self._frame_length+time_crop, :]

def _build_mwf_output_waveform(self):
""" Perform separation with multichannel Wiener Filtering using Norbert.
Expand Down
101 changes: 69 additions & 32 deletions tests/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,33 +25,64 @@

from spleeter.utils.configuration import load_configuration

res_4stems = { "vocals": {
"SDR": -0.007,
"SAR": -19.231,
"SIR": -4.528,
"ISR": 0.000
},
"drums": {
"SDR": -0.071,
"SAR": -14.496,
"SIR": -4.987,
"ISR": 0.001
},
"bass":{
"SDR": -0.001,
"SAR": -12.426,
"SIR": -7.198,
"ISR": -0.001
# STFT backend names the evaluation test is parametrized over.
BACKENDS = ["tensorflow", "librosa"]
# Identity mapping (backend name -> backend name), in BACKENDS order;
# iterating it yields the backend names used as pytest parameters.
TEST_CONFIGURATIONS = dict(zip(BACKENDS, BACKENDS))

res_4stems = {
"librosa": {
"vocals": {
"SDR": -0.007,
"SAR": -19.231,
"SIR": -4.528,
"ISR": 0.000
},
"drums": {
"SDR": -0.071,
"SAR": -14.496,
"SIR": -4.987,
"ISR": 0.001
},
"bass":{
"SDR": -0.001,
"SAR": -12.426,
"SIR": -7.198,
"ISR": -0.001
},
"other":{
"SDR": -1.453,
"SAR": -14.899,
"SIR": -4.678,
"ISR": -0.015
}
},
"other":{
"SDR": -1.453,
"SAR": -14.899,
"SIR": -4.678,
"ISR": -0.015
"tensorflow": {
"vocals": {
"SDR": 3.25e-05,
"SAR": -11.153575,
"SIR": -1.3849,
"ISR": 2.75e-05
},
"drums": {
"SDR": -0.079505,
"SAR": -15.7073575,
"SIR": -4.972755,
"ISR": 0.0013575
},
"bass":{
"SDR": 2.5e-06,
"SAR": -10.3520575,
"SIR": -4.272325,
"ISR": 2.5e-06
},
"other":{
"SDR": -1.359175,
"SAR": -14.7076775,
"SIR": -4.761505,
"ISR": -0.01528
}
}
}


def generate_fake_eval_dataset(path):
aa = get_default_audio_adapter()
n_songs = 2
Expand All @@ -68,12 +99,18 @@ def generate_fake_eval_dataset(path):
aa.save(filename, data, fs)


# NOTE(review): this looks like the pre-change version of the test as shown by
# the diff; the parametrized `test_evaluate(backend)` further down has the same
# name and would supersede it -- confirm against the real file.
#
# Purpose: generate a fake evaluation dataset under `path`, run the `evaluate`
# command for the `spleeter:4stems` configuration, and compare the median of
# every returned metric with the expected values in `res_4stems`.
def test_evaluate(path="FAKE_MUSDB_DIR"):
# The dataset is written to `path` (by default a directory literally named
# "FAKE_MUSDB_DIR"); nothing in this function removes it afterwards.
generate_fake_eval_dataset(path)
p = create_argument_parser()
# No STFT backend flag is passed on this command line.
arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", path])
params = load_configuration(arguments.configuration)
metrics = evaluate.entrypoint(arguments, params)
# `metrics` is iterated as {instrument: {metric name: values}}.
for instrument, metric in metrics.items():
# The inner loop rebinds `metric` from the per-instrument dict to the metric
# name; this works because `metric.items()` is evaluated before rebinding.
for metric, value in metric.items():
# Expected values are looked up as res_4stems[instrument][metric], i.e. without
# a per-backend level; tolerance is absolute (1e-3) on the median.
assert np.allclose(np.median(value), res_4stems[instrument][metric], atol=1e-3)
@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS)
def test_evaluate(backend):
    """Check `evaluate` metrics on a fake dataset for one STFT backend.

    A fake evaluation dataset is generated inside a temporary directory,
    the `evaluate` command is run for the `spleeter:4stems` configuration
    with `-B backend`, and the median of every returned metric is compared
    (absolute tolerance 1e-3) with `res_4stems[backend]`.

    NOTE(review): indentation was lost in this view; the metric checks are
    assumed to sit inside the `with` block -- confirm against the real file.
    """
    with TemporaryDirectory() as directory:
        generate_fake_eval_dataset(directory)

        parser = create_argument_parser()
        command_line = [
            "evaluate",
            "-p", "spleeter:4stems",
            "--mus_dir", directory,
            "-B", backend,
        ]
        arguments = parser.parse_args(command_line)
        params = load_configuration(arguments.configuration)
        metrics = evaluate.entrypoint(arguments, params)

        # `metrics` is iterated as {instrument: {metric name: values}}.
        for instrument, scores in metrics.items():
            for metric_name, values in scores.items():
                observed = np.median(values)
                expected = res_4stems[backend][instrument][metric_name]
                assert np.allclose(observed, expected, atol=1e-3)


# test_evaluate("tensorflow")

0 comments on commit ca5cdd7

Please sign in to comment.