diff --git a/spleeter/commands/__init__.py b/spleeter/commands/__init__.py
index 79773db3..a54e4c1f 100644
--- a/spleeter/commands/__init__.py
+++ b/spleeter/commands/__init__.py
@@ -170,6 +170,7 @@ def _create_evaluate_parser(parser_factory):
     parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
     parser.add_argument('--mus_dir', **OPT_MUSDB)
     parser.add_argument('-m', '--mwf', **OPT_MWF)
+    parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
     return parser
 
 
diff --git a/spleeter/commands/evaluate.py b/spleeter/commands/evaluate.py
index 3ddfb74e..93e09902 100644
--- a/spleeter/commands/evaluate.py
+++ b/spleeter/commands/evaluate.py
@@ -77,7 +77,7 @@ def _separate_evaluation_dataset(arguments, musdb_root_directory, params):
             bitrate='128k',
             MWF=arguments.MWF,
             verbose=arguments.verbose,
-            stft_backend="auto"),
+            stft_backend=arguments.stft_backend),
         params)
     return audio_output_directory
 
diff --git a/spleeter/model/__init__.py b/spleeter/model/__init__.py
index 2097e9c7..b79e4244 100644
--- a/spleeter/model/__init__.py
+++ b/spleeter/model/__init__.py
@@ -275,9 +275,16 @@ def _build_stft_feature(self):
         spec_name = self.spectrogram_name
 
         if stft_name not in self._features:
+            # pad input with a frame of zeros
+            waveform = tf.concat([
+                tf.zeros((self._frame_length, self._n_channels)),
+                self._features['waveform']
+            ],
+                0
+            )
             stft_feature = tf.transpose(
                 stft(
-                    tf.transpose(self._features['waveform']),
+                    tf.transpose(waveform),
                     self._frame_length,
                     self._frame_step,
                     window_fn=lambda frame_length, dtype: (
@@ -341,7 +348,7 @@ def _inverse_stft(self, stft_t, time_crop=None):
         reshaped = tf.transpose(inversed)
         if time_crop is None:
             time_crop = tf.shape(self._features['waveform'])[0]
-        return reshaped[:time_crop, :]
+        return reshaped[self._frame_length:self._frame_length+time_crop, :]
 
     def _build_mwf_output_waveform(self):
         """ Perform separation with multichannel Wiener Filtering using Norbert.
diff --git a/tests/test_eval.py b/tests/test_eval.py
index bb9edef2..298e421b 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -25,33 +25,64 @@
 from spleeter.utils.configuration import load_configuration
 
-res_4stems = { "vocals": {
-    "SDR": -0.007,
-    "SAR": -19.231,
-    "SIR": -4.528,
-    "ISR": 0.000
-    },
-    "drums": {
-        "SDR": -0.071,
-        "SAR": -14.496,
-        "SIR": -4.987,
-        "ISR": 0.001
-    },
-    "bass":{
-        "SDR": -0.001,
-        "SAR": -12.426,
-        "SIR": -7.198,
-        "ISR": -0.001
+BACKENDS = ["tensorflow", "librosa"]
+TEST_CONFIGURATIONS = {el:el for el in BACKENDS}
+
+res_4stems = {
+    "librosa": {
+        "vocals": {
+            "SDR": -0.007,
+            "SAR": -19.231,
+            "SIR": -4.528,
+            "ISR": 0.000
+        },
+        "drums": {
+            "SDR": -0.071,
+            "SAR": -14.496,
+            "SIR": -4.987,
+            "ISR": 0.001
+        },
+        "bass":{
+            "SDR": -0.001,
+            "SAR": -12.426,
+            "SIR": -7.198,
+            "ISR": -0.001
+        },
+        "other":{
+            "SDR": -1.453,
+            "SAR": -14.899,
+            "SIR": -4.678,
+            "ISR": -0.015
+        }
     },
-    "other":{
-        "SDR": -1.453,
-        "SAR": -14.899,
-        "SIR": -4.678,
-        "ISR": -0.015
+    "tensorflow": {
+        "vocals": {
+            "SDR": 3.25e-05,
+            "SAR": -11.153575,
+            "SIR": -1.3849,
+            "ISR": 2.75e-05
+        },
+        "drums": {
+            "SDR": -0.079505,
+            "SAR": -15.7073575,
+            "SIR": -4.972755,
+            "ISR": 0.0013575
+        },
+        "bass":{
+            "SDR": 2.5e-06,
+            "SAR": -10.3520575,
+            "SIR": -4.272325,
+            "ISR": 2.5e-06
+        },
+        "other":{
+            "SDR": -1.359175,
+            "SAR": -14.7076775,
+            "SIR": -4.761505,
+            "ISR": -0.01528
+        }
     }
 }
 
-
 
 def generate_fake_eval_dataset(path):
     aa = get_default_audio_adapter()
     n_songs = 2
@@ -68,12 +99,18 @@ def generate_fake_eval_dataset(path):
             aa.save(filename, data, fs)
 
 
-def test_evaluate(path="FAKE_MUSDB_DIR"):
-    generate_fake_eval_dataset(path)
-    p = create_argument_parser()
-    arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", path])
-    params = load_configuration(arguments.configuration)
-    metrics = evaluate.entrypoint(arguments, params)
-    for instrument, metric in metrics.items():
-        for metric, value in metric.items():
-            assert np.allclose(np.median(value), res_4stems[instrument][metric], atol=1e-3)
\ No newline at end of file
+@pytest.mark.parametrize('backend', TEST_CONFIGURATIONS)
+def test_evaluate(backend):
+    with TemporaryDirectory() as directory:
+
+        generate_fake_eval_dataset(directory)
+        p = create_argument_parser()
+        arguments = p.parse_args(["evaluate", "-p", "spleeter:4stems", "--mus_dir", directory, "-B", backend])
+        params = load_configuration(arguments.configuration)
+        metrics = evaluate.entrypoint(arguments, params)
+        for instrument, metric in metrics.items():
+            for metric, value in metric.items():
+                assert np.allclose(np.median(value), res_4stems[backend][instrument][metric], atol=1e-3)
+
+
+# test_evaluate("tensorflow")
\ No newline at end of file