Skip to content

Add simulate_rir_ism method for room impulse response simulation #2880

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/unittest/linux/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ fi
(
set -x
conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa>=0.8.0' parameterized 'requests>=2.20'
pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag
pip install kaldi-io SoundFile coverage pytest pytest-cov 'scipy==1.7.3' transformers expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag pyroomacoustics
)
# Install fairseq
git clone https://github.com/pytorch/fairseq
Expand Down
3 changes: 2 additions & 1 deletion .circleci/unittest/windows/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ esac
unidecode \
'protobuf<4.21.0' \
demucs \
tinytag
tinytag \
pyroomacoustics
)
# Install fairseq
git clone https://github.com/pytorch/fairseq
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ endif()
# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RIR "Enable RIR simulation" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_CTC_DECODER "Build Flashlight CTC decoder" ON)
option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
Expand Down
9 changes: 9 additions & 0 deletions docs/source/prototype.functional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,12 @@ DSP
oscillator_bank
sinc_impulse_response
frequency_impulse_response

Room Impulse Response Simulation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated
:nosignatures:

simulate_rir_ism
24 changes: 24 additions & 0 deletions docs/source/refs.bib
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,27 @@ @inproceedings{valk2021voxlingua107
year={2021},
organization={IEEE}
}
@inproceedings{scheibler2018pyroomacoustics,
title={Pyroomacoustics: A python package for audio room simulation and array processing algorithms},
author={Scheibler, Robin and Bezzam, Eric and Dokmani{\'c}, Ivan},
booktitle={2018 IEEE international conference on acoustics, speech and signal processing (ICASSP)},
pages={351--355},
year={2018},
organization={IEEE}
}
@article{allen1979image,
title={Image method for efficiently simulating small-room acoustics},
author={Allen, Jont B and Berkley, David A},
journal={The Journal of the Acoustical Society of America},
volume={65},
number={4},
pages={943--950},
year={1979},
publisher={Acoustical Society of America}
}
@misc{wiki:Absorption_(acoustics),
author = "{Wikipedia contributors}",
title = "Absorption (acoustics) --- {W}ikipedia{,} The Free Encyclopedia",
url = "https://en.wikipedia.org/wiki/Absorption_(acoustics)",
note = "[Online]"
}
2 changes: 2 additions & 0 deletions test/torchaudio_unittest/common_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
skipIfNoMacOS,
skipIfNoModule,
skipIfNoQengine,
skipIfNoRIR,
skipIfNoSox,
skipIfPy310,
skipIfRocm,
Expand Down Expand Up @@ -47,6 +48,7 @@
"skipIfNoMacOS",
"skipIfNoModule",
"skipIfNoKaldi",
"skipIfNoRIR",
"skipIfNoSox",
"skipIfNoSoxBackend",
"skipIfRocm",
Expand Down
5 changes: 5 additions & 0 deletions test/torchaudio_unittest/common_utils/case_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ def skipIfNoModule(module, display_name=None):
reason="Kaldi features are not available.",
key="NO_KALDI",
)
# Test decorator for cases that need the RIR (room impulse response) extension.
# The condition passed to `_skipIf` is true when `torchaudio._extension._IS_RIR_AVAILABLE` is falsy.
# NOTE(review): how `_skipIf` uses `key` (and whether it skips or fails) is defined elsewhere — confirm there.
skipIfNoRIR = _skipIf(
not torchaudio._extension._IS_RIR_AVAILABLE,
reason="RIR features are not available.",
key="NO_RIR",
)
skipIfNoCtcDecoder = _skipIf(
not is_ctc_decoder_available(),
reason="CTC decoder not available.",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
from torchaudio_unittest.common_utils import PytorchTestCase

from .functional_test_impl import Functional64OnlyTestImpl, FunctionalTestImpl
from .functional_test_impl import Functional64OnlyTestImpl, FunctionalCPUOnlyTestImpl, FunctionalTestImpl


class FunctionalFloat32CPUTest(FunctionalTestImpl, PytorchTestCase):
Expand All @@ -17,3 +17,13 @@ class FunctionalFloat64CPUTest(FunctionalTestImpl, PytorchTestCase):
class FunctionalFloat64OnlyCPUTest(Functional64OnlyTestImpl, PytorchTestCase):
    """Runs ``Functional64OnlyTestImpl`` with float64 tensors on the CPU device."""

    device = torch.device("cpu")
    dtype = torch.float64


class FunctionalCPUOnlyFloat32Test(FunctionalCPUOnlyTestImpl, PytorchTestCase):
    """Runs ``FunctionalCPUOnlyTestImpl`` with float32 tensors on the CPU device."""

    device = torch.device("cpu")
    dtype = torch.float32


class FunctionalCPUOnlyFloat64Test(FunctionalCPUOnlyTestImpl, PytorchTestCase):
    """Runs ``FunctionalCPUOnlyTestImpl`` with float64 tensors on the CPU device."""

    device = torch.device("cpu")
    dtype = torch.float64
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from torchaudio._internal import module_utils as _mod_utils

if _mod_utils.is_module_available("pyroomacoustics"):
import pyroomacoustics as pra

import torch
import torchaudio.prototype.functional as F
from parameterized import param, parameterized
from torchaudio_unittest.common_utils import nested_params, TestBaseMixin
from torchaudio_unittest.common_utils import nested_params, skipIfNoModule, skipIfNoRIR, TestBaseMixin

from .dsp_utils import freq_ir as freq_ir_np, oscillator_bank as oscillator_bank_np, sinc_ir as sinc_ir_np

Expand Down Expand Up @@ -424,3 +429,83 @@ def _debug_plot():
except AssertionError:
_debug_plot()
raise


@skipIfNoModule("pyroomacoustics")
@skipIfNoRIR
class FunctionalCPUOnlyTestImpl(TestBaseMixin):
    """Checks ``F.simulate_rir_ism`` against room impulse responses computed by pyroomacoustics."""

    def _pyroomacoustics_rir(self, room_dim, source, mic_array, max_order, materials):
        """Simulate the same shoebox room with pyroomacoustics and return zero-padded RIRs.

        Args:
            room_dim: tensor holding the room dimensions.
            source: tensor holding the source position.
            mic_array: tensor of microphone positions, one row per channel.
            max_order: ``max_order`` value forwarded to ``pra.ShoeBox``.
            materials: value forwarded as ``materials`` to ``pra.ShoeBox``.

        Returns:
            Tensor of shape ``(channel, max_len)`` in ``self.dtype`` on ``self.device``; each row
            holds one microphone's RIR, right-padded with zeros to the length of the longest one.
        """
        room = pra.ShoeBox(
            room_dim.detach().numpy(),
            fs=16000,
            materials=materials,
            max_order=max_order,
            ray_tracing=False,
            air_absorption=False,
        )
        # The microphone locations are handed over as a float64 numpy array of dimension `(3, channel)`.
        room.add_microphone_array(mic_array.transpose(0, 1).double().detach().numpy())
        room.add_source(source.tolist())
        room.compute_rir()

        channel = mic_array.shape[0]
        per_mic = [room.rir[idx][0] for idx in range(channel)]
        longest = max(rir.shape[0] for rir in per_mic)
        padded = torch.zeros(channel, longest, dtype=self.dtype, device=self.device)
        for idx, rir in enumerate(per_mic):
            padded[idx, 0 : rir.shape[0]] = torch.from_numpy(rir)
        return padded

    @parameterized.expand([(1,), (4,)])
    def test_simulate_rir_ism_single_band(self, channel):
        """Test simulate_rir_ism when a single absorption coefficient is shared by all walls."""
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        max_order = 3
        # A plain float means every wall uses the same absorption coefficient.
        absorption = 0.5

        # RIR from the torchaudio implementation.
        actual = F.simulate_rir_ism(room_dim, source, mic_array, max_order, absorption)
        # Reference RIR from pyroomacoustics.
        expected = self._pyroomacoustics_rir(room_dim, source, mic_array, max_order, pra.Material(absorption))

        self.assertEqual(expected, actual, atol=1e-3, rtol=1e-3)

    @parameterized.expand([(1,), (4,)])
    def test_simulate_rir_ism_multi_band(self, channel):
        """Test simulate_rir_ism when every wall has its own per-band absorption coefficients."""
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        max_order = 3
        # `absorption` has dimensions `(7, 6)`: one column per wall (6 walls), and each column
        # holds 7 absorption coefficients, one for each of the 7 octave bands listed below.
        absorption = torch.rand(7, 6, dtype=self.dtype, device=self.device)
        band_centers = (125.0, 250.0, 500.0, 1000.0, 2000.0, 4000.0, 8000.0)
        wall_names = ["west", "east", "south", "north", "floor", "ceiling"]
        materials = {}
        for column, wall in enumerate(wall_names):
            coeffs = absorption[:, column].reshape(-1).detach().numpy()
            materials[wall] = pra.Material({"coeffs": coeffs, "center_freqs": list(band_centers)})

        expected = self._pyroomacoustics_rir(room_dim, source, mic_array, max_order, materials)
        actual = F.simulate_rir_ism(room_dim, source, mic_array, max_order, absorption)
        self.assertEqual(expected, actual, atol=1e-3, rtol=1e-3)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
from torchaudio_unittest.common_utils import PytorchTestCase

from .torchscript_consistency_test_impl import TorchScriptConsistencyTestImpl
from .torchscript_consistency_test_impl import TorchScriptConsistencyCPUOnlyTestImpl, TorchScriptConsistencyTestImpl


class TorchScriptConsistencyCPUFloat32Test(TorchScriptConsistencyTestImpl, PytorchTestCase):
Expand All @@ -12,3 +12,13 @@ class TorchScriptConsistencyCPUFloat32Test(TorchScriptConsistencyTestImpl, Pytor
class TorchScriptConsistencyCPUFloat64Test(TorchScriptConsistencyTestImpl, PytorchTestCase):
    """Runs ``TorchScriptConsistencyTestImpl`` with float64 tensors on the CPU device."""

    device = torch.device("cpu")
    dtype = torch.float64


class TorchScriptConsistencyCPUOnlyFloat32Test(TorchScriptConsistencyCPUOnlyTestImpl, PytorchTestCase):
    """Runs ``TorchScriptConsistencyCPUOnlyTestImpl`` with float32 tensors on the CPU device."""

    device = torch.device("cpu")
    dtype = torch.float32


class TorchScriptConsistencyCPUOnlyFloat64Test(TorchScriptConsistencyCPUOnlyTestImpl, PytorchTestCase):
    """Runs ``TorchScriptConsistencyCPUOnlyTestImpl`` with float64 tensors on the CPU device."""

    device = torch.device("cpu")
    dtype = torch.float64
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import torch
import torchaudio.prototype.functional as F
from torchaudio_unittest.common_utils import TestBaseMixin, torch_script
from parameterized import parameterized
from torchaudio_unittest.common_utils import skipIfNoRIR, TestBaseMixin, torch_script


class TorchScriptConsistencyTestImpl(TestBaseMixin):
Expand Down Expand Up @@ -62,3 +63,52 @@ def test_sinc_ir(self):
def test_freq_ir(self):
mags = torch.tensor([0, 0.5, 1.0], device=self.device, dtype=self.dtype)
self._assert_consistency(F.frequency_impulse_response, (mags,))


class TorchScriptConsistencyCPUOnlyTestImpl(TestBaseMixin):
    """Checks that ``F.simulate_rir_ism`` gives the same result eagerly and through ``torch_script``."""

    def _assert_consistency(self, func, inputs, shape_only=False):
        """Assert that ``func`` and its ``torch_script`` counterpart agree on ``inputs``.

        Args:
            func: callable under test; it is also passed to ``torch_script``.
            inputs: positional arguments for ``func``. Tensor arguments are moved to
                ``self.device`` and cast to ``self.dtype``; other values are passed unchanged.
            shape_only: when true, only the shapes of the two outputs are compared.
        """
        inputs_ = []
        for i in inputs:
            if torch.is_tensor(i):
                i = i.to(device=self.device, dtype=self.dtype)
            inputs_.append(i)
        ts_func = torch_script(func)

        # Re-seed before each call so both invocations start from the same RNG state.
        torch.random.manual_seed(40)
        output = func(*inputs_)

        torch.random.manual_seed(40)
        ts_output = ts_func(*inputs_)

        if shape_only:
            ts_output = ts_output.shape
            output = output.shape
        self.assertEqual(ts_output, output)

    # NOTE: `skipIfNoRIR` must sit *below* `parameterized.expand`. `expand` registers the
    # generated test methods in the class namespace itself, so a decorator stacked above it
    # only wraps the leftover placeholder and never reaches the tests that actually run.
    @parameterized.expand([(1,), (4,)])
    @skipIfNoRIR
    def test_simulate_rir_ism_single_band(self, channel):
        """Scripted and eager results agree when absorption is a single float."""
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        max_order = 3
        absorption = 0.5
        center_frequency = torch.tensor([125, 250, 500, 1000, 2000, 4000, 8000], dtype=self.dtype, device=self.device)
        self._assert_consistency(
            F.simulate_rir_ism,
            (room_dim, source, mic_array, max_order, absorption, None, 81, center_frequency, 343.0, 16000.0),
        )

    @parameterized.expand([(1,), (4,)])
    @skipIfNoRIR
    def test_simulate_rir_ism_multi_band(self, channel):
        """Scripted and eager results agree when absorption is a ``(7, 6)`` tensor."""
        room_dim = torch.rand(3, dtype=self.dtype, device=self.device) + 5
        mic_array = torch.rand(channel, 3, dtype=self.dtype, device=self.device) + 1
        source = torch.rand(3, dtype=self.dtype, device=self.device) + 4
        max_order = 3
        absorption = torch.rand(7, 6, dtype=self.dtype, device=self.device)
        center_frequency = torch.tensor([125, 250, 500, 1000, 2000, 4000, 8000], dtype=self.dtype, device=self.device)
        self._assert_consistency(
            F.simulate_rir_ism,
            (room_dim, source, mic_array, max_order, absorption, None, 81, center_frequency, 343.0, 16000.0),
        )
2 changes: 2 additions & 0 deletions tools/setup_helpers/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def _get_build(var, default=False):

_BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
_BUILD_RIR = _get_build("BUILD_RIR", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_BUILD_CTC_DECODER = _get_build("BUILD_CTC_DECODER", True)
_USE_FFMPEG = _get_build("USE_FFMPEG", False)
Expand Down Expand Up @@ -116,6 +117,7 @@ def build_extension(self, ext):
f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
Expand Down
15 changes: 13 additions & 2 deletions torchaudio/_extension/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"_check_cuda_version",
"_IS_TORCHAUDIO_EXT_AVAILABLE",
"_IS_KALDI_AVAILABLE",
"_IS_RIR_AVAILABLE",
"_SOX_INITIALIZED",
"_FFMPEG_INITIALIZED",
]
Expand All @@ -33,16 +34,18 @@
# In case of an error, we do not catch the failure as it suggests there is something
# wrong with the installation.
_IS_TORCHAUDIO_EXT_AVAILABLE = is_module_available("torchaudio.lib._torchaudio")
# Kaldi features are implemented in _torchaudio extension, but it can be individually
# Kaldi and RIR features are implemented in _torchaudio extension, but they can be individually
# turned on/off at build time. Available means that _torchaudio is loaded properly, and
# Kaldi features are found there.
# Kaldi or RIR features are found there.
_IS_RIR_AVAILABLE = False
_IS_KALDI_AVAILABLE = False
if _IS_TORCHAUDIO_EXT_AVAILABLE:
_load_lib("libtorchaudio")

import torchaudio.lib._torchaudio # noqa

_check_cuda_version()
_IS_RIR_AVAILABLE = torchaudio.lib._torchaudio.is_rir_available()
_IS_KALDI_AVAILABLE = torchaudio.lib._torchaudio.is_kaldi_available()


Expand Down Expand Up @@ -88,3 +91,11 @@
)

fail_if_no_ffmpeg = no_op if _FFMPEG_INITIALIZED else _fail_since_no_ffmpeg

# Guard for RIR-dependent APIs: `no_op` when `_IS_RIR_AVAILABLE` is truthy, otherwise
# whatever `fail_with_message` returns for the message below.
# NOTE(review): presumably applied as a decorator that raises when the wrapped function is
# called without RIR support — confirm against `fail_with_message`, which is not shown here.
fail_if_no_rir = (
no_op
if _IS_RIR_AVAILABLE
else fail_with_message(
"requires RIR extension, but TorchAudio is not compiled with it. Please build TorchAudio with RIR support."
)
)
5 changes: 5 additions & 0 deletions torchaudio/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ if(BUILD_RNNT)
endif()
endif()

# Optional RIR simulation: when BUILD_RIR is ON, add rir.cpp to the source list and
# append INCLUDE_RIR to the compile definitions.
if(BUILD_RIR)
list(APPEND sources rir.cpp)
list(APPEND compile_definitions INCLUDE_RIR)
endif()

if(USE_CUDA)
list(
APPEND
Expand Down
1 change: 1 addition & 0 deletions torchaudio/csrc/pybind/pybind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace {

// Python bindings of the `_torchaudio` module: each m.def exposes one C++ function under
// the given name, with an empty docstring.
PYBIND11_MODULE(_torchaudio, m) {
m.def("is_kaldi_available", &is_kaldi_available, "");
// Exposes the RIR availability query under the Python name `is_rir_available`.
m.def("is_rir_available", &is_rir_available, "");
m.def("cuda_version", &cuda_version, "");
}

Expand Down
Loading