Skip to content

Commit 1aeebb1

Browse files
committed
[WIP] Add DSP ops
1 parent 087d09b commit 1aeebb1

File tree

7 files changed

+990
-1
lines changed

7 files changed

+990
-1
lines changed

docs/source/index.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ model implementations and application components.
3535
tutorials/audio_data_augmentation_tutorial
3636
tutorials/audio_feature_extractions_tutorial
3737
tutorials/audio_feature_augmentation_tutorial
38+
tutorials/oscillator_tutorial
39+
tutorials/synthesis_tutorial
3840

3941
tutorials/audio_datasets_tutorial
4042

@@ -180,6 +182,20 @@ Tutorials
180182
:link: tutorials/audio_feature_augmentation_tutorial.html
181183
:tags: Preprocessing
182184

185+
.. customcarditem::
186+
:header: Generating waveforms with oscillator
187+
:card_description:
188+
:image: _images/sphx_glr_oscillator_tutorial_003.png
189+
:link: tutorials/oscillator_tutorial.html
190+
:tags: DSP
191+
192+
.. customcarditem::
193+
:header: Sound synthesis with digital signal processing
194+
:card_description:
195+
:image: _images/sphx_glr_synthesis_tutorial_001.png
196+
:link: tutorials/synthesis_tutorial.html
197+
:tags: DSP
198+
183199
.. customcarditem::
184200
:header: Audio dataset
185201
:card_description: Learn how to use <code>torchaudio.datasets</code> module.

docs/source/prototype.functional.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,8 @@ DSP
2626
:toctree: generated
2727
:nosignatures:
2828

29+
adsr_envelope
30+
extend_pitch
31+
apply_time_varying_filter
2932
oscillator_bank
33+
sinc_filter
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Oscillator and ADSR envelope
4+
============================
5+
6+
**Author**: `Moto Hira <moto@meta.com>`__
7+
8+
This tutorial shows how to synthesize various waveforms using
9+
:py:func:`~torchaudio.prototype.functional.oscillator_bank` and
10+
:py:func:`~torchaudio.prototype.functional.adsr_envelope`.
11+
"""
12+
13+
import torch
14+
import torchaudio
15+
16+
print(torch.__version__)
17+
print(torchaudio.__version__)
18+
19+
######################################################################
20+
#
21+
22+
import math
23+
import matplotlib.pyplot as plt
24+
from IPython.display import Audio
25+
26+
from torchaudio.prototype.functional import (
27+
oscillator_bank,
28+
adsr_envelope,
29+
)
30+
# Circle constants used when building phase trajectories later in the tutorial.
PI = torch.pi
PI2 = 2 * torch.pi
33+
34+
######################################################################
35+
# Oscillator Bank
36+
# ---------------
37+
#
38+
# Sinusoidal oscillator generates sinusoidal waveforms from given
39+
# amplitudes and frequencies.
40+
#
41+
# .. math::
42+
#
43+
# x_t = A_t \sin \theta_t
44+
#
45+
# Where the phase :math:`\theta_t` is found by integrating the instantaneous
46+
# frequency :math:`f_t`.
47+
#
48+
# .. math::
49+
#
50+
# \theta_t = \sum_{k=1}^{t} f_k
51+
#
52+
# .. note::
53+
#
54+
# Why integrate the frequencies? Instantaneous frequency represents the velocity
55+
# of oscillation at a given time. So integrating the instantaneous frequency gives
56+
# the displacement of the phase of the oscillation, since the start.
57+
# In discrete-time signal processing, integration becomes accumulation.
58+
# In PyTorch, accumulation can be computed using :py:func:`torch.cumsum`.
59+
#
60+
# :py:func:`torchaudio.prototype.functional.oscillator_bank` generates a bank of
61+
# sinusoidal waveforms from amplitude envelopes and instantaneous frequencies.
62+
#
63+
64+
######################################################################
65+
# Simple Sine Wave
66+
# ~~~~~~~~~~~~~~~~
67+
#
68+
# Let's start with a simple case.
69+
#
70+
# First, we generate a sinusoidal wave that has constant frequency and
71+
# amplitude everywhere, that is, a regular sine wave.
72+
#
73+
74+
######################################################################
75+
#
76+
# We define some constants and a helper function that we use for
77+
# the rest of the tutorial.
78+
#
79+
# Shared synthesis parameters used for the rest of the tutorial.
F0 = 344.0  # fundamental frequency [Hz]
DURATION = 1.1  # [seconds]
SAMPLE_RATE = 16_000  # [Hz]

# Number of samples in a DURATION-long signal at SAMPLE_RATE.
NUM_FRAMES = int(DURATION * SAMPLE_RATE)
85+
86+
######################################################################
87+
#
88+
def show(freq, amp, waveform, sample_rate, zoom=None, vol=0.1):
    """Plot oscillator inputs/outputs and return an audio player for the result.

    Four vertically stacked panels sharing the time axis are drawn:
    frequency, amplitude, waveform and spectrogram of the waveform.

    Args:
        freq: Frequency values plotted against time (axis labelled in Hz).
        amp: Amplitude values plotted against time. ``amp.size(-1)`` is shown
            in the title as the bank size.
        waveform: Waveform to plot and play. If it is 2D, only
            ``waveform[:, 0]`` is used. The tensor passed in is not modified.
        sample_rate: Sample rate used to build the time axis, compute the
            spectrogram and play back the audio.
        zoom: Optional x-axis limits. When given, an inset plotting that
            portion of the waveform is overlaid on the waveform panel.
        vol: Gain applied to the peak-normalized waveform before playback.

    Returns:
        An ``IPython.display.Audio`` object holding ``vol`` times the
        peak-normalized waveform (the waveform is left unscaled if it is
        entirely zero).
    """
    if waveform.ndim == 2:
        waveform = waveform[:, 0]

    t = torch.arange(waveform.size(0)) / sample_rate

    fig, axes = plt.subplots(4, 1, sharex=True)
    axes[0].plot(t, freq)
    axes[0].set(
        title=f"Oscillator bank (bank size: {amp.size(-1)})",
        ylabel="Frequency [Hz]",
        ylim=[-0.03, None],
    )
    axes[1].plot(t, amp)
    axes[1].set(
        ylabel="Amplitude",
        # Pin the lower bound just below zero only for non-negative envelopes;
        # otherwise let matplotlib pick it.
        ylim=[-0.03 if torch.all(amp >= 0.0) else None, None],
    )
    axes[2].plot(t, waveform)
    axes[2].set(ylabel="Waveform")
    axes[3].specgram(waveform, Fs=sample_rate)
    axes[3].set(
        ylabel="Spectrogram",
        xlabel="Time [s]",
        xlim=[-0.01, t[-1] + 0.01],
    )

    for axis in axes:
        axis.grid(True)
    # The waveform panel's position is read before tight_layout() runs; the
    # inset below is placed relative to this pre-layout position.
    pos = axes[2].get_position()
    plt.tight_layout()

    if zoom is not None:
        inset = fig.add_axes([pos.x0 + 0.01, pos.y0 + 0.03, pos.width / 2.5, pos.height / 2.0])
        inset.plot(t, waveform)
        inset.set(xlim=zoom, xticks=[], yticks=[])

    # Normalize out of place so the caller's tensor (possibly a view of it,
    # when the 2D branch above was taken) is not silently rescaled, and skip
    # the division for an all-zero signal to avoid producing NaNs.
    peak = waveform.abs().max()
    if peak > 0:
        waveform = waveform / peak
    return Audio(vol * waveform, rate=sample_rate, normalize=False)
125+
126+
127+
######################################################################
128+
#
129+
# Now we synthesize the audio with constant frequency and amplitude.
130+
#
131+
# One oscillator with a constant frequency (F0) and constant unit amplitude.
freq = torch.full((NUM_FRAMES, 1), F0)
amp = torch.ones((NUM_FRAMES, 1))

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

# The inset zooms in on the span from 1/F0 to 3/F0 seconds.
show(freq, amp, waveform, SAMPLE_RATE, zoom=(1 / F0, 3 / F0))
138+
139+
######################################################################
140+
# Multiple sine waves
141+
# ~~~~~~~~~~~~~~~~~~~
142+
#
143+
# :py:func:`~torchaudio.prototype.functional.oscillator_bank` can
144+
# generate an arbitrary number of sinusoids.
145+
#
146+
# Three oscillators at F0 and its 3rd and 5th multiples.
freq = torch.empty((NUM_FRAMES, 3))
freq[:, 0] = F0
freq[:, 1] = 3 * F0
freq[:, 2] = 5 * F0

amp = torch.ones((NUM_FRAMES, 3))

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

# Average over the last dimension to mix the bank into one waveform.
waveform = waveform.mean(-1)
show(freq, amp, waveform, SAMPLE_RATE, zoom=(1 / F0, 3 / F0))
158+
159+
160+
######################################################################
161+
# Changing Frequencies across time
162+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
163+
#
164+
# Let's change the frequency over time. Here, we change the frequency
165+
# from 1 Hz to just below the Nyquist frequency (half of the sample rate) in
166+
# log-scale so that it is easy to see the change in waveform.
167+
#
168+
nyquist_freq = SAMPLE_RATE / 2
# Log-spaced sweep from 10**0 = 1 Hz up to 99% of the Nyquist frequency.
freq = torch.logspace(0, math.log(0.99 * nyquist_freq, 10), NUM_FRAMES).unsqueeze(-1)
amp = torch.ones((NUM_FRAMES, 1))

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

show(freq, amp, waveform, SAMPLE_RATE, vol=0.03)
176+
177+
######################################################################
178+
#
179+
# We can also oscillate frequency.
180+
#
181+
fm = 2.5  # rate at which the frequency oscillates
f_dev = 0.9 * F0  # the degree of frequency oscillation

# Frequency swings sinusoidally around F0 by +/- f_dev; the sine argument
# spans fm full cycles per second over DURATION seconds.
freq = F0 + f_dev * torch.sin(torch.linspace(0, fm * PI2 * DURATION, NUM_FRAMES))
freq = freq.unsqueeze(-1)

amp = torch.ones((NUM_FRAMES, 1))

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

show(freq, amp, waveform, SAMPLE_RATE, vol=0.03)
193+
194+
######################################################################
195+
# ADSR Envelope
196+
# -------------
197+
#
198+
199+
######################################################################
200+
#
201+
# Next, we change the amplitude over time. A common technique to model
202+
# amplitude is ADSR Envelope.
203+
#
204+
# ADSR stands for Attack, Decay, Sustain, and Release.
205+
#
206+
# - `Attack` is the time it takes to reach from zero to the top level.
207+
# - `Decay` is the time it takes from the top to reach sustain level.
208+
# - `Sustain` is the level at which the amplitude stays constant.
209+
# - `Release` is the time it takes to drop to zero from sustain level.
210+
#
211+
# There are many variants of the ADSR model. Additionally, some models have
212+
# the following properties
213+
#
214+
# - `Hold`: The time the level stays at the top level after attack.
215+
# - non-linear decay/release: The decay and release take non-linear change.
216+
#
217+
# :py:func:`~torchaudio.prototype.functional.adsr_envelope` supports
218+
# hold and polynomial decay.
219+
#
220+
freq = torch.full((NUM_FRAMES, 1), F0)
# NOTE(review): attack/hold/decay/release are presumably fractions of the
# envelope length and sustain a level -- confirm against adsr_envelope docs.
amp = adsr_envelope(
    NUM_FRAMES,
    attack=0.2,
    hold=0.2,
    decay=0.2,
    sustain=0.5,
    release=0.2,
    n_decay=1,
)
amp = amp.unsqueeze(-1)

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

audio = show(freq, amp, waveform, SAMPLE_RATE, vol=0.5)
# Label each envelope phase on the amplitude panel (second axes of the figure).
ax = plt.gcf().axes[1]
ax.annotate("Attack", xy=(0, 0.5))
ax.annotate("Hold", xy=(2.5 * DURATION / 10, 0.8))
ax.annotate("Decay", xy=(4.5 * DURATION / 10, 0.5))
ax.annotate("Sustain", xy=(6.5 * DURATION / 10, 0.3))
ax.annotate("Release", xy=(8.8 * DURATION / 10, 0.4))
# Leave the audio widget as the final expression of the cell.
audio
243+
244+
######################################################################
245+
#
246+
# Now let's look into some examples of how ADSR envelope can be used
247+
# to create different sounds.
248+
#
249+
# The following examples are inspired by
250+
# `this article <https://www.edmprod.com/adsr-envelopes/>`__.
251+
#
252+
253+
######################################################################
254+
# Bass Beats
255+
# ~~~~~~~~~~
256+
#
257+
unit = NUM_FRAMES // 3  # length of one beat, in samples
repeat = 9  # number of beats

# Two low oscillators, held at constant frequency for the whole sequence.
freq = torch.empty((unit * repeat, 2))
freq[:, 0] = F0 / 9
freq[:, 1] = F0 / 5

# One envelope per oscillator for a single beat, stacked along the bank dimension.
amp = torch.stack(
    (
        adsr_envelope(unit, attack=0.01, hold=0.125, decay=0.12, sustain=0.05, release=0),
        adsr_envelope(unit, attack=0.01, hold=0.25, decay=0.08, sustain=0, release=0),
    ),
    dim=-1,
)
# Tile the one-beat envelope along time so it matches freq's length.
amp = amp.repeat(repeat, 1)

bass = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, bass, SAMPLE_RATE, vol=0.3)
276+
277+
######################################################################
278+
# Pluck
279+
# ~~~~~
280+
#
281+
tones = [
    513.74,  # do
    576.65,  # re
    647.27,  # mi
    685.76,  # fa
    769.74,  # so
    685.76,  # fa
    647.27,  # mi
    576.65,  # re
    513.74,  # do
]

# Each tone is held for `unit` samples (the `unit` defined in the Bass Beats section).
freq = torch.cat([torch.full((unit, 1), tone) for tone in tones], dim=0)
amp = adsr_envelope(unit, attack=0, decay=0.7, sustain=0.28, release=0.29)
# Repeat the single-note envelope once per tone so amp stays aligned with freq.
amp = amp.repeat(len(tones)).unsqueeze(-1)

doremi = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, doremi, SAMPLE_RATE, vol=0.3)
301+
302+
######################################################################
303+
# Riser
304+
# ~~~~~
305+
#
306+
# A single long envelope, reused to shape both the pitch and the loudness.
env = adsr_envelope(NUM_FRAMES * 6, attack=0.98, decay=0.0, sustain=1, release=0.02)

tones = [
    484.90,  # B4
    513.74,  # C5
    576.65,  # D5
    1221.88,  # D#6/Eb6
    3661.50,  # A#7/Bb7
    6157.89,  # G8
]
# Scale each target frequency by the envelope: one column per tone.
freq = torch.stack([f * env for f in tones], dim=-1)

# Broadcast the same envelope across every oscillator in the bank.
amp = env.unsqueeze(-1).expand(freq.shape)

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, waveform, SAMPLE_RATE, vol=0.3)
324+
325+
######################################################################
326+
# References
327+
# ----------
328+
#
329+
# - https://www.edmprod.com/adsr-envelopes/
330+
# - https://pages.mtu.edu/~suits/notefreq432.html
331+
# - https://alijamieson.co.uk/2021/12/19/forgive-me-lord-for-i-have-synth-a-guide-to-subtractive-synthesis/
332+
#

0 commit comments

Comments
 (0)