|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +Oscillator and ADSR envelope |
| 4 | +============================ |
| 5 | +
|
| 6 | +**Author**: `Moto Hira <moto@meta.com>`__ |
| 7 | +
|
| 8 | +This tutorial shows how to synthesize various waveforms using |
| 9 | +:py:func:`~torchaudio.prototype.functional.oscillator_bank` and |
| 10 | +:py:func:`~torchaudio.prototype.functional.adsr_envelope`. |
| 11 | +""" |
| 12 | + |
| 13 | +import torch |
| 14 | +import torchaudio |
| 15 | + |
| 16 | +print(torch.__version__) |
| 17 | +print(torchaudio.__version__) |
| 18 | + |
| 19 | +###################################################################### |
| 20 | +# |
| 21 | + |
| 22 | +import math |
| 23 | +import matplotlib.pyplot as plt |
| 24 | +from IPython.display import Audio |
| 25 | + |
| 26 | +from torchaudio.prototype.functional import ( |
| 27 | + oscillator_bank, |
| 28 | + adsr_envelope, |
| 29 | +) |
| 30 | + |
| 31 | +PI = torch.pi |
| 32 | +PI2 = 2 * torch.pi |
| 33 | + |
| 34 | +###################################################################### |
| 35 | +# Oscillator Bank |
| 36 | +# --------------- |
| 37 | +# |
| 38 | +# Sinusoidal oscillator generates sinusoidal waveforms from given |
| 39 | +# amplitudes and frequencies. |
| 40 | +# |
| 41 | +# .. math:: |
| 42 | +# |
| 43 | +# x_t = A_t \sin \theta_t |
| 44 | +# |
| 45 | +# Where the phase :math:`\theta_t` is found by integrating the instantaneous |
| 46 | +# frequency :math:`f_t`. |
| 47 | +# |
| 48 | +# .. math:: |
| 49 | +# |
| 50 | +# \theta_t = \sum_{k=1}^{t} f_k |
| 51 | +# |
| 52 | +# .. note:: |
| 53 | +# |
| 54 | +# Why integrate the frequencies? Instantaneous frequency represents the velocity |
| 55 | +# of oscillation at given time. So integrating the instantaneous frequency gives |
| 56 | +# the displacement of the phase of the oscillation, since the start. |
| 57 | +# In discrete-time signal processing, integration becomes accumulation. |
| 58 | +# In PyTorch, accumulation can be computed using :py:func:`torch.cumsum`. |
| 59 | +# |
| 60 | +# :py:func:`torchaudio.prototype.functional.oscillator_bank` generates a bank of |
| 61 | +# sinusoidal waveforms from amplitude envelopes and instantaneous frequencies. |
| 62 | +# |
| 63 | + |
| 64 | +###################################################################### |
| 65 | +# Simple Sine Wave |
| 66 | +# ~~~~~~~~~~~~~~~~ |
| 67 | +# |
| 68 | +# Let's start with a simple case. |
| 69 | +# |
| 70 | +# First, we generate a sinusoidal wave that has constant frequency and |
| 71 | +# amplitude everywhere, that is, a regular sine wave. |
| 72 | +# |
| 73 | + |
| 74 | +###################################################################### |
| 75 | +# |
| 76 | +# We define some constants and a helper function that we use for |
| 77 | +# the rest of the tutorial. |
| 78 | +# |
| 79 | + |
# Parameters shared by every example in this tutorial.
F0 = 344.0  # fundamental frequency
SAMPLE_RATE = 16_000  # [Hz]
DURATION = 1.1  # [seconds]

# Length of each generated signal, in samples.
NUM_FRAMES = int(DURATION * SAMPLE_RATE)
| 85 | + |
| 86 | +###################################################################### |
| 87 | +# |
| 88 | + |
def show(freq, amp, waveform, sample_rate, zoom=None, vol=0.1):
    """Plot the oscillator inputs and output, and return a playable widget.

    Four stacked panels sharing the time axis are drawn: frequency,
    amplitude, waveform, and the spectrogram of the waveform.

    Args:
        freq: Frequencies, plotted against time in the first panel.
        amp: Amplitudes, plotted in the second panel. The size of its last
            dimension is reported as the bank size in the figure title.
        waveform: Waveform to plot and play. When it is 2D, only the first
            column is used.
        sample_rate: Sample rate used for the time axis, the spectrogram
            and the playback.
        zoom: Optional x-axis limits. When given, an inset showing the
            waveform restricted to these limits is added over the
            waveform panel.
        vol: Gain applied to the peak-normalized waveform before playback.

    Returns:
        An ``IPython.display.Audio`` object built from the scaled waveform
        with ``normalize=False``.
    """
    if waveform.ndim == 2:
        waveform = waveform[:, 0]

    t = torch.arange(waveform.size(0)) / sample_rate

    fig, axes = plt.subplots(4, 1, sharex=True)
    axes[0].plot(t, freq)
    axes[0].set(
        title=f"Oscillator bank (bank size: {amp.size(-1)})",
        ylabel="Frequency [Hz]",
        ylim=[-0.03, None],
    )
    axes[1].plot(t, amp)
    axes[1].set(
        ylabel="Amplitude",
        # Pin the lower limit just below zero only when no amplitude is negative.
        ylim=[-0.03 if torch.all(amp >= 0.0) else None, None],
    )
    axes[2].plot(t, waveform)
    axes[2].set(ylabel="Waveform")
    axes[3].specgram(waveform, Fs=sample_rate)
    axes[3].set(
        ylabel="Spectrogram",
        xlabel="Time [s]",
        xlim=[-0.01, t[-1] + 0.01],
    )

    for panel in axes:
        panel.grid(True)
    # The waveform panel's position is read before ``tight_layout`` runs;
    # the inset below is placed relative to this position.
    pos = axes[2].get_position()
    plt.tight_layout()

    if zoom is not None:
        inset = fig.add_axes([pos.x0 + 0.01, pos.y0 + 0.03, pos.width / 2.5, pos.height / 2.0])
        inset.plot(t, waveform)
        inset.set(xlim=zoom, xticks=[], yticks=[])

    # Normalize out of place so the caller's tensor is left untouched, and
    # skip the division for an all-zero signal to avoid producing NaNs.
    peak = waveform.abs().max()
    if peak > 0:
        waveform = waveform / peak
    return Audio(vol * waveform, rate=sample_rate, normalize=False)
| 125 | + |
| 126 | + |
| 127 | +###################################################################### |
| 128 | +# |
| 129 | +# Now we synthesize the audio with constant frequency and amplitude. |
| 130 | +# |
| 131 | + |
# A single oscillator whose frequency (F0) and amplitude (1.0) never change.
freq = torch.full((NUM_FRAMES, 1), F0)
amp = torch.ones_like(freq)

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

# The inset spans from one to three periods of F0.
show(freq, amp, waveform, SAMPLE_RATE, zoom=(1 / F0, 3 / F0))
| 138 | + |
| 139 | +###################################################################### |
| 140 | +# Multiple sine waves |
| 141 | +# ~~~~~~~~~~~~~~~~~~~ |
| 142 | +# |
| 143 | +# :py:func:`~torchaudio.prototype.functional.oscillator_bank` can |
| 144 | +# generate an arbitrary number of sinusoids. |
| 145 | +# |
| 146 | + |
# Three oscillators at the 1st, 3rd and 5th multiples of F0, one per column.
freq = torch.empty((NUM_FRAMES, 3))
for column, multiple in enumerate((1, 3, 5)):
    freq[:, column] = multiple * F0

amp = torch.ones_like(freq)

# Average the three oscillator outputs into a single waveform.
waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, waveform, SAMPLE_RATE, zoom=(1 / F0, 3 / F0))
| 158 | + |
| 159 | + |
| 160 | +###################################################################### |
| 161 | +# Changing Frequencies across time |
| 162 | +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 163 | +# |
| 164 | +# Let's change the frequency over time. Here, we sweep the frequency |
| 165 | +# from 1 Hz to just below the Nyquist frequency (half of the sample rate) in |
| 166 | +# log-scale so that it is easy to see the change in waveform. |
| 167 | +# |
| 168 | + |
nyquist_freq = SAMPLE_RATE / 2
# Log-spaced sweep from 10**0 = 1 Hz up to 99% of the Nyquist frequency.
# ``math.log10`` is the dedicated base-10 logarithm and is usually more
# accurate than ``math.log(x, 10)``.
freq = torch.logspace(0, math.log10(0.99 * nyquist_freq), NUM_FRAMES).unsqueeze(-1)
amp = torch.ones((NUM_FRAMES, 1))

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

show(freq, amp, waveform, SAMPLE_RATE, vol=0.03)
| 176 | + |
| 177 | +###################################################################### |
| 178 | +# |
| 179 | +# We can also oscillate frequency. |
| 180 | +# |
| 181 | + |
fm = 2.5  # rate at which the frequency oscillates
f_dev = 0.9 * F0  # the degree of frequency oscillation

# Argument of the modulating sine, sampled once per output frame.
mod_phase = torch.linspace(0, fm * PI2 * DURATION, NUM_FRAMES)
freq = (F0 + f_dev * torch.sin(mod_phase)).unsqueeze(-1)

amp = torch.ones((NUM_FRAMES, 1))

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

show(freq, amp, waveform, SAMPLE_RATE, vol=0.03)
| 193 | + |
| 194 | +###################################################################### |
| 195 | +# ADSR Envelope |
| 196 | +# ------------- |
| 197 | +# |
| 198 | + |
| 199 | +###################################################################### |
| 200 | +# |
| 201 | +# Next, we change the amplitude over time. A common technique to model |
| 202 | +# amplitude is ADSR Envelope. |
| 203 | +# |
| 204 | +# ADSR stands for Attack, Decay, Sustain, and Release. |
| 205 | +# |
| 206 | +# - `Attack` is the time it takes to reach from zero to the top level. |
| 207 | +# - `Decay` is the time it takes from the top to reach sustain level. |
| 208 | +# - `Sustain` is the level at which the level stays constant. |
| 209 | +# - `Release` is the time it takes to drop to zero from sustain level. |
| 210 | +# |
| 211 | +# There are many variants of the ADSR model. Additionally, some models have |
| 212 | +# the following properties: |
| 213 | +# |
| 214 | +# - `Hold`: The time the level stays at the top level after attack. |
| 215 | +# - non-linear decay/release: The decay and release take non-linear change. |
| 216 | +# |
| 217 | +# :py:func:`~torchaudio.prototype.functional.adsr_envelope` supports |
| 218 | +# hold and polynomial decay. |
| 219 | +# |
| 220 | + |
# Constant-frequency oscillator shaped by a single ADSR envelope.
freq = torch.full((NUM_FRAMES, 1), F0)
amp = adsr_envelope(
    NUM_FRAMES,
    attack=0.2,
    hold=0.2,
    decay=0.2,
    sustain=0.5,
    release=0.2,
    n_decay=1,
).unsqueeze(-1)

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE)

audio = show(freq, amp, waveform, SAMPLE_RATE, vol=0.5)

# Label each phase of the envelope on the amplitude panel (second axes).
ax = plt.gcf().axes[1]
phase_labels = (
    ("Attack", (0, 0.5)),
    ("Hold", (2.5 * DURATION / 10, 0.8)),
    ("Decay", (4.5 * DURATION / 10, 0.5)),
    ("Sustain", (6.5 * DURATION / 10, 0.3)),
    ("Release", (8.8 * DURATION / 10, 0.4)),
)
for label, anchor in phase_labels:
    ax.annotate(label, xy=anchor)
audio
| 243 | + |
| 244 | +###################################################################### |
| 245 | +# |
| 246 | +# Now let's look into some examples of how ADSR envelope can be used |
| 247 | +# to create different sounds. |
| 248 | +# |
| 249 | +# The following examples are inspired by |
| 250 | +# `this article <https://www.edmprod.com/adsr-envelopes/>`__. |
| 251 | +# |
| 252 | + |
| 253 | +###################################################################### |
| 254 | +# Bass Beats |
| 255 | +# ~~~~~~~~~~ |
| 256 | +# |
| 257 | + |
unit = NUM_FRAMES // 3  # frames per beat
repeat = 9  # number of beats

# Two oscillators, both held at a fixed fraction of F0 for the whole clip.
freq = torch.empty((unit * repeat, 2))
for column, divisor in enumerate((9, 5)):
    freq[:, column] = F0 / divisor

# One envelope per oscillator, covering a single beat.
first_env = adsr_envelope(unit, attack=0.01, hold=0.125, decay=0.12, sustain=0.05, release=0)
second_env = adsr_envelope(unit, attack=0.01, hold=0.25, decay=0.08, sustain=0, release=0)
# Pair the envelopes column-wise, then tile the beat along the time axis.
amp = torch.stack((first_env, second_env), dim=-1).repeat(repeat, 1)

bass = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, bass, SAMPLE_RATE, vol=0.3)
| 276 | + |
| 277 | +###################################################################### |
| 278 | +# Pluck |
| 279 | +# ~~~~~ |
| 280 | +# |
| 281 | + |
# Frequency of each note of the melody, in playing order.
tones = [
    513.74,  # do
    576.65,  # re
    647.27,  # mi
    685.76,  # fa
    769.74,  # so
    685.76,  # fa
    647.27,  # mi
    576.65,  # re
    513.74,  # do
]

# Every note lasts ``unit`` frames at a constant frequency.
freq = torch.cat([torch.full((unit, 1), tone) for tone in tones], dim=0)
# One envelope per note. The repeat count is derived from the melody length
# (instead of a hard-coded 9) so that ``amp`` always has as many frames as
# ``freq``, even if notes are added or removed.
amp = adsr_envelope(unit, attack=0, decay=0.7, sustain=0.28, release=0.29)
amp = amp.repeat(len(tones)).unsqueeze(-1)

doremi = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, doremi, SAMPLE_RATE, vol=0.3)
| 301 | + |
| 302 | +###################################################################### |
| 303 | +# Riser |
| 304 | +# ~~~~~ |
| 305 | +# |
| 306 | + |
# A single envelope, six times the usual length, drives both pitch and level.
env = adsr_envelope(NUM_FRAMES * 6, attack=0.98, decay=0., sustain=1, release=0.02)

tones = [
    484.90,  # B4
    513.74,  # C5
    576.65,  # D5
    1221.88,  # D#6/Eb6
    3661.50,  # A#7/Bb7
    6157.89,  # G8
]
# Scale each target frequency by the envelope; one column per tone.
scaled_tones = [tone * env for tone in tones]
freq = torch.stack(scaled_tones, dim=-1)

# Every oscillator shares the same amplitude envelope.
amp = env.unsqueeze(-1).expand(freq.shape)

waveform = oscillator_bank(freq, amp, sample_rate=SAMPLE_RATE).mean(-1)

show(freq, amp, waveform, SAMPLE_RATE, vol=0.3)
| 324 | + |
| 325 | +###################################################################### |
| 326 | +# References |
| 327 | +# ---------- |
| 328 | +# |
| 329 | +# - https://www.edmprod.com/adsr-envelopes/ |
| 330 | +# - https://pages.mtu.edu/~suits/notefreq432.html |
| 331 | +# - https://alijamieson.co.uk/2021/12/19/forgive-me-lord-for-i-have-synth-a-guide-to-subtractive-synthesis/ |
| 332 | +# |
0 commit comments