Skip to content

Commit 33415b3

Browse files
authored
Merge pull request #33 from shirok1/master
Memory Usage Improvements
2 parents 9ac3082 + 59a4662 commit 33415b3

File tree

5 files changed

+224
-7
lines changed

5 files changed

+224
-7
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ You should customize your file-reading method to suit your specific application.
214214
{
215215
using var afr = new NAudio.Wave.AudioFileReader(filePath);
216216
int sampleRate = afr.WaveFormat.SampleRate;
217-
int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8);
217+
int bytesPerSample = afr.WaveFormat.BitsPerSample / 8;
218+
int sampleCount = (int)(afr.Length / bytesPerSample);
218219
int channelCount = afr.WaveFormat.Channels;
219220
var audio = new List<double>(sampleCount);
220221
var buffer = new float[sampleRate * channelCount];

dev/python/readwav.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""
2+
sample rate: 44100
3+
values: 166671
4+
value 12345: 4435
5+
"""
6+
from scipy.io import wavfile
7+
import pathlib
8+
PATH_HERE = pathlib.Path(__file__).parent
9+
PATH_DATA = PATH_HERE.joinpath("../../data")
10+
11+
if __name__ == "__main__":
12+
for wavFilePath in PATH_DATA.glob("*.wav"):
13+
wavFilePath = PATH_DATA.joinpath(wavFilePath)
14+
samplerate, data = wavfile.read(wavFilePath)
15+
print(f"{wavFilePath.name}, {samplerate}, {len(data)}")

src/Spectrogram.Tests/AudioFile.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,15 @@ namespace Spectrogram.Tests
77
{
88
public static class AudioFile
99
{
10+
/// <summary>
11+
/// Use NAudio to read the contents of a WAV file.
12+
/// </summary>
1013
public static (double[] audio, int sampleRate) ReadWAV(string filePath, double multiplier = 16_000)
1114
{
1215
using var afr = new NAudio.Wave.AudioFileReader(filePath);
1316
int sampleRate = afr.WaveFormat.SampleRate;
14-
int sampleCount = (int)(afr.Length / afr.WaveFormat.BitsPerSample / 8);
17+
int bytesPerSample = afr.WaveFormat.BitsPerSample / 8;
18+
int sampleCount = (int)afr.Length / bytesPerSample;
1519
int channelCount = afr.WaveFormat.Channels;
1620
var audio = new List<double>(sampleCount);
1721
var buffer = new float[sampleRate * channelCount];
@@ -21,6 +25,9 @@ public static (double[] audio, int sampleRate) ReadWAV(string filePath, double m
2125
return (audio.ToArray(), sampleRate);
2226
}
2327

28+
/// <summary>
29+
/// Use MP3Sharp to read the contents of an MP3 file.
30+
/// </summary>
2431
public static double[] ReadMP3(string filePath, int bufferSize = 4096)
2532
{
2633
List<double> audio = new List<double>();
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
using NUnit.Framework;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Text;
5+
6+
namespace Spectrogram.Tests
7+
{
8+
class AudioFileTests
9+
{
10+
/// <summary>
11+
/// Compare values read from the WAV reader against those read by Python's SciPy module (see script in /dev folder)
12+
/// </summary>
13+
[TestCase("cant-do-that-44100.wav", 44_100, 166_671, 1)]
14+
[TestCase("03-02-03-01-02-01-19.wav", 48_000, 214_615, 1)]
15+
[TestCase("qrss-10min.wav", 6_000, 3_600_000, 1)]
16+
[TestCase("cant-do-that-11025-stereo.wav", 11_025, 41668, 2)]
17+
[TestCase("asehgal-original.wav", 40_000, 1_600_000, 1)]
18+
public void Test_AudioFile_LengthAndSampleRate(string filename, int knownRate, int knownLength, int channels)
19+
{
20+
string filePath = $"../../../../../data/{filename}";
21+
(double[] audio, int sampleRate) = AudioFile.ReadWAV(filePath);
22+
23+
Assert.AreEqual(knownRate, sampleRate);
24+
Assert.AreEqual(knownLength, audio.Length / channels);
25+
}
26+
}
27+
}

src/Spectrogram/SpectrogramGenerator.cs

Lines changed: 172 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,104 @@ namespace Spectrogram
99
{
1010
public class SpectrogramGenerator
1111
{
12+
/// <summary>
13+
/// Number of pixel columns (FFT samples) in the spectrogram image
14+
/// </summary>
1215
public int Width { get { return ffts.Count; } }
16+
17+
/// <summary>
18+
/// Number of pixel rows (frequency bins) in the spectrogram image
19+
/// </summary>
1320
public int Height { get { return settings.Height; } }
21+
22+
/// <summary>
23+
/// Number of samples to use for each FFT (must be a power of 2)
24+
/// </summary>
1425
public int FftSize { get { return settings.FftSize; } }
26+
27+
/// <summary>
28+
/// Vertical resolution (frequency bin size depends on FftSize and SampleRate)
29+
/// </summary>
1530
public double HzPerPx { get { return settings.HzPerPixel; } }
31+
32+
/// <summary>
33+
/// Horizontal resolution (seconds per pixel depends on StepSize)
34+
/// </summary>
1635
public double SecPerPx { get { return settings.StepLengthSec; } }
36+
37+
/// <summary>
38+
/// Number of FFTs that remain to be processed for data which has been added but not yet analyzed
39+
/// </summary>
1740
public int FftsToProcess { get { return (newAudio.Count - settings.FftSize) / settings.StepSize; } }
41+
42+
/// <summary>
43+
/// Total number of FFT steps processed
44+
/// </summary>
1845
public int FftsProcessed { get; private set; }
46+
47+
/// <summary>
48+
/// Index of the pixel column which will be populated next. Location of vertical line for wrap-around displays.
49+
/// </summary>
1950
public int NextColumnIndex { get { return (FftsProcessed + rollOffset) % Width; } }
51+
52+
/// <summary>
53+
/// This value is added to displayed frequency axis tick labels
54+
/// </summary>
2055
public int OffsetHz { get { return settings.OffsetHz; } set { settings.OffsetHz = value; } }
56+
57+
/// <summary>
58+
/// Number of samples per second
59+
/// </summary>
2160
public int SampleRate { get { return settings.SampleRate; } }
61+
62+
/// <summary>
63+
/// Number of samples to step forward after each FFT is processed.
64+
/// This value controls the horizontal resolution of the spectrogram.
65+
/// </summary>
2266
public int StepSize { get { return settings.StepSize; } }
67+
68+
/// <summary>
69+
/// The spectrogram is trimmed to cut off frequencies above this value.
70+
/// </summary>
2371
public double FreqMax { get { return settings.FreqMax; } }
72+
73+
/// <summary>
74+
/// The spectrogram is trimmed to cut off frequencies below this value.
75+
/// </summary>
2476
public double FreqMin { get { return settings.FreqMin; } }
2577

2678
private readonly Settings settings;
2779
private readonly List<double[]> ffts = new List<double[]>();
28-
private readonly List<double> newAudio = new List<double>();
80+
private readonly List<double> newAudio;
2981
private Colormap cmap = Colormap.Viridis;
3082

31-
public SpectrogramGenerator(int sampleRate, int fftSize, int stepSize,
32-
double minFreq = 0, double maxFreq = double.PositiveInfinity,
33-
int? fixedWidth = null, int offsetHz = 0)
83+
/// <summary>
84+
/// Instantiate a spectrogram generator.
85+
/// This module calculates the FFT over a moving window as data comes in.
86+
/// Use the Add() method to load new data and process it as it arrives.
87+
/// </summary>
88+
/// <param name="sampleRate">Number of samples per second (Hz)</param>
89+
/// <param name="fftSize">Number of samples to use for each FFT operation. This value must be a power of 2.</param>
90+
/// <param name="stepSize">Number of samples to step forward</param>
91+
/// <param name="minFreq">Frequency data lower than this value (Hz) will not be stored</param>
92+
/// <param name="maxFreq">Frequency data higher than this value (Hz) will not be stored</param>
93+
/// <param name="fixedWidth">Spectrogram output will always be sized to this width (column count)</param>
94+
/// <param name="offsetHz">This value will be added to displayed frequency axis tick labels</param>
95+
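/// <param name="initialAudioList">Audio samples to start with (alternative to calling Add() later). The list is used directly as the internal buffer of unprocessed audio (it is not copied), and this constructor does not call Process() on it.</param>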
96+
public SpectrogramGenerator(
97+
int sampleRate,
98+
int fftSize,
99+
int stepSize,
100+
double minFreq = 0,
101+
double maxFreq = double.PositiveInfinity,
102+
int? fixedWidth = null,
103+
int offsetHz = 0,
104+
List<double> initialAudioList = null)
34105
{
35106
settings = new Settings(sampleRate, fftSize, stepSize, minFreq, maxFreq, offsetHz);
36107

108+
newAudio = initialAudioList ?? new List<double>();
109+
37110
if (fixedWidth.HasValue)
38111
SetFixedWidth(fixedWidth.Value);
39112
}
@@ -56,11 +129,18 @@ public override string ToString()
56129
$"overlap: {settings.StepOverlapFrac * 100:N0}%";
57130
}
58131

132+
/// <summary>
133+
/// Set the colormap to use for future renders
134+
/// </summary>
59135
public void SetColormap(Colormap cmap)
60136
{
61137
this.cmap = cmap ?? this.cmap;
62138
}
63139

140+
/// <summary>
141+
/// Load a custom window kernel to multiply against each FFT sample prior to processing.
142+
/// Windows must be no longer than FftSize and typically have a sum of 1.0.
143+
/// </summary>
64144
public void SetWindow(double[] newWindow)
65145
{
66146
if (newWindow.Length > settings.FftSize)
@@ -82,19 +162,36 @@ public void AddCircular(float[] values) { }
82162
[Obsolete("use the Add() method", true)]
83163
public void AddScroll(float[] values) { }
84164

85-
public void Add(double[] audio, bool process = true)
165+
/// <summary>
166+
/// Load new data into the spectrogram generator
167+
/// </summary>
168+
public void Add(IEnumerable<double> audio, bool process = true)
86169
{
87170
newAudio.AddRange(audio);
88171
if (process)
89172
Process();
90173
}
91174

175+
/// <summary>
176+
/// The roll offset is used to calculate NextColumnIndex and can be set to a positive number
177+
/// to begin adding new columns to the center of the spectrogram.
178+
/// This can also be used to artificially move the next column index to zero even though some
179+
/// data has already been accumulated.
180+
/// </summary>
92181
private int rollOffset = 0;
182+
183+
/// <summary>
184+
/// Reset the next column index such that the next processed FFT will appear at the far left of the spectrogram.
185+
/// </summary>
186+
/// <param name="offset">Column index (counted from the left edge) at which the next processed FFT will be placed.</param>
93187
public void RollReset(int offset = 0)
94188
{
95189
rollOffset = -FftsProcessed + offset;
96190
}
97191

192+
/// <summary>
193+
/// Perform FFT analysis on all unprocessed data
194+
/// </summary>
98195
public double[][] Process()
99196
{
100197
if (FftsToProcess < 1)
@@ -127,6 +224,10 @@ public double[][] Process()
127224
return newFfts;
128225
}
129226

227+
/// <summary>
228+
/// Return a list of the mel-scaled FFTs contained in this spectrogram
229+
/// </summary>
230+
/// <param name="melBinCount">Total number of output bins to use. Choose a value significantly smaller than Height.</param>
130231
public List<double[]> GetMelFFTs(int melBinCount)
131232
{
132233
if (settings.FreqMin != 0)
@@ -139,15 +240,44 @@ public List<double[]> GetMelFFTs(int melBinCount)
139240
return fftsMel;
140241
}
141242

243+
/// <summary>
244+
/// Create and return a spectrogram bitmap from the FFTs stored in memory.
245+
/// </summary>
246+
/// <param name="intensity">Multiply the output by a fixed value to change its brightness.</param>
247+
/// <param name="dB">If true, output will be log-transformed.</param>
248+
/// <param name="dBScale">If dB scaling is in use, this multiplier will be applied before log transformation.</param>
249+
/// <param name="roll">Behavior of the spectrogram when it is full of data.
250+
/// Roll (true) adds new columns on the left overwriting the oldest ones.
251+
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
142252
public Bitmap GetBitmap(double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false) =>
143253
Image.GetBitmap(ffts, cmap, intensity, dB, dBScale, roll, NextColumnIndex);
144254

255+
/// <summary>
256+
/// Create a Mel-scaled spectrogram.
257+
/// </summary>
258+
/// <param name="melBinCount">Total number of output bins to use. Choose a value significantly smaller than Height.</param>
259+
/// <param name="intensity">Multiply the output by a fixed value to change its brightness.</param>
260+
/// <param name="dB">If true, output will be log-transformed.</param>
261+
/// <param name="dBScale">If dB scaling is in use, this multiplier will be applied before log transformation.</param>
262+
/// <param name="roll">Behavior of the spectrogram when it is full of data.
263+
/// Roll (true) adds new columns on the left overwriting the oldest ones.
264+
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
145265
public Bitmap GetBitmapMel(int melBinCount = 25, double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false) =>
146266
Image.GetBitmap(GetMelFFTs(melBinCount), cmap, intensity, dB, dBScale, roll, NextColumnIndex);
147267

148268
[Obsolete("use SaveImage()", true)]
149269
public void SaveBitmap(Bitmap bmp, string fileName) { }
150270

271+
/// <summary>
272+
/// Generate the spectrogram and save it as an image file.
273+
/// </summary>
274+
/// <param name="fileName">Path of the file to save.</param>
275+
/// <param name="intensity">Multiply the output by a fixed value to change its brightness.</param>
276+
/// <param name="dB">If true, output will be log-transformed.</param>
277+
/// <param name="dBScale">If dB scaling is in use, this multiplier will be applied before log transformation.</param>
278+
/// <param name="roll">Behavior of the spectrogram when it is full of data.
279+
/// Roll (true) adds new columns on the left overwriting the oldest ones.
280+
/// Scroll (false) slides the whole image to the left and adds new columns to the right.</param>
151281
public void SaveImage(string fileName, double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false)
152282
{
153283
if (ffts.Count == 0)
@@ -170,6 +300,15 @@ public void SaveImage(string fileName, double intensity = 1, bool dB = false, do
170300
Image.GetBitmap(ffts, cmap, intensity, dB, dBScale, roll, NextColumnIndex).Save(fileName, fmt);
171301
}
172302

303+
/// <summary>
304+
/// Create and return a spectrogram bitmap from the FFTs stored in memory.
305+
/// The output will be scaled-down vertically by binning according to a reduction factor and keeping the brightest pixel value in each bin.
306+
/// </summary>
307+
/// <param name="intensity">Multiply the output by a fixed value to change its brightness.</param>
308+
/// <param name="dB">If true, output will be log-transformed.</param>
309+
/// <param name="dBScale">If dB scaling is in use, this multiplier will be applied before log transformation.</param>
310+
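/// <param name="roll">Behavior of the spectrogram when it is full of data. Roll (true) adds new columns on the left overwriting the oldest ones. Scroll (false) slides the whole image to the left and adds new columns to the right.</param>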
311+
/// <param name="reduction">Vertical reduction factor used for binning; the brightest pixel value in each bin is kept.</param>
173312
public Bitmap GetBitmapMax(double intensity = 1, bool dB = false, double dBScale = 1, bool roll = false, int reduction = 4)
174313
{
175314
List<double[]> ffts2 = new List<double[]>();
@@ -185,14 +324,25 @@ public Bitmap GetBitmapMax(double intensity = 1, bool dB = false, double dBScale
185324
return Image.GetBitmap(ffts2, cmap, intensity, dB, dBScale, roll, NextColumnIndex);
186325
}
187326

327+
/// <summary>
328+
/// Export spectrogram data using the Spectrogram File Format (SFF)
329+
/// </summary>
188330
public void SaveData(string filePath, int melBinCount = 0)
189331
{
190332
if (!filePath.EndsWith(".sff", StringComparison.OrdinalIgnoreCase))
191333
filePath += ".sff";
192334
new SFF(this, melBinCount).Save(filePath);
193335
}
194336

337+
/// <summary>
338+
/// Defines the total number of FFTs (spectrogram columns) to store in memory. Determines Width.
339+
/// </summary>
195340
private int fixedWidth = 0;
341+
342+
/// <summary>
343+
/// Configure the Spectrogram to maintain a fixed number of pixel columns.
344+
/// Zeros will be added to pad existing data to achieve this width, and extra columns will be deleted.
345+
/// </summary>
196346
public void SetFixedWidth(int width)
197347
{
198348
fixedWidth = width;
@@ -212,11 +362,21 @@ private void PadOrTrimForFixedWidth()
212362
}
213363
}
214364

365+
/// <summary>
366+
/// Get a vertical image containing ticks and tick labels for the frequency axis.
367+
/// </summary>
368+
/// <param name="width">size (pixels)</param>
369+
/// <param name="offsetHz">number to add to each tick label</param>
370+
/// <param name="tickSize">length of each tick mark (pixels)</param>
371+
/// <param name="reduction">bin size for vertical data reduction</param>
215372
public Bitmap GetVerticalScale(int width, int offsetHz = 0, int tickSize = 3, int reduction = 1)
216373
{
217374
return Scale.Vertical(width, settings, offsetHz, tickSize, reduction);
218375
}
219376

377+
/// <summary>
378+
/// Return the vertical position (pixel units) for the given frequency
379+
/// </summary>
220380
public int PixelY(double frequency, int reduction = 1)
221381
{
222382
int pixelsFromZeroHz = (int)(settings.PxPerHz * frequency / reduction);
@@ -225,11 +385,18 @@ public int PixelY(double frequency, int reduction = 1)
225385
return pixelRow - 1;
226386
}
227387

388+
/// <summary>
389+
/// Return a list of the FFTs in memory underlying the spectrogram
390+
/// </summary>
228391
public List<double[]> GetFFTs()
229392
{
230393
return ffts;
231394
}
232395

396+
/// <summary>
397+
/// Return frequency and magnitude of the dominant frequency.
398+
/// </summary>
399+
/// <param name="latestFft">If true, only the latest FFT will be assessed.</param>
233400
public (double freqHz, double magRms) GetPeak(bool latestFft = true)
234401
{
235402
if (ffts.Count == 0)

0 commit comments

Comments
 (0)