Skip to content

Commit cddb69f

Browse files
committed
TestWaveformSource: Removed final usage of FFTS. Still does final group delay correction, normalization, and noise insertion on the CPU but otherwise GPU accelerated.
1 parent ca6cb78 commit cddb69f

File tree

2 files changed

+65
-54
lines changed

2 files changed

+65
-54
lines changed

scopehal/TestWaveformSource.cpp

Lines changed: 59 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,12 @@ using namespace std;
4242

4343
TestWaveformSource::TestWaveformSource(minstd_rand& rng)
4444
: m_rng(rng)
45+
, m_cachedBinSize(0)
4546
, m_rectangularComputePipeline("shaders/RectangularWindow.spv", 2, sizeof(WindowFunctionArgs))
47+
, m_channelEmulationComputePipeline("shaders/DeEmbedFilter.spv", 3, sizeof(uint32_t))
48+
, m_cachedNumPoints(0)
49+
, m_cachedRawSize(0)
4650
{
47-
#ifndef _APPLE_SILICON
48-
m_reversePlan = NULL;
49-
#endif
50-
51-
m_cachedNumPoints = 0;
52-
m_cachedRawSize = 0;
53-
5451
TouchstoneParser sxp;
5552
sxp.Load(FindDataFile("channels/300mm-s2000m.s2p"), m_sparams);
5653

@@ -66,11 +63,6 @@ TestWaveformSource::TestWaveformSource(minstd_rand& rng)
6663

6764
TestWaveformSource::~TestWaveformSource()
6865
{
69-
#ifndef _APPLE_SILICON
70-
if(m_reversePlan)
71-
ffts_free(m_reversePlan);
72-
m_reversePlan = NULL;
73-
#endif
7466
}
7567

7668
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -308,22 +300,18 @@ void TestWaveformSource::DegradeSerialData(
308300
//RNGs
309301
normal_distribution<> noise(0, noise_amplitude);
310302

311-
// ffts is not available on apple silicon, so for now we only apply noise there
312-
#ifndef _APPLE_SILICON
313303
//Prepare for second pass: reallocate FFT buffer if sample depth changed
314304
const size_t npoints = next_pow2(depth);
315305
size_t nouts = npoints/2 + 1;
306+
bool sizechange = false;
316307
if(m_cachedNumPoints != npoints)
317308
{
318-
if(m_reversePlan)
319-
ffts_free(m_reversePlan);
320-
m_reversePlan = ffts_init_1d_real(npoints, FFTS_BACKWARD);
321-
322309
m_forwardInBuf.resize(npoints);
323310
m_forwardOutBuf.resize(2*nouts);
324311
m_reverseOutBuf.resize(npoints);
325312

326313
m_cachedNumPoints = npoints;
314+
sizechange = true;
327315
}
328316

329317
//Invalidate old vkFFT plans if size has changed
@@ -346,6 +334,18 @@ void TestWaveformSource::DegradeSerialData(
346334

347335
if(lpf)
348336
{
337+
double sample_ghz = 1e6 / sampleperiod;
338+
double bin_hz = round((0.5f * sample_ghz * 1e9f) / nouts);
339+
340+
//Resample our parameter to our FFT bin size if needed.
341+
//Cache trig function output because there's no AVX instructions for this.
342+
if( (fabs(m_cachedBinSize - bin_hz) > FLT_EPSILON) || sizechange)
343+
{
344+
m_resampledSparamCosines.clear();
345+
m_resampledSparamSines.clear();
346+
InterpolateSparameters(bin_hz, nouts);
347+
}
348+
349349
//Prepare to do all of our compute stuff in one dispatch call to reduce overhead
350350
cmdBuf.begin({});
351351

@@ -368,43 +368,30 @@ void TestWaveformSource::DegradeSerialData(
368368
m_vkForwardPlan->AppendForward(m_forwardInBuf, m_forwardOutBuf, cmdBuf);
369369
m_forwardOutBuf.MarkModifiedFromGpu();
370370

371+
//Apply the interpolated S-parameters
372+
m_channelEmulationComputePipeline.BindBufferNonblocking(0, m_forwardOutBuf, cmdBuf);
373+
m_channelEmulationComputePipeline.BindBufferNonblocking(1, m_resampledSparamSines, cmdBuf);
374+
m_channelEmulationComputePipeline.BindBufferNonblocking(2, m_resampledSparamCosines, cmdBuf);
375+
m_channelEmulationComputePipeline.Dispatch(cmdBuf, (uint32_t)nouts, GetComputeBlockCount(npoints, 64));
376+
m_channelEmulationComputePipeline.AddComputeMemoryBarrier(cmdBuf);
377+
m_forwardOutBuf.MarkModifiedFromGpu();
378+
379+
//Do the actual FFT operation
380+
m_vkReversePlan->AppendReverse(m_forwardOutBuf, m_reverseOutBuf, cmdBuf);
381+
m_reverseOutBuf.MarkModifiedFromGpu();
382+
371383
//Done, block until the compute operations finish
372384
cmdBuf.end();
373385
queue->SubmitAndBlock(cmdBuf);
374-
//cap->MarkModifiedFromGpu();
375386

376387
//Next step on the CPU
377-
m_forwardOutBuf.PrepareForCpuAccess();
378-
379-
auto& s21 = m_sparams[SPair(2, 1)];
388+
m_reverseOutBuf.PrepareForCpuAccess();
380389

381390
//Calculate the group delay of the channel at the middle frequency bin
391+
auto& s21 = m_sparams[SPair(2, 1)];
382392
int64_t groupDelay = s21.GetGroupDelay(s21.size() / 2) * FS_PER_SECOND;
383393
int64_t groupDelaySamples = groupDelay / cap->m_timescale;
384394

385-
//Apply the channel
386-
double sample_ghz = 1e6 / sampleperiod;
387-
double bin_hz = round((0.5f * sample_ghz * 1e9f) / nouts);
388-
for(size_t i = 0; i<nouts; i++)
389-
{
390-
float freq = bin_hz * i;
391-
auto pt = s21.InterpolatePoint(freq);
392-
float mag = pt.m_amplitude;
393-
float ang = pt.m_phase;
394-
395-
float sinval = sin(ang) * mag;
396-
float cosval = cos(ang) * mag;
397-
398-
auto real_orig = m_forwardOutBuf[i*2];
399-
auto imag_orig = m_forwardOutBuf[i*2 + 1];
400-
401-
m_forwardOutBuf[i*2] = real_orig * cosval - imag_orig * sinval;
402-
m_forwardOutBuf[i*2 + 1] = real_orig * sinval + imag_orig * cosval;
403-
}
404-
405-
//Calculate the inverse FFT
406-
ffts_execute(m_reversePlan, &m_forwardOutBuf[0], &m_reverseOutBuf[0]);
407-
408395
//Calculate the actual start and end of the samples, accounting for garbage at the beginning of the channel
409396
size_t istart = groupDelaySamples;
410397
size_t iend = depth;
@@ -420,9 +407,35 @@ void TestWaveformSource::DegradeSerialData(
420407
}
421408

422409
else
423-
#endif
424410
{
425411
for(size_t i=0; i<depth; i++)
426412
cap->m_samples[i] += noise(m_rng);
427413
}
428414
}
415+
416+
/**
417+
@brief Recalculate the cached S-parameters
418+
*/
419+
void TestWaveformSource::InterpolateSparameters(float bin_hz, size_t nouts)
420+
{
421+
m_cachedBinSize = bin_hz;
422+
423+
auto& s21 = m_sparams[SPair(2, 1)];
424+
425+
m_resampledSparamSines.resize(nouts);
426+
m_resampledSparamCosines.resize(nouts);
427+
428+
for(size_t i=0; i<nouts; i++)
429+
{
430+
float freq = bin_hz * i;
431+
auto pt = s21.InterpolatePoint(freq);
432+
float mag = pt.m_amplitude;
433+
float ang = pt.m_phase;
434+
435+
m_resampledSparamSines[i] = sin(ang) * mag;
436+
m_resampledSparamCosines[i] = cos(ang) * mag;
437+
}
438+
439+
m_resampledSparamSines.MarkModifiedFromCpu();
440+
m_resampledSparamCosines.MarkModifiedFromCpu();
441+
}

scopehal/TestWaveformSource.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,6 @@
3737
#define TestWaveformSource_h
3838

3939
#include "VulkanFFTPlan.h"
40-
41-
#ifndef _APPLE_SILICON
42-
#include <ffts.h>
43-
#endif
4440
#include <random>
4541

4642
/**
@@ -120,17 +116,19 @@ class TestWaveformSource
120116
std::unique_ptr<VulkanFFTPlan> m_vkForwardPlan;
121117
std::unique_ptr<VulkanFFTPlan> m_vkReversePlan;
122118

119+
double m_cachedBinSize;
120+
AcceleratorBuffer<float> m_resampledSparamSines;
121+
AcceleratorBuffer<float> m_resampledSparamCosines;
122+
123123
ComputePipeline m_rectangularComputePipeline;
124+
ComputePipeline m_channelEmulationComputePipeline;
124125

125126
SParameters m_sparams;
126127

127128
size_t m_cachedNumPoints;
128129
size_t m_cachedRawSize;
129130

130-
#ifndef _APPLE_SILICON
131-
//FFT stuff
132-
ffts_plan_t* m_reversePlan;
133-
#endif
131+
void InterpolateSparameters(float bin_hz, size_t nouts);
134132
};
135133

136134
#endif

0 commit comments

Comments
 (0)