@@ -42,15 +42,12 @@ using namespace std;
42
42
43
43
TestWaveformSource::TestWaveformSource (minstd_rand& rng)
44
44
: m_rng(rng)
45
+ , m_cachedBinSize(0 )
45
46
, m_rectangularComputePipeline(" shaders/RectangularWindow.spv" , 2 , sizeof (WindowFunctionArgs))
47
+ , m_channelEmulationComputePipeline(" shaders/DeEmbedFilter.spv" , 3 , sizeof (uint32_t ))
48
+ , m_cachedNumPoints(0 )
49
+ , m_cachedRawSize(0 )
46
50
{
47
- #ifndef _APPLE_SILICON
48
- m_reversePlan = NULL ;
49
- #endif
50
-
51
- m_cachedNumPoints = 0 ;
52
- m_cachedRawSize = 0 ;
53
-
54
51
TouchstoneParser sxp;
55
52
sxp.Load (FindDataFile (" channels/300mm-s2000m.s2p" ), m_sparams);
56
53
@@ -66,11 +63,6 @@ TestWaveformSource::TestWaveformSource(minstd_rand& rng)
66
63
67
64
// Destructor. In this diff view the lines marked "-" (which freed m_reversePlan with ffts_free()
// and nulled it, guarded by #ifndef _APPLE_SILICON) are removed, leaving an empty body.
TestWaveformSource::~TestWaveformSource ()
68
65
{
69
- #ifndef _APPLE_SILICON
70
- if (m_reversePlan)
71
- ffts_free (m_reversePlan);
72
- m_reversePlan = NULL ;
73
- #endif
74
66
}
75
67
76
68
// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -308,22 +300,18 @@ void TestWaveformSource::DegradeSerialData(
308
300
// RNGs
309
301
normal_distribution<> noise (0 , noise_amplitude);
310
302
311
- // ffts is not available on apple silicon, so for now we only apply noise there
312
- #ifndef _APPLE_SILICON
313
303
// Prepare for second pass: reallocate FFT buffer if sample depth changed
314
304
const size_t npoints = next_pow2 (depth);
315
305
size_t nouts = npoints/2 + 1 ;
306
+ bool sizechange = false ;
316
307
if (m_cachedNumPoints != npoints)
317
308
{
318
- if (m_reversePlan)
319
- ffts_free (m_reversePlan);
320
- m_reversePlan = ffts_init_1d_real (npoints, FFTS_BACKWARD);
321
-
322
309
m_forwardInBuf.resize (npoints);
323
310
m_forwardOutBuf.resize (2 *nouts);
324
311
m_reverseOutBuf.resize (npoints);
325
312
326
313
m_cachedNumPoints = npoints;
314
+ sizechange = true ;
327
315
}
328
316
329
317
// Invalidate old vkFFT plans if size has changed
@@ -346,6 +334,18 @@ void TestWaveformSource::DegradeSerialData(
346
334
347
335
if (lpf)
348
336
{
337
+ double sample_ghz = 1e6 / sampleperiod;
338
+ double bin_hz = round ((0 .5f * sample_ghz * 1e9f) / nouts);
339
+
340
+ // Resample our parameter to our FFT bin size if needed.
341
+ // Cache trig function output because there are no AVX instructions for this.
342
+ if ( (fabs (m_cachedBinSize - bin_hz) > FLT_EPSILON) || sizechange)
343
+ {
344
+ m_resampledSparamCosines.clear ();
345
+ m_resampledSparamSines.clear ();
346
+ InterpolateSparameters (bin_hz, nouts);
347
+ }
348
+
349
349
// Prepare to do all of our compute stuff in one dispatch call to reduce overhead
350
350
cmdBuf.begin ({});
351
351
@@ -368,43 +368,30 @@ void TestWaveformSource::DegradeSerialData(
368
368
m_vkForwardPlan->AppendForward (m_forwardInBuf, m_forwardOutBuf, cmdBuf);
369
369
m_forwardOutBuf.MarkModifiedFromGpu ();
370
370
371
+ // Apply the interpolated S-parameters
372
+ m_channelEmulationComputePipeline.BindBufferNonblocking (0 , m_forwardOutBuf, cmdBuf);
373
+ m_channelEmulationComputePipeline.BindBufferNonblocking (1 , m_resampledSparamSines, cmdBuf);
374
+ m_channelEmulationComputePipeline.BindBufferNonblocking (2 , m_resampledSparamCosines, cmdBuf);
375
+ m_channelEmulationComputePipeline.Dispatch (cmdBuf, (uint32_t )nouts, GetComputeBlockCount (npoints, 64 ));
376
+ m_channelEmulationComputePipeline.AddComputeMemoryBarrier (cmdBuf);
377
+ m_forwardOutBuf.MarkModifiedFromGpu ();
378
+
379
+ // Do the actual FFT operation
380
+ m_vkReversePlan->AppendReverse (m_forwardOutBuf, m_reverseOutBuf, cmdBuf);
381
+ m_reverseOutBuf.MarkModifiedFromGpu ();
382
+
371
383
// Done, block until the compute operations finish
372
384
cmdBuf.end ();
373
385
queue->SubmitAndBlock (cmdBuf);
374
- // cap->MarkModifiedFromGpu();
375
386
376
387
// Next step on the CPU
377
- m_forwardOutBuf.PrepareForCpuAccess ();
378
-
379
- auto & s21 = m_sparams[SPair (2 , 1 )];
388
+ m_reverseOutBuf.PrepareForCpuAccess ();
380
389
381
390
// Calculate the group delay of the channel at the middle frequency bin
391
+ auto & s21 = m_sparams[SPair (2 , 1 )];
382
392
int64_t groupDelay = s21.GetGroupDelay (s21.size () / 2 ) * FS_PER_SECOND;
383
393
int64_t groupDelaySamples = groupDelay / cap->m_timescale ;
384
394
385
- // Apply the channel
386
- double sample_ghz = 1e6 / sampleperiod;
387
- double bin_hz = round ((0 .5f * sample_ghz * 1e9f) / nouts);
388
- for (size_t i = 0 ; i<nouts; i++)
389
- {
390
- float freq = bin_hz * i;
391
- auto pt = s21.InterpolatePoint (freq);
392
- float mag = pt.m_amplitude ;
393
- float ang = pt.m_phase ;
394
-
395
- float sinval = sin (ang) * mag;
396
- float cosval = cos (ang) * mag;
397
-
398
- auto real_orig = m_forwardOutBuf[i*2 ];
399
- auto imag_orig = m_forwardOutBuf[i*2 + 1 ];
400
-
401
- m_forwardOutBuf[i*2 ] = real_orig * cosval - imag_orig * sinval;
402
- m_forwardOutBuf[i*2 + 1 ] = real_orig * sinval + imag_orig * cosval;
403
- }
404
-
405
- // Calculate the inverse FFT
406
- ffts_execute (m_reversePlan, &m_forwardOutBuf[0 ], &m_reverseOutBuf[0 ]);
407
-
408
395
// Calculate the actual start and end of the samples, accounting for garbage at the beginning of the channel
409
396
size_t istart = groupDelaySamples;
410
397
size_t iend = depth;
@@ -420,9 +407,35 @@ void TestWaveformSource::DegradeSerialData(
420
407
}
421
408
422
409
else
423
- #endif
424
410
{
425
411
for (size_t i=0 ; i<depth; i++)
426
412
cap->m_samples [i] += noise (m_rng);
427
413
}
428
414
}
415
+
416
+ /* *
417
+ @brief Recalculate the cached S-parameters
418
+ */
419
+ void TestWaveformSource::InterpolateSparameters (float bin_hz, size_t nouts)
420
+ {
421
+ m_cachedBinSize = bin_hz;
422
+
423
+ auto & s21 = m_sparams[SPair (2 , 1 )];
424
+
425
+ m_resampledSparamSines.resize (nouts);
426
+ m_resampledSparamCosines.resize (nouts);
427
+
428
+ for (size_t i=0 ; i<nouts; i++)
429
+ {
430
+ float freq = bin_hz * i;
431
+ auto pt = s21.InterpolatePoint (freq);
432
+ float mag = pt.m_amplitude ;
433
+ float ang = pt.m_phase ;
434
+
435
+ m_resampledSparamSines[i] = sin (ang) * mag;
436
+ m_resampledSparamCosines[i] = cos (ang) * mag;
437
+ }
438
+
439
+ m_resampledSparamSines.MarkModifiedFromCpu ();
440
+ m_resampledSparamCosines.MarkModifiedFromCpu ();
441
+ }
0 commit comments