diff --git a/BUILD.gn b/BUILD.gn index 19b503187b4f79..3bf0898a1e21fe 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -775,13 +775,6 @@ group("gn_all") { "//tools/accessibility/inspect:ax_dump_tree", ] } - - # PFFFT. - deps += [ - "//third_party/pffft:fuzzers", - "//third_party/pffft:pffft_benchmark", - "//third_party/pffft:pffft_unittest", - ] } if ((is_linux || is_win) && enable_remoting && !use_ozone) { diff --git a/third_party/pffft/BUILD.gn b/third_party/pffft/BUILD.gn deleted file mode 100644 index ce40159eac8071..00000000000000 --- a/third_party/pffft/BUILD.gn +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2019 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -import("//testing/libfuzzer/fuzzer_test.gni") -import("//testing/test.gni") - -# TODO(https://crbug.com/webrtc/9577): Add architecture specific flags. - -config("simd_config") { - # TODO(https://crbug.com/webrtc/9577): Increase fuzzer coverage by using SIMD. - # TODO(https://crbug.com/webrtc/9577): Fix SIMD not detected on android arm64. - # TODO(https://crbug.com/webrtc/9577): Fix SIMD not detected on fuchsia arm64. - if (use_fuzzing_engine || is_android || is_fuchsia) { - cflags = [ "-DPFFFT_SIMD_DISABLE" ] - } -} - -static_library("pffft") { - configs += [ ":simd_config" ] - sources = [ - "src/pffft.c", - "src/pffft.h", - ] -} - -# Fuzzing. - -group("fuzzers") { - testonly = true - deps = [ - ":pffft_complex_fuzzer", - ":pffft_real_fuzzer", - ] -} - -fuzzer_testdata_dir = "$target_gen_dir/testdata" - -action("generate_pffft_fuzzer_corpus") { - script = "generate_seed_corpus.py" - sources = [ - "generate_seed_corpus.py", - ] - args = [ rebase_path(fuzzer_testdata_dir, root_build_dir) ] - outputs = [ - fuzzer_testdata_dir, - ] -} - -fuzzer_test("pffft_complex_fuzzer") { - sources = [ - "pffft_fuzzer.cc", - ] - cflags = [ "-DTRANSFORM_COMPLEX" ] - deps = [ - ":pffft", - ] - seed_corpus = fuzzer_testdata_dir - seed_corpus_deps = [ ":generate_pffft_fuzzer_corpus" ] -} - -fuzzer_test("pffft_real_fuzzer") { - sources = [ - "pffft_fuzzer.cc", - ] - cflags = [ "-DTRANSFORM_REAL" ] - deps = [ - ":pffft", - ] - seed_corpus = fuzzer_testdata_dir - seed_corpus_deps = [ ":generate_pffft_fuzzer_corpus" ] -} - -# Unit tests and benchmark. - -# This target must be used only for testing and benchmark purposes. -static_library("fftpack") { - testonly = true - sources = [ - "src/fftpack.c", - "src/fftpack.h", - ] - visibility = [ ":*" ] -} - -executable("pffft_benchmark") { - configs += [ ":simd_config" ] - testonly = true - sources = [ - "src/test_pffft.c", - ] - deps = [ - ":fftpack", - ":pffft", - ] -} - -test("pffft_unittest") { - sources = [ - "pffft_unittest.cc", - ] - deps = [ - ":fftpack", - ":pffft", - "//testing/gtest", - "//testing/gtest:gtest_main", - ] -} diff --git a/third_party/pffft/DEPS b/third_party/pffft/DEPS deleted file mode 100644 index fca61fcf64b71f..00000000000000 --- a/third_party/pffft/DEPS +++ /dev/null @@ -1,3 +0,0 @@ -include_rules = [ - "+testing/gtest/include/gtest", -] diff --git a/third_party/pffft/README.chromium b/third_party/pffft/README.chromium index 786c590f1ba16b..a0e1164c3df969 100644 --- a/third_party/pffft/README.chromium +++ b/third_party/pffft/README.chromium @@ -13,7 +13,3 @@ Description: PFFFT does 1D Fast Fourier Transforms, of single precision real and complex vectors. It tries do it fast, it tries to be correct, and it tries to be small. Computations do take advantage of SSE1 instructions on x86 cpus, Altivec on powerpc cpus, and NEON on ARM cpus. Local Modifications: - * 01-add_m_pi.diff: define M_PI if not defined - * 02-rmv_printf.diff: remove printf and stop including stdio.h - * 03-decl_validate_simd.diff: declare validate_pffft_simd() in pffft.h - * 04-add_m_ln2.diff: define M_LN2 if not defined diff --git a/third_party/pffft/generate_seed_corpus.py b/third_party/pffft/generate_seed_corpus.py deleted file mode 100755 index c48aa871188797..00000000000000 --- a/third_party/pffft/generate_seed_corpus.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python -# Copyright 2019 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -from __future__ import division -from __future__ import print_function - -from array import array -import os -import random -import sys - -MAX_INPUT_SIZE = int(1e6) -MAX_FLOAT32 = 3.4028235e+38 - - -def IsValidSize(n): - if n == 0: - return False - # PFFFT only supports transforms for inputs of length N of the form - # N = (2^a)*(3^b)*(5^c) where a >= 5, b >=0, c >= 0. - FACTORS = [2, 3, 5] - factorization = [0, 0, 0] - for i, factor in enumerate(FACTORS): - while n % factor == 0: - n = n // factor - factorization[i] += 1 - return factorization[0] >= 5 and n == 1 - - -def WriteFloat32ArrayToFile(file_path, size, generator): - """Generate an array of float32 values and writes to file.""" - with open(file_path, 'wb') as f: - float_array = array('f', [generator() for _ in range(size)]) - float_array.tofile(f) - - -def main(): - if len(sys.argv) < 2: - print('Usage: %s ' % sys.argv[0]) - sys.exit(1) - - output_path = sys.argv[1] - # Create output directory if missing. - if not os.path.exists(output_path): - os.makedirs(output_path) - - # List of valid input sizes. - N = [n for n in range(MAX_INPUT_SIZE) if IsValidSize(n)] - - # Set the seed to always generate the same random data. - random.seed(0) - - # Generate different types of input arrays for each target length. - for n in N: - # Zeros. - WriteFloat32ArrayToFile( - os.path.join(output_path, 'zeros_%d' % n), n, lambda: 0) - # Max float 32. - WriteFloat32ArrayToFile( - os.path.join(output_path, 'max_%d' % n), n, lambda: MAX_FLOAT32) - # Random values in the s16 range. - rnd_s16 = lambda: 32768.0 * 2.0 * (random.random() - 0.5) - WriteFloat32ArrayToFile( - os.path.join(output_path, 'rnd_s16_%d' % n), n, rnd_s16) - - sys.exit(0) - - -if __name__ == '__main__': - main() diff --git a/third_party/pffft/patches/01-add_m_pi.diff b/third_party/pffft/patches/01-add_m_pi.diff deleted file mode 100644 index aa4b98c71475f9..00000000000000 --- a/third_party/pffft/patches/01-add_m_pi.diff +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c -index 0603f2b1f698..7934db448a09 100644 ---- a/third_party/pffft/src/pffft.c -+++ b/third_party/pffft/src/pffft.c -@@ -82,6 +82,9 @@ - # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) - #endif - -+#ifndef M_PI -+#define M_PI 3.14159265358979323846 -+#endif - - /* - vector support macros: the rest of the code is independant of diff --git a/third_party/pffft/patches/02-rmv_printf.diff b/third_party/pffft/patches/02-rmv_printf.diff deleted file mode 100644 index 3a9149461e2c27..00000000000000 --- a/third_party/pffft/patches/02-rmv_printf.diff +++ /dev/null @@ -1,60 +0,0 @@ -diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c -index 7934db448a09..2e0c2f651438 100644 ---- a/third_party/pffft/src/pffft.c -+++ b/third_party/pffft/src/pffft.c -@@ -59,7 +59,7 @@ - - #include "pffft.h" - #include --#include -+// #include - #include - #include - -@@ -222,31 +222,35 @@ void validate_pffft_simd() { - memcpy(a3.f, f+12, 4*sizeof(float)); - - t = a0; u = a1; t.v = VZERO(); -- printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0); -+ // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); -+ assertv4(t, 0, 0, 0, 0); - t.v = VADD(a1.v, a2.v); -- printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18); -+ // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); -+ assertv4(t, 12, 14, 16, 18); - t.v = VMUL(a1.v, a2.v); -- printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); -+ // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); -+ assertv4(t, 32, 45, 60, 77); - t.v = VMADD(a1.v, a2.v,a0.v); -- printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); -+ // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); -+ assertv4(t, 32, 46, 62, 80); - - INTERLEAVE2(a1.v,a2.v,t.v,u.v); -- printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); -+ // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); - assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); - UNINTERLEAVE2(a1.v,a2.v,t.v,u.v); -- printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); -+ // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); - assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11); - - t.v=LD_PS1(f[15]); -- printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); -+ // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 15, 15, 15, 15); - t.v = VSWAPHL(a1.v, a2.v); -- printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); -+ // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 8, 9, 6, 7); - VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v); -- printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", -- a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], -- a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); -+ // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", -+ // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], -+ // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); - assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); - } - #endif //!PFFFT_SIMD_DISABLE diff --git a/third_party/pffft/patches/03-decl_validate_simd.diff b/third_party/pffft/patches/03-decl_validate_simd.diff deleted file mode 100644 index 45315117eefcce..00000000000000 --- a/third_party/pffft/patches/03-decl_validate_simd.diff +++ /dev/null @@ -1,16 +0,0 @@ -diff --git a/third_party/pffft/src/pffft.h b/third_party/pffft/src/pffft.h -index 2bfa7b3ebcfb..bb6f78d4b795 100644 ---- a/third_party/pffft/src/pffft.h -+++ b/third_party/pffft/src/pffft.h -@@ -83,6 +83,11 @@ - extern "C" { - #endif - -+#ifndef PFFFT_SIMD_DISABLE -+ // Detects compiler bugs with respect to simd instruction. -+ void validate_pffft_simd(); -+#endif -+ - /* opaque struct holding internal stuff (precomputed twiddle factors) - this struct can be shared by many threads as it contains only - read-only data. diff --git a/third_party/pffft/patches/04-add_m_ln2.diff b/third_party/pffft/patches/04-add_m_ln2.diff deleted file mode 100644 index 171c12ab065362..00000000000000 --- a/third_party/pffft/patches/04-add_m_ln2.diff +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/third_party/pffft/src/test_pffft.c b/third_party/pffft/src/test_pffft.c -index ab78b4dcc0d5..deaf98ed9f09 100644 ---- a/third_party/pffft/src/test_pffft.c -+++ b/third_party/pffft/src/test_pffft.c -@@ -45,6 +45,10 @@ - # include - #endif - -+#ifndef M_LN2 -+#define M_LN2 0.693147180559945309417 -+#endif -+ - #define MAX(x,y) ((x)>(y)?(x):(y)) - - double frand() { diff --git a/third_party/pffft/pffft_fuzzer.cc b/third_party/pffft/pffft_fuzzer.cc deleted file mode 100644 index 904ad984553e5e..00000000000000 --- a/third_party/pffft/pffft_fuzzer.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2019 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include -#include -#include -#include - -#include "third_party/pffft/src/pffft.h" - -namespace { - -#if defined(TRANSFORM_REAL) -// Real FFT. -constexpr pffft_transform_t kTransform = PFFFT_REAL; -constexpr size_t kSizeOfOneSample = sizeof(float); -#elif defined(TRANSFORM_COMPLEX) -// Complex FFT. -constexpr pffft_transform_t kTransform = PFFFT_COMPLEX; -constexpr size_t kSizeOfOneSample = 2 * sizeof(float); // Real plus imaginary. -#else -#error FFT transform type not defined. -#endif - -bool IsValidSize(size_t n) { - if (n == 0) { - return false; - } - // PFFFT only supports transforms for inputs of length N of the form - // N = (2^a)*(3^b)*(5^c) where a >= 5, b >=0, c >= 0. - constexpr std::array kFactors = {2, 3, 5}; - std::array factorization{}; - for (size_t i = 0; i < kFactors.size(); ++i) { - const int factor = kFactors[i]; - while (n % factor == 0) { - n /= factor; - factorization[i]++; - } - } - return factorization[0] >= 5 && n == 1; -} - -float* AllocatePffftBuffer(size_t number_of_bytes) { - return static_cast(pffft_aligned_malloc(number_of_bytes)); -} - -} // namespace - -// Entry point for LibFuzzer. -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - // Set the number of FFT points to use |data| as input vector. - // The latter is truncated if the number of bytes is not an integer - // multiple of the size of one sample (which is either a real or a complex - // floating point number). - const size_t fft_size = size / kSizeOfOneSample; - if (!IsValidSize(fft_size)) { - return 0; - } - - const size_t number_of_bytes = fft_size * kSizeOfOneSample; - assert(number_of_bytes <= size); - - // Allocate input and output buffers. - float* in = AllocatePffftBuffer(number_of_bytes); - float* out = AllocatePffftBuffer(number_of_bytes); - - // Copy input data. - std::memcpy(in, reinterpret_cast(data), number_of_bytes); - - // Setup FFT. - PFFFT_Setup* pffft_setup = pffft_new_setup(fft_size, kTransform); - - // Call different PFFFT functions to maximize the coverage. - pffft_transform(pffft_setup, in, out, nullptr, PFFFT_FORWARD); - pffft_zconvolve_accumulate(pffft_setup, out, out, out, 1.f); - pffft_transform_ordered(pffft_setup, in, out, nullptr, PFFFT_BACKWARD); - - // Release memory. - pffft_aligned_free(in); - pffft_aligned_free(out); - pffft_destroy_setup(pffft_setup); - - return 0; -} diff --git a/third_party/pffft/pffft_unittest.cc b/third_party/pffft/pffft_unittest.cc deleted file mode 100644 index 3f85c16ccbf897..00000000000000 --- a/third_party/pffft/pffft_unittest.cc +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright 2019 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "testing/gtest/include/gtest/gtest.h" -#include "third_party/pffft/src/fftpack.h" -#include "third_party/pffft/src/pffft.h" - -#define MAX(x, y) ((x) > (y) ? (x) : (y)) - -namespace pffft { -namespace test { -namespace { - -static constexpr int kFftSizes[] = { - 16, 32, 64, 96, 128, 160, 192, 256, 288, 384, 5 * 96, 512, - 576, 5 * 128, 800, 864, 1024, 2048, 2592, 4000, 4096, 12000, 36864}; - -double frand() { - return rand() / (double)RAND_MAX; -} - -void PffftValidate(int fft_size, bool complex_fft) { - PFFFT_Setup* pffft_status = - pffft_new_setup(fft_size, complex_fft ? PFFFT_COMPLEX : PFFFT_REAL); - ASSERT_TRUE(pffft_status) << "FFT size (" << fft_size << ") not supported."; - - int num_floats = fft_size * (complex_fft ? 2 : 1); - int num_bytes = num_floats * sizeof(float); - float* ref = static_cast(pffft_aligned_malloc(num_bytes)); - float* in = static_cast(pffft_aligned_malloc(num_bytes)); - float* out = static_cast(pffft_aligned_malloc(num_bytes)); - float* tmp = static_cast(pffft_aligned_malloc(num_bytes)); - float* tmp2 = static_cast(pffft_aligned_malloc(num_bytes)); - - for (int pass = 0; pass < 2; ++pass) { - SCOPED_TRACE(pass); - float ref_max = 0; - int k; - - // Compute reference solution with FFTPACK. - if (pass == 0) { - float* fftpack_buff = - static_cast(malloc(2 * num_bytes + 15 * sizeof(float))); - for (k = 0; k < num_floats; ++k) { - ref[k] = in[k] = frand() * 2 - 1; - out[k] = 1e30; - } - - if (!complex_fft) { - rffti(fft_size, fftpack_buff); - rfftf(fft_size, ref, fftpack_buff); - // Use our ordering for real FFTs instead of the one of fftpack. - { - float refN = ref[fft_size - 1]; - for (k = fft_size - 2; k >= 1; --k) - ref[k + 1] = ref[k]; - ref[1] = refN; - } - } else { - cffti(fft_size, fftpack_buff); - cfftf(fft_size, ref, fftpack_buff); - } - - free(fftpack_buff); - } - - for (k = 0; k < num_floats; ++k) { - ref_max = MAX(ref_max, fabs(ref[k])); - } - - // Pass 0: non canonical ordering of transform coefficients. - if (pass == 0) { - // Test forward transform, with different input / output. - pffft_transform(pffft_status, in, tmp, 0, PFFFT_FORWARD); - memcpy(tmp2, tmp, num_bytes); - memcpy(tmp, in, num_bytes); - pffft_transform(pffft_status, tmp, tmp, 0, PFFFT_FORWARD); - for (k = 0; k < num_floats; ++k) { - SCOPED_TRACE(k); - EXPECT_EQ(tmp2[k], tmp[k]); - } - - // Test reordering. - pffft_zreorder(pffft_status, tmp, out, PFFFT_FORWARD); - pffft_zreorder(pffft_status, out, tmp, PFFFT_BACKWARD); - for (k = 0; k < num_floats; ++k) { - SCOPED_TRACE(k); - EXPECT_EQ(tmp2[k], tmp[k]); - } - pffft_zreorder(pffft_status, tmp, out, PFFFT_FORWARD); - } else { - // Pass 1: canonical ordering of transform coefficients. - pffft_transform_ordered(pffft_status, in, tmp, 0, PFFFT_FORWARD); - memcpy(tmp2, tmp, num_bytes); - memcpy(tmp, in, num_bytes); - pffft_transform_ordered(pffft_status, tmp, tmp, 0, PFFFT_FORWARD); - for (k = 0; k < num_floats; ++k) { - SCOPED_TRACE(k); - EXPECT_EQ(tmp2[k], tmp[k]); - } - memcpy(out, tmp, num_bytes); - } - - { - for (k = 0; k < num_floats; ++k) { - SCOPED_TRACE(k); - EXPECT_NEAR(ref[k], out[k], 1e-3 * ref_max) << "Forward FFT mismatch"; - } - - if (pass == 0) { - pffft_transform(pffft_status, tmp, out, 0, PFFFT_BACKWARD); - } else { - pffft_transform_ordered(pffft_status, tmp, out, 0, PFFFT_BACKWARD); - } - memcpy(tmp2, out, num_bytes); - memcpy(out, tmp, num_bytes); - if (pass == 0) { - pffft_transform(pffft_status, out, out, 0, PFFFT_BACKWARD); - } else { - pffft_transform_ordered(pffft_status, out, out, 0, PFFFT_BACKWARD); - } - for (k = 0; k < num_floats; ++k) { - assert(tmp2[k] == out[k]); - out[k] *= 1.f / fft_size; - } - for (k = 0; k < num_floats; ++k) { - SCOPED_TRACE(k); - EXPECT_NEAR(in[k], out[k], 1e-3 * ref_max) << "Reverse FFT mismatch"; - } - } - - // Quick test of the circular convolution in FFT domain. - { - float conv_err = 0, conv_max = 0; - - pffft_zreorder(pffft_status, ref, tmp, PFFFT_FORWARD); - memset(out, 0, num_bytes); - pffft_zconvolve_accumulate(pffft_status, ref, ref, out, 1.0); - pffft_zreorder(pffft_status, out, tmp2, PFFFT_FORWARD); - - for (k = 0; k < num_floats; k += 2) { - float ar = tmp[k], ai = tmp[k + 1]; - if (complex_fft || k > 0) { - tmp[k] = ar * ar - ai * ai; - tmp[k + 1] = 2 * ar * ai; - } else { - tmp[0] = ar * ar; - tmp[1] = ai * ai; - } - } - - for (k = 0; k < num_floats; ++k) { - float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); - if (d > conv_err) - conv_err = d; - if (e > conv_max) - conv_max = e; - } - EXPECT_LT(conv_err, 1e-5 * conv_max) << "zconvolve error"; - } - } - - pffft_destroy_setup(pffft_status); - pffft_aligned_free(ref); - pffft_aligned_free(in); - pffft_aligned_free(out); - pffft_aligned_free(tmp); - pffft_aligned_free(tmp2); -} - -} // namespace - -TEST(PffftTest, ValidateReal) { - for (int fft_size : kFftSizes) { - SCOPED_TRACE(fft_size); - if (fft_size == 16) { - continue; - } - PffftValidate(fft_size, /*complex_fft=*/false); - } -} - -TEST(PffftTest, ValidateComplex) { - for (int fft_size : kFftSizes) { - SCOPED_TRACE(fft_size); - PffftValidate(fft_size, /*complex_fft=*/true); - } -} - -} // namespace test -} // namespace pffft diff --git a/third_party/pffft/src/pffft.c b/third_party/pffft/src/pffft.c index 2e0c2f65143859..0603f2b1f6980d 100644 --- a/third_party/pffft/src/pffft.c +++ b/third_party/pffft/src/pffft.c @@ -59,7 +59,7 @@ #include "pffft.h" #include -// #include +#include #include #include @@ -82,9 +82,6 @@ # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) #endif -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif /* vector support macros: the rest of the code is independant of @@ -222,35 +219,31 @@ void validate_pffft_simd() { memcpy(a3.f, f+12, 4*sizeof(float)); t = a0; u = a1; t.v = VZERO(); - // printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 0, 0, 0, 0); + printf("VZERO=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 0, 0, 0, 0); t.v = VADD(a1.v, a2.v); - // printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 12, 14, 16, 18); + printf("VADD(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 12, 14, 16, 18); t.v = VMUL(a1.v, a2.v); - // printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 32, 45, 60, 77); + printf("VMUL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 45, 60, 77); t.v = VMADD(a1.v, a2.v,a0.v); - // printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); - assertv4(t, 32, 46, 62, 80); + printf("VMADD(4:7,8:11,0:3)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 32, 46, 62, 80); INTERLEAVE2(a1.v,a2.v,t.v,u.v); - // printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); + printf("INTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); assertv4(t, 4, 8, 5, 9); assertv4(u, 6, 10, 7, 11); UNINTERLEAVE2(a1.v,a2.v,t.v,u.v); - // printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); + printf("UNINTERLEAVE2(4:7,8:11)=[%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3], u.f[0], u.f[1], u.f[2], u.f[3]); assertv4(t, 4, 6, 8, 10); assertv4(u, 5, 7, 9, 11); t.v=LD_PS1(f[15]); - // printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); + printf("LD_PS1(15)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 15, 15, 15, 15); t.v = VSWAPHL(a1.v, a2.v); - // printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); + printf("VSWAPHL(4:7,8:11)=[%2g %2g %2g %2g]\n", t.f[0], t.f[1], t.f[2], t.f[3]); assertv4(t, 8, 9, 6, 7); VTRANSPOSE4(a0.v, a1.v, a2.v, a3.v); - // printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", - // a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], - // a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); + printf("VTRANSPOSE4(0:3,4:7,8:11,12:15)=[%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g] [%2g %2g %2g %2g]\n", + a0.f[0], a0.f[1], a0.f[2], a0.f[3], a1.f[0], a1.f[1], a1.f[2], a1.f[3], + a2.f[0], a2.f[1], a2.f[2], a2.f[3], a3.f[0], a3.f[1], a3.f[2], a3.f[3]); assertv4(a0, 0, 4, 8, 12); assertv4(a1, 1, 5, 9, 13); assertv4(a2, 2, 6, 10, 14); assertv4(a3, 3, 7, 11, 15); } #endif //!PFFFT_SIMD_DISABLE diff --git a/third_party/pffft/src/pffft.h b/third_party/pffft/src/pffft.h index e235f12e4753fd..2bfa7b3ebcfb1b 100644 --- a/third_party/pffft/src/pffft.h +++ b/third_party/pffft/src/pffft.h @@ -83,113 +83,92 @@ extern "C" { #endif -#ifndef PFFFT_SIMD_DISABLE -// Detects compiler bugs with respect to simd instruction. -void validate_pffft_simd(); -#endif - -/* opaque struct holding internal stuff (precomputed twiddle factors) - this struct can be shared by many threads as it contains only - read-only data. -*/ -typedef struct PFFFT_Setup PFFFT_Setup; - -/* direction of the transform */ -typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; - -/* type of transform */ -typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; - -/* - prepare for performing transforms of size N -- the returned - PFFFT_Setup structure is read-only so it can safely be shared by - multiple concurrent threads. -*/ -PFFFT_Setup* pffft_new_setup(int N, pffft_transform_t transform); -void pffft_destroy_setup(PFFFT_Setup*); -/* - Perform a Fourier transform , The z-domain data is stored in the - most efficient order for transforming it back, or using it for - convolution. If you need to have its content sorted in the - "usual" way, that is as an array of interleaved complex numbers, - either use pffft_transform_ordered , or call pffft_zreorder after - the forward fft, and before the backward fft. - - Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. - Typically you will want to scale the backward transform by 1/N. - - The 'work' pointer should point to an area of N (2*N for complex - fft) floats, properly aligned. If 'work' is NULL, then stack will - be used instead (this is probably the best strategy for small - FFTs, say for N < 16384). - - input and output may alias. -*/ -void pffft_transform(PFFFT_Setup* setup, - const float* input, - float* output, - float* work, - pffft_direction_t direction); - -/* - Similar to pffft_transform, but makes sure that the output is - ordered as expected (interleaved complex numbers). This is - similar to calling pffft_transform and then pffft_zreorder. - - input and output may alias. -*/ -void pffft_transform_ordered(PFFFT_Setup* setup, - const float* input, - float* output, - float* work, - pffft_direction_t direction); - -/* - call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., - PFFFT_FORWARD) if you want to have the frequency components in - the correct "canonical" order, as interleaved complex numbers. - - (for real transforms, both 0-frequency and half frequency - components, which are real, are assembled in the first entry as - F(0)+i*F(n/2+1). Note that the original fftpack did place - F(n/2+1) at the end of the arrays). - - input and output should not alias. -*/ -void pffft_zreorder(PFFFT_Setup* setup, - const float* input, - float* output, - pffft_direction_t direction); - -/* - Perform a multiplication of the frequency components of dft_a and - dft_b and accumulate them into dft_ab. The arrays should have - been obtained with pffft_transform(.., PFFFT_FORWARD) and should - *not* have been reordered with pffft_zreorder (otherwise just - perform the operation yourself as the dft coefs are stored as - interleaved complex numbers). - - the operation performed is: dft_ab += (dft_a * fdt_b)*scaling - - The dft_a, dft_b and dft_ab pointers may alias. -*/ -void pffft_zconvolve_accumulate(PFFFT_Setup* setup, - const float* dft_a, - const float* dft_b, - float* dft_ab, - float scaling); - -/* - the float buffers must have the correct alignment (16-byte boundary - on intel and powerpc). This function may be used to obtain such - correctly aligned buffers. -*/ -void* pffft_aligned_malloc(size_t nb_bytes); -void pffft_aligned_free(void*); - -/* return 4 or 1 wether support SSE/Altivec instructions was enable when - * building pffft.c */ -int pffft_simd_size(); + /* opaque struct holding internal stuff (precomputed twiddle factors) + this struct can be shared by many threads as it contains only + read-only data. + */ + typedef struct PFFFT_Setup PFFFT_Setup; + + /* direction of the transform */ + typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; + + /* type of transform */ + typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; + + /* + prepare for performing transforms of size N -- the returned + PFFFT_Setup structure is read-only so it can safely be shared by + multiple concurrent threads. + */ + PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); + void pffft_destroy_setup(PFFFT_Setup *); + /* + Perform a Fourier transform , The z-domain data is stored in the + most efficient order for transforming it back, or using it for + convolution. If you need to have its content sorted in the + "usual" way, that is as an array of interleaved complex numbers, + either use pffft_transform_ordered , or call pffft_zreorder after + the forward fft, and before the backward fft. + + Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. + Typically you will want to scale the backward transform by 1/N. + + The 'work' pointer should point to an area of N (2*N for complex + fft) floats, properly aligned. If 'work' is NULL, then stack will + be used instead (this is probably the best strategy for small + FFTs, say for N < 16384). + + input and output may alias. + */ + void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); + + /* + Similar to pffft_transform, but makes sure that the output is + ordered as expected (interleaved complex numbers). This is + similar to calling pffft_transform and then pffft_zreorder. + + input and output may alias. + */ + void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); + + /* + call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., + PFFFT_FORWARD) if you want to have the frequency components in + the correct "canonical" order, as interleaved complex numbers. + + (for real transforms, both 0-frequency and half frequency + components, which are real, are assembled in the first entry as + F(0)+i*F(n/2+1). Note that the original fftpack did place + F(n/2+1) at the end of the arrays). + + input and output should not alias. + */ + void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); + + /* + Perform a multiplication of the frequency components of dft_a and + dft_b and accumulate them into dft_ab. The arrays should have + been obtained with pffft_transform(.., PFFFT_FORWARD) and should + *not* have been reordered with pffft_zreorder (otherwise just + perform the operation yourself as the dft coefs are stored as + interleaved complex numbers). + + the operation performed is: dft_ab += (dft_a * fdt_b)*scaling + + The dft_a, dft_b and dft_ab pointers may alias. + */ + void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); + + /* + the float buffers must have the correct alignment (16-byte boundary + on intel and powerpc). This function may be used to obtain such + correctly aligned buffers. + */ + void *pffft_aligned_malloc(size_t nb_bytes); + void pffft_aligned_free(void *); + + /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ + int pffft_simd_size(); #ifdef __cplusplus } diff --git a/third_party/pffft/src/test_pffft.c b/third_party/pffft/src/test_pffft.c index deaf98ed9f0902..512f0ae73acfcc 100644 --- a/third_party/pffft/src/test_pffft.c +++ b/third_party/pffft/src/test_pffft.c @@ -45,10 +45,6 @@ # include #endif -#ifndef M_LN2 -#define M_LN2 0.693147180559945309417 -#endif - #define MAX(x,y) ((x)>(y)?(x):(y)) double frand() { @@ -365,6 +361,10 @@ void benchmark_ffts(int N, int cplx) { pffft_aligned_free(Z); } +#ifndef PFFFT_SIMD_DISABLE +void validate_pffft_simd(); // a small function inside pffft.c that will detect compiler bugs with respect to simd instruction +#endif + int main(int argc, char **argv) { int Nvalues[] = { 64, 96, 128, 160, 192, 256, 384, 5*96, 512, 5*128, 3*256, 800, 1024, 2048, 2400, 4096, 8192, 9*1024, 16384, 32768, 256*1024, 1024*1024, -1 }; int i;