Skip to content

Commit e376510

Browse files
committed
no always_inline, make compatible with Codeforces...
1 parent 04d63ed commit e376510

File tree

4 files changed

+71
-46
lines changed

4 files changed

+71
-46
lines changed

cp-algo/structures/bit_array.hpp

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
#ifndef CP_ALGO_STRUCTURES_BIT_ARRAY_HPP
22
#define CP_ALGO_STRUCTURES_BIT_ARRAY_HPP
33
#include "../util/bit.hpp"
4-
#include "../util/bump_alloc.hpp"
54
#include <cassert>
65
namespace cp_algo::structures {
76
template<typename C>
@@ -13,7 +12,7 @@ namespace cp_algo::structures {
1312
size_t words, n;
1413
alignas(32) Cont data;
1514

16-
void resize(size_t N) {
15+
constexpr void resize(size_t N) {
1716
n = N;
1817
words = (n + width - 1) / width;
1918
if constexpr (Resizable<Cont>) {
@@ -23,52 +22,53 @@ namespace cp_algo::structures {
2322
}
2423
}
2524

26-
_bit_array(): n(0), words(0), data() {}
27-
_bit_array(size_t N): data() {
25+
constexpr _bit_array(): n(0), words(0), data() {}
26+
constexpr _bit_array(size_t N): data() {
2827
resize(N);
2928
}
3029

31-
uint64_t& word(size_t x) {
30+
constexpr uint64_t& word(size_t x) {
3231
return data[x];
3332
}
34-
uint64_t word(size_t x) const {
33+
constexpr uint64_t word(size_t x) const {
3534
return data[x];
3635
}
37-
void set(size_t x) {
36+
constexpr void set_all(uint64_t val = -1) {
37+
for(auto& w: data) {w = val;}
38+
}
39+
constexpr void reset() {
40+
set_all(0);
41+
}
42+
constexpr void set(size_t x) {
3843
word(x / width) |= 1ULL << (x % width);
3944
}
40-
void reset(size_t x) {
45+
constexpr void reset(size_t x) {
4146
word(x / width) &= ~(1ULL << (x % width));
4247
}
43-
void reset() {
44-
for(auto& w: data) {
45-
w = 0;
46-
}
47-
}
48-
void flip(size_t x) {
48+
constexpr void flip(size_t x) {
4949
word(x / width) ^= 1ULL << (x % width);
5050
}
51-
bool test(size_t x) const {
51+
constexpr bool test(size_t x) const {
5252
return (word(x / width) >> (x % width)) & 1;
5353
}
54-
bool operator[](size_t x) const {
54+
constexpr bool operator[](size_t x) const {
5555
return test(x);
5656
}
57-
size_t size() const {
57+
constexpr size_t size() const {
5858
return n;
5959
}
6060
};
6161

62-
template<int N>
62+
template<size_t N>
6363
struct bit_array: _bit_array<std::array<uint64_t, (N + 63) / 64>> {
6464
using Base = _bit_array<std::array<uint64_t, (N + 63) / 64>>;
6565
using Base::Base, Base::words, Base::data;
66-
bit_array(): Base(N) {}
66+
constexpr bit_array(): Base(N) {}
6767
};
6868
struct dynamic_bit_array: _bit_array<std::vector<uint64_t>> {
6969
using Base = _bit_array<std::vector<uint64_t>>;
7070
using Base::Base, Base::words;
71-
dynamic_bit_array(size_t N): Base(N) {
71+
constexpr dynamic_bit_array(size_t N): Base(N) {
7272
data.resize(words);
7373
}
7474
};

cp-algo/structures/bitpack.hpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,27 @@ namespace cp_algo::structures {
1313
using Base::Base, Base::width, Base::words, Base::data, Base::n, Base::word;
1414
auto operator <=> (_bitpack const& t) const = default;
1515

16-
_bitpack(std::string &bits): _bitpack(std::size(bits)) {
16+
constexpr _bitpack(std::string &bits): _bitpack(std::size(bits)) {
1717
bits += std::string(-std::size(bits) % width, '0');
1818
for(size_t i = 0; i < words; i++) {
1919
word(i) = read_bits64(bits.data() + i * width);
2020
}
2121
}
2222

23-
_bitpack& xor_hint(_bitpack const& t, size_t hint) {
23+
constexpr _bitpack& xor_hint(_bitpack const& t, size_t hint) {
2424
for(size_t i = hint / width; i < std::size(data); i++) {
2525
data[i] ^= t.data[i];
2626
}
2727
return *this;
2828
}
29-
_bitpack& operator ^= (_bitpack const& t) {
29+
constexpr _bitpack& operator ^= (_bitpack const& t) {
3030
return xor_hint(t, 0);
3131
}
32-
_bitpack operator ^ (_bitpack const& t) const {
32+
constexpr _bitpack operator ^ (_bitpack const& t) const {
3333
return _bitpack(*this) ^= t;
3434
}
3535

36-
std::string to_string() const {
36+
constexpr std::string to_string() const {
3737
std::string res(words * width, '0');
3838
for(size_t i = 0; i < words; i++) {
3939
write_bits64(res.data() + i * width, word(i));
@@ -42,7 +42,20 @@ namespace cp_algo::structures {
4242
return res;
4343
}
4444

45-
size_t ctz() const {
45+
constexpr size_t count(size_t n) const {
46+
size_t res = 0;
47+
for(size_t i = 0; i < n / width; i++) {
48+
res += std::popcount(word(i));
49+
}
50+
if (n % width) {
51+
res += std::popcount(word(n / width) & mask(n % width));
52+
}
53+
return res;
54+
}
55+
constexpr size_t count() const {
56+
return count(n);
57+
}
58+
constexpr size_t ctz() const {
4659
size_t res = 0;
4760
size_t i = 0;
4861
while(i < words && word(i) == 0) {
@@ -56,7 +69,7 @@ namespace cp_algo::structures {
5669
}
5770
};
5871

59-
template<int N>
72+
template<size_t N>
6073
using bitpack = _bitpack<bit_array<N>>;
6174
using dynamic_bitpack = _bitpack<dynamic_bit_array>;
6275
}

cp-algo/util/bit.hpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ namespace cp_algo {
88
template<typename Uint>
99
constexpr size_t bit_width = sizeof(Uint) * 8;
1010

11+
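// Returns a word with the low n bits set. Precondition: n < 64 (shifting a 64-bit value by 64 is undefined).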
12+
uint64_t mask(size_t n) {
13+
return (1ULL << n) - 1;
14+
}
1115
size_t order_of_bit(auto x, size_t k) {
12-
return k ? std::popcount(x << (bit_width<decltype(x)> - k)) : 0;
16+
return std::popcount(x << ( -k % bit_width<decltype(x)>));
1317
}
14-
[[gnu::target("bmi2")]]
15-
size_t kth_set_bit(uint64_t x, size_t k) {
18+
[[gnu::target("bmi2")]] inline size_t kth_set_bit(uint64_t x, size_t k) {
1619
return std::countr_zero(_pdep_u64(1ULL << k, x));
1720
}
1821
template<int fl = 0>
@@ -25,15 +28,24 @@ namespace cp_algo {
2528
callback.template operator()<1ULL << fl>();
2629
}
2730
}
31+
void with_bit_ceil(size_t n, auto &&callback) {
32+
with_bit_floor(n, [&]<size_t N>() {
33+
if(N == n) {
34+
callback.template operator()<N>();
35+
} else {
36+
callback.template operator()<N << 1>();
37+
}
38+
});
39+
}
2840

29-
[[gnu::target("avx2"), gnu::always_inline]] inline uint32_t read_bits(char const* p) {
41+
[[gnu::target("avx2")]] inline uint32_t read_bits(char const* p) {
3042
return _mm256_movemask_epi8(__m256i(vector_cast<u8x32 const>(p[0]) + (127 - '0')));
3143
}
32-
[[gnu::always_inline]] inline uint64_t read_bits64(char const* p) {
44+
[[gnu::target("avx2")]] inline uint64_t read_bits64(char const* p) {
3345
return read_bits(p) | (uint64_t(read_bits(p + 32)) << 32);
3446
}
3547

36-
[[gnu::target("avx2"), gnu::always_inline]] inline void write_bits(char *p, uint32_t bits) {
48+
[[gnu::target("avx2")]] inline void write_bits(char *p, uint32_t bits) {
3749
static constexpr u8x32 shuffler = {
3850
0, 0, 0, 0, 0, 0, 0, 0,
3951
1, 1, 1, 1, 1, 1, 1, 1,
@@ -51,7 +63,7 @@ namespace cp_algo {
5163
p[z] = shuffled[z] & mask[z] ? '1' : '0';
5264
}
5365
}
54-
[[gnu::target("avx2"), gnu::always_inline]] inline void write_bits64(char *p, uint64_t bits) {
66+
[[gnu::target("avx2")]] inline void write_bits64(char *p, uint64_t bits) {
5567
write_bits(p, uint32_t(bits));
5668
write_bits(p + 32, uint32_t(bits >> 32));
5769
}

cp-algo/util/simd.hpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,21 @@ namespace cp_algo {
1616
using u8x32 = simd<uint8_t, 32>;
1717
using dx4 = simd<double, 4>;
1818

19-
[[gnu::always_inline]] inline dx4 abs(dx4 a) {
19+
[[gnu::target("avx2")]] inline dx4 abs(dx4 a) {
2020
return a < 0 ? -a : a;
2121
}
2222

2323
// https://stackoverflow.com/a/77376595
2424
// works for ints in (-2^51, 2^51)
2525
static constexpr dx4 magic = dx4() + (3ULL << 51);
26-
[[gnu::always_inline]] inline i64x4 lround(dx4 x) {
26+
[[gnu::target("avx2")]] inline i64x4 lround(dx4 x) {
2727
return i64x4(x + magic) - i64x4(magic);
2828
}
29-
[[gnu::always_inline]] inline dx4 to_double(i64x4 x) {
29+
[[gnu::target("avx2")]] inline dx4 to_double(i64x4 x) {
3030
return dx4(x + i64x4(magic)) - magic;
3131
}
3232

33-
[[gnu::always_inline]] inline dx4 round(dx4 a) {
33+
[[gnu::target("avx2")]] inline dx4 round(dx4 a) {
3434
return dx4{
3535
std::nearbyint(a[0]),
3636
std::nearbyint(a[1]),
@@ -39,37 +39,37 @@ namespace cp_algo {
3939
};
4040
}
4141

42-
[[gnu::always_inline]] inline u64x4 low32(u64x4 x) {
42+
[[gnu::target("avx2")]] inline u64x4 low32(u64x4 x) {
4343
return x & uint32_t(-1);
4444
}
45-
[[gnu::always_inline]] inline auto swap_bytes(auto x) {
45+
[[gnu::target("avx2")]] inline auto swap_bytes(auto x) {
4646
return decltype(x)(__builtin_shufflevector(u32x8(x), u32x8(x), 1, 0, 3, 2, 5, 4, 7, 6));
4747
}
48-
[[gnu::target("avx2"), gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
48+
[[gnu::target("avx2")]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
4949
auto x_ninv = u64x4(_mm256_mul_epu32(__m256i(x), __m256i() + imod));
5050
x += u64x4(_mm256_mul_epu32(__m256i(x_ninv), __m256i() + mod));
5151
return swap_bytes(x);
5252
}
5353

54-
[[gnu::target("avx2"), gnu::always_inline]] inline u64x4 montgomery_mul(u64x4 x, u64x4 y, uint32_t mod, uint32_t imod) {
54+
[[gnu::target("avx2")]] inline u64x4 montgomery_mul(u64x4 x, u64x4 y, uint32_t mod, uint32_t imod) {
5555
return montgomery_reduce(u64x4(_mm256_mul_epu32(__m256i(x), __m256i(y))), mod, imod);
5656
}
57-
[[gnu::always_inline]] inline u32x8 montgomery_mul(u32x8 x, u32x8 y, uint32_t mod, uint32_t imod) {
57+
[[gnu::target("avx2")]] inline u32x8 montgomery_mul(u32x8 x, u32x8 y, uint32_t mod, uint32_t imod) {
5858
return u32x8(montgomery_mul(u64x4(x), u64x4(y), mod, imod)) |
5959
u32x8(swap_bytes(montgomery_mul(u64x4(swap_bytes(x)), u64x4(swap_bytes(y)), mod, imod)));
6060
}
61-
[[gnu::always_inline]] inline dx4 rotate_right(dx4 x) {
61+
[[gnu::target("avx2")]] inline dx4 rotate_right(dx4 x) {
6262
static constexpr u64x4 shuffler = {3, 0, 1, 2};
6363
return __builtin_shuffle(x, shuffler);
6464
}
6565

6666
template<std::size_t Align = 32>
67-
[[gnu::always_inline]] inline bool is_aligned(const auto* p) noexcept {
67+
[[gnu::target("avx2")]] inline bool is_aligned(const auto* p) noexcept {
6868
return (reinterpret_cast<std::uintptr_t>(p) % Align) == 0;
6969
}
7070

7171
template<class Target>
72-
[[gnu::always_inline]] inline Target& vector_cast(auto &&p) {
72+
[[gnu::target("avx2")]] inline Target& vector_cast(auto &&p) {
7373
return *reinterpret_cast<Target*>(std::assume_aligned<alignof(Target)>(&p));
7474
}
7575
}

0 commit comments

Comments
 (0)