-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMask.h
226 lines (208 loc) · 6.79 KB
/
Mask.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
/*
* Copyright 2020 Florent Bondoux
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "Charset.h"
#include "overflow.h"
namespace Maskuni {
/**
 * @brief Hold a mask and iterate over its content
 * A mask is a list of charsets.
 *
 * \a setPosition must be called before iterating over the mask
 * \a getCurrent should be used to get the first word of the mask.
 * \a getNext should be called with the same parameter to get the subsequent words.
 *
 * @tparam T Either char for 8-bit charsets or uint32_t for unicode codepoints
 */
template<typename T>
class Mask
{
    std::vector<Charset<T>> m_charsets; /*!< list of charsets from left to right */
    size_t m_n_charsets;                /*!< m_charsets.size() */
    uint64_t m_len;                     /*!< number of words: product of the charsets' lengths (0 while the mask is empty) */

    /**
     * @brief Account for a charset that has just been inserted into \a m_charsets
     * Updates the cached word count and the cached number of charsets.
     * Shared by every push_charset_* overload so that the overflow policy lives in one place.
     *
     * This method will abort if the length of the mask would not fit in an unsigned 64 bit integer
     *
     * @param charset_len length of the charset that was just inserted
     */
    void registerCharset(uint64_t charset_len)
    {
        if (m_n_charsets == 0) {
            // first charset: the word count is simply its length
            m_len = charset_len;
        } else if (umul64_overflow(m_len, charset_len, &m_len)) {
            std::fprintf(stderr, "Error: the length of the mask would overflow a 64 bits integer\n");
            std::abort();
        }
        m_n_charsets++;
    }

public:
    /**
     * @brief Create a new empty mask
     *
     * @param reserve Reserve memory for \a reserve charsets for faster insertions
     */
    Mask(unsigned int reserve = 0) : m_charsets(), m_n_charsets(0), m_len(0)
    {
        m_charsets.reserve(reserve);
    }

    /**
     * @brief erase all the content of the mask
     */
    void clear()
    {
        m_charsets.clear();
        m_len = 0;
        m_n_charsets = 0;
    }

    /**
     * @brief Add a charset to the right of the already defined charsets
     * This method will abort if the length of the mask would not fit in an unsigned 64 bit integer
     *
     * @param set characters
     * @param set_len number of characters
     */
    void push_charset_right(const T *set, uint64_t set_len)
    {
        m_charsets.emplace_back(set, set_len);
        registerCharset(m_charsets.back().getLen());
    }

    /**
     * @brief Add a charset to the right of the already defined charsets
     * This method will abort if the length of the mask would not fit in an unsigned 64 bit integer
     *
     * @param charset charset
     */
    void push_charset_right(const Charset<T> &charset)
    {
        m_charsets.emplace_back(charset);
        registerCharset(m_charsets.back().getLen());
    }

    /**
     * @brief Add a charset to the left of the already defined charsets
     * This method will abort if the length of the mask would not fit in an unsigned 64 bit integer
     *
     * @param set characters
     * @param set_len number of characters
     */
    void push_charset_left(const T *set, uint64_t set_len)
    {
        m_charsets.emplace(m_charsets.begin(), set, set_len);
        registerCharset(m_charsets.front().getLen());
    }

    /**
     * @brief Add a charset to the left of the already defined charsets
     * This method will abort if the length of the mask would not fit in an unsigned 64 bit integer
     *
     * @param charset charset
     */
    void push_charset_left(const Charset<T> &charset)
    {
        m_charsets.emplace(m_charsets.begin(), charset);
        registerCharset(m_charsets.front().getLen());
    }

    /**
     * @brief Get the length of this mask (number of words)
     *
     * @return Length of the mask
     */
    inline __attribute__((always_inline)) uint64_t getLen() const
    {
        return m_len;
    }

    /**
     * @brief Get the width of this mask (number of characters)
     *
     * @return Width of the mask
     */
    inline __attribute__((always_inline)) size_t getWidth() const
    {
        return m_charsets.size();
    }

    /**
     * @brief Set the current position in the mask (between 0 and \a getLen())
     * Must be called before using \a getCurrent and \a getNext
     *
     * A position greater than or equal to \a getLen() wraps around (modulo \a getLen()).
     * Nothing is done when the length of the mask is 0.
     *
     * @param o Position
     */
    void setPosition(uint64_t o)
    {
        if (m_len == 0) {
            return;
        }
        if (o >= m_len) {
            o = (o % m_len);
        }
        // Decompose the position from right to left, one charset per digit.
        // m_len is non-zero here, so no charset has a zero length and the
        // division below cannot be a division by zero.
        for (auto it = m_charsets.rbegin(); it != m_charsets.rend(); ++it) {
            uint64_t s = it->getLen();
            uint64_t q = o / s;
            uint64_t r = o - q * s;
            it->setPosition(r);
            o = q;
        }
    }

    /**
     * @brief Copy the current word into w without incrementing the mask
     * This method must be called to fully initialize a word.
     *
     * @param w buffer of at least getWidth() elements
     */
    inline __attribute__((always_inline)) void getCurrent(T *w)
    {
        for (size_t i = 0; i < m_n_charsets; i++) {
            m_charsets[i].getCurrent(w + i);
        }
    }

    /**
     * @brief Increment the mask and update a buffer with the next word
     * Only the changed characters of the \a w parameter are updated
     * therefore getNext should always be called with the same parameter
     * and only after initializing the first word with \a getCurrent.
     *
     * The word is iterated from right to left.
     *
     * @param w buffer of at least getWidth() elements
     * @return true if the mask is back to position 0 ("carry")
     */
    inline __attribute__((always_inline)) bool getNext(T *w)
    {
        bool carry = true;
        // advance the rightmost charset first and move left only while a charset wraps
        for (size_t i = m_n_charsets; carry && i != 0; i--) {
            carry = m_charsets[i - 1].getNext(w + (i - 1));
        }
        return carry;
    }
};
}