forked from gap-system/gap
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblister.h
481 lines (437 loc) · 16.9 KB
/
blister.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
/****************************************************************************
**
*W blister.h GAP source Martin Schönert
**
**
*Y Copyright (C) 1996, Lehrstuhl D für Mathematik, RWTH Aachen, Germany
*Y (C) 1998 School Math and Comp. Sci., University of St Andrews, Scotland
*Y Copyright (C) 2002 The GAP Group
**
** This file declares the functions that mainly operate on boolean lists.
** Because boolean lists are just a special case of lists many things are
** done in the list package.
**
** A *boolean list* is a list that has no holes and contains only 'true' and
** 'false'. For the full definition of boolean list see chapter "Boolean
** Lists" in the {\GAP} Manual. Read also the section "More about Boolean
** Lists" about the different internal representations of such lists.
*/
#ifndef GAP_BLISTER_H
#define GAP_BLISTER_H
#include "bool.h"
#include "objects.h"
/****************************************************************************
**
*F IS_BLIST_REP( <list> ) . . . . . check if <list> is in boolean list rep
*/
static inline Int IS_BLIST_REP(Obj list)
{
return T_BLIST <= TNUM_OBJ(list) &&
TNUM_OBJ(list) <= T_BLIST_SSORT + IMMUTABLE;
}
/****************************************************************************
**
*F SIZE_PLEN_BLIST( <plen> ) . . size for a blist with given physical length
**
** 'SIZE_PLEN_BLIST' returns the size that a boolean list with room for
** <plen> elements must at least have.
**
*/
static inline Int SIZE_PLEN_BLIST(Int plen)
{
GAP_ASSERT(plen >= 0);
return sizeof(Obj) + (plen + BIPEB - 1) / BIPEB * sizeof(UInt);
}
/****************************************************************************
**
*F LEN_BLIST( <list> ) . . . . . . . . . . . . . . length of a boolean list
**
** 'LEN_BLIST' returns the logical length of the boolean list <list>, as a C
** integer.
**
*/
static inline Int LEN_BLIST(Obj list)
{
GAP_ASSERT(IS_BLIST_REP(list));
return INT_INTOBJ(CONST_ADDR_OBJ(list)[0]);
}
/***************************************************************************
**
*F NUMBER_BLOCKS_BLIST(<list>) . . . . . . . . number of UInt blocks in list
**
*/
static inline Int NUMBER_BLOCKS_BLIST(Obj blist)
{
GAP_ASSERT(IS_BLIST_REP(blist));
return (LEN_BLIST(blist) + BIPEB - 1) / BIPEB;
}
/****************************************************************************
**
*F SET_LEN_BLIST( <list>, <len> ) . . . . set the length of a boolean list
**
** 'SET_LEN_BLIST' sets the length of the boolean list <list> to the value
** <len>, which must be a positive C integer.
**
*/
static inline void SET_LEN_BLIST(Obj list, Int len)
{
GAP_ASSERT(IS_BLIST_REP(list));
GAP_ASSERT(len >= 0);
ADDR_OBJ(list)[0] = INTOBJ_INT(len);
}
/****************************************************************************
**
*F BLOCKS_BLIST( <list> ) . . . . . . . . . . first block of a boolean list
**
** returns a pointer to the start of the data of the Boolean list
**
*/
static inline UInt * BLOCKS_BLIST(Obj list)
{
return ((UInt *)(ADDR_OBJ(list) + 1));
}
static inline const UInt * CONST_BLOCKS_BLIST(Obj list)
{
return ((const UInt *)(CONST_ADDR_OBJ(list) + 1));
}
/****************************************************************************
**
*F BLOCK_ELM_BLIST_PTR( <list>, <pos> ) . . ptr to a block of a boolean list
**
** 'BLOCK_ELM_BLIST_PTR' return a pointer to the block containing the
** <pos>-th element of the boolean list <list>. <pos> must be a positive
** integer less than or equal to the length of <list>.
*/
static inline UInt * BLOCK_ELM_BLIST_PTR(Obj list, UInt pos)
{
return BLOCKS_BLIST(list) + ((pos)-1) / BIPEB;
}
static inline const UInt * CONST_BLOCK_ELM_BLIST_PTR(Obj list, UInt pos)
{
return CONST_BLOCKS_BLIST(list) + ((pos)-1) / BIPEB;
}
/****************************************************************************
**
*F MASK_POS_BLIST( <pos> ) . . . . bit mask for position of a Boolean list
**
** 'MASK_POS_BLIST(<pos>)' returns a UInt with a single set bit in position
** '(<pos>-1) % BIPEB',
** useful for accessing the <pos>-th element of a blist.
*/
static inline UInt MASK_POS_BLIST(UInt pos)
{
return ((UInt)1) << (pos - 1) % BIPEB;
}
/****************************************************************************
**
*F TEST_BIT_BLIST( <list>, <pos> ) . . . . . . test a bit of a boolean list
**
** 'TEST_BIT_BLIST' return a non-zero value if the <pos>-th element of the
** boolean list <list> is 1, and otherwise 0. <pos> must be a positive
** integer less than or equal to the length of <list>.
*/
static inline Int TEST_BIT_BLIST(Obj list, UInt pos)
{
return *CONST_BLOCK_ELM_BLIST_PTR(list, pos) & MASK_POS_BLIST(pos);
}
/****************************************************************************
**
*F ELM_BLIST( <list>, <pos> ) . . . . . . . . . . element of a boolean list
**
** 'ELM_BLIST' return the <pos>-th bit of the boolean list <list>, which is
** either 'true' or 'false'. <pos> must be a positive integer less than or
** equal to the length of <list>.
*/
static inline Obj ELM_BLIST(Obj list, UInt pos)
{
return TEST_BIT_BLIST(list, pos) ? True : False;
}
/****************************************************************************
**
*F SET_BIT_BLIST( <list>, <pos> ) . . . . . . . set a bit of a boolean list
*F CLEAR_BIT_BLIST( <list>, <pos> ) . . . . . clears a bit of a boolean list
**
** These function set the bit at position <pos> in the boolean list <list>
** to 1 resp. 0. <pos> must be a positive integer less than or equal to
** the length of <list>.
*/
static inline void SET_BIT_BLIST(Obj list, UInt pos)
{
*BLOCK_ELM_BLIST_PTR(list, pos) |= MASK_POS_BLIST(pos);
}
static inline void CLEAR_BIT_BLIST(Obj list, UInt pos)
{
*BLOCK_ELM_BLIST_PTR(list, pos) &= ~MASK_POS_BLIST(pos);
}
/****************************************************************************
**
*F COUNT_TRUES_BLOCK( <block> ) . . . . . . . . . . . count number of trues
**
** 'COUNT_TRUES_BLOCK( <block> )' returns the number of 1 bits in the
** UInt <block>. Two implementations are included below. One uses the
** gcc builtin __builtin_popcount which usually generates the popcntl
** or popcntq instruction on sufficiently recent CPUs. The other uses
** the algorithm described in the original comment below:
**
** The sequence to compute the number of bits in a block is quite clever.
** The idea is that after the <i>-th instruction each subblock of $2^i$ bits
** holds the number of bits of this subblock in the original block <m>.
** This is illustrated in the example below for a block of with 8 bits:
**
** // a b c d e f g h
** m = (m & 0x55) + ((m >> 1) & 0x55);
** // . b . d . f . h + . a . c . e . g = a+b c+d e+f g+h
** m = (m & 0x33) + ((m >> 2) & 0x33);
** // . . c+d . . g+h + . . a+b . . e+f = a+b+c+d e+f+g+h
** m = (m & 0x0f) + ((m >> 4) & 0x0f);
** // . . . . e+f+g+h + . . . . a+b+c+d = a+b+c+d+e+f+g+h
**
** In the actual code some unnecessary mask have been removed, improving
** performance quite a bit, because masks are 32 bit immediate values for
** which most RISC processors need two instructions to load them. Talking
** about performance. The code is close to optimal, it should compile to
** only about 22 MIPS or SPARC instructions. Dividing the block into 4
** bytes and looking up the number of bits of a byte in a table may be 10%
** faster, but only if the table lives in the data cache.
**
** At this time (2017) the optimum choice of implementation for this
** function as used seems to be use the gcc builtin on all systems --
** but see the comments below in the documentation of
** 'COUNT_TRUES_BLOCKS'.
**
*/
static inline UInt COUNT_TRUES_BLOCK( UInt block ) {
#if USE_POPCNT && defined(HAVE___BUILTIN_POPCOUNTL)
return __builtin_popcountl(block);
#else
#ifdef SYS_IS_64_BIT
block =
(block & 0x5555555555555555L) + ((block >> 1) & 0x5555555555555555L);
block =
(block & 0x3333333333333333L) + ((block >> 2) & 0x3333333333333333L);
block = (block + (block >> 4)) & 0x0f0f0f0f0f0f0f0fL;
block = (block + (block >> 8));
block = (block + (block >> 16));
block = (block + (block >> 32)) & 0x00000000000000ffL;
#else
block = (block & 0x55555555) + ((block >> 1) & 0x55555555);
block = (block & 0x33333333) + ((block >> 2) & 0x33333333);
block = (block + (block >> 4)) & 0x0f0f0f0f;
block = (block + (block >> 8));
block = (block + (block >> 16)) & 0x000000ff;
#endif
return block;
#endif
}
/****************************************************************************
**
*F COUNT_TRUES_BLOCKS( <ptr>, <nblocks> )
**
** 'COUNT_TRUES_BLOCKS( <ptr>, <nblocks> )' returns the total number of 1
** bits in the array of UInt values starting at <ptr> and including a total
** of <nblocks> UInts. The only reason this function is really needed is
** that, owing to hardware bugs and compiler peculiarities current in 2017,
** (see http://danluu.com/assembly-intrinsics/ or
** https://stackoverflow.com/questions/25078285?) manually unrolling this
** loop makes the code substantially faster on almost all CPUS.
**
** This interacts strangely with the choice of algorithm for
** COUNT_TRUES_BLOCK above. Without the loop unrolling, not using the gcc
** builtin is sometimes faster, apparently because it allows the compiler
** to unroll the loop and then generate SSE or AVX code to process multiple
** words at once. With the loop unrolling the builtin is always faster, and
** will itself generate AVX code when compiling for suitable processors.
**
*T monitor this situation periodically.
*/
static inline UInt COUNT_TRUES_BLOCKS(const UInt * ptr, UInt nblocks)
{
UInt n = 0;
while (nblocks >= 4) {
UInt n1 = COUNT_TRUES_BLOCK(*ptr++);
UInt n2 = COUNT_TRUES_BLOCK(*ptr++);
UInt n3 = COUNT_TRUES_BLOCK(*ptr++);
UInt n4 = COUNT_TRUES_BLOCK(*ptr++);
n += n1 + n2 + n3 + n4;
nblocks -= 4;
}
while (nblocks) {
n += COUNT_TRUES_BLOCK(*ptr++);
nblocks--;
}
// return the number of bits
return n;
}
/****************************************************************************
**
*F * * * * * * * * * * * * * * list functions * * * * * * * * * * * * * * * *
*/
/****************************************************************************
**
*F AssBlist( <list>, <pos>, <val> ) . . . . . . . assign to a boolean list
**
** 'AssBlist' assigns the value <val> to the boolean list <list> at the
** position <pos>. It is the responsibility of the caller to ensure that
** <pos> is positive, and that <val> is not 0.
**
** 'AssBlist' is the function in 'AssListFuncs' for boolean lists.
**
** If <pos> is less than or equal to the logical length of the boolean list
** and <val> is 'true' or 'false' the assignment is done by setting the
** corresponding bit. If <pos> is one more than the logical length of the
** boolean list the assignment is done by resizing the boolean list if
** necessary, setting the corresponding bit and incrementing the logical
** length by one. Otherwise the boolean list is converted to an ordinary
** list and the assignment is performed the ordinary way.
*/
extern void AssBlist (
Obj list,
Int pos,
Obj val );
/****************************************************************************
**
*F ConvBlist( <list> ) . . . . . . . . . convert a list into a boolean list
**
** 'ConvBlist' changes the representation of boolean lists into the compact
** representation of type 'T_BLIST' described above.
*/
extern void ConvBlist (
Obj list );
/****************************************************************************
**
*F CopyBits( <fromblock>, <from-starting-bit>, <toblock>, <to-starting-bit>,
** <numbits> )
**
** 'CopyBits' copies <numbits> bits (numbering bits within a UInt
** from the least significant to the most significant) starting with
** bit number <from-starting-bit> of UInt *<fromblock> to a destination
** starting at bit <to-starting-bit> of *<toblock>. The source and
** destination are assumed to be non-overlapping.
**
** The full function is provided as an inline function here because it is
** used in both blister.c and vecgf2.c and the compiler can do significant
** optimisations if some or all of <frombits>, <tobits> and <numbits> are
** known at compile time.
*/
/* constructs a mask that selects bits <from> to <to> inclusive of a UInt */
static inline UInt MaskForCopyBits(UInt from, UInt to)
{
return ((to == BIPEB - 1) ? 0 : (1L << (to + 1))) - (1L << from);
}
/* copies a block of bits from the UInt <from> to the one pointed at
by <to> the block starts in <from> at <startbit> and ends at
<endbit>. It is shifted left by <shift> and must still fit within a
single word (so endbits + shift must be < BIPEB and frombits +
shift must be non-negative */
static inline void
CopyInWord(UInt * to, UInt startbit, UInt endbit, UInt from, Int shift)
{
UInt m = MaskForCopyBits(startbit + shift, endbit + shift);
*to &= ~m;
if (shift >= 0)
*to |= ((from << shift) & m);
else
*to |= ((from >> -shift) & m);
}
static ALWAYS_INLINE void CopyBits(const UInt * fromblock,
UInt frombit,
UInt * toblock,
UInt tobit,
UInt nbits)
{
UInt tailbits;
UInt x;
UInt wholeblocks;
if (!nbits)
return;
GAP_ASSERT(frombit < BIPEB);
GAP_ASSERT(tobit < BIPEB);
/* If the alignment of the two data blocks matches, things are relatively
* easy
*/
if (frombit == tobit) {
/* if the first and last words are the same word */
if ((frombit + nbits) < BIPEB) {
CopyInWord(toblock, frombit, frombit + nbits - 1, *fromblock, 0);
return;
}
/* do we need to start by copying a partial word */
if (frombit) {
CopyInWord(toblock, frombit, BIPEB - 1, *fromblock, 0);
fromblock++;
toblock++;
nbits -= (BIPEB - frombit);
frombit = 0;
tobit = 0;
}
/* Now move whole words */
if ((wholeblocks = nbits / BIPEB))
memcpy(toblock, fromblock, sizeof(UInt) * wholeblocks);
toblock += wholeblocks;
fromblock += wholeblocks;
nbits %= BIPEB;
/* Finally, we may need to finish with another partial word */
if (nbits)
CopyInWord(toblock, 0, nbits - 1, *fromblock, 0);
return;
}
/* Otherwise the bits are not aligned and we will be shifting */
if (tobit) {
/* How many bits are we going to put into the first destination word
*/
if (tobit + nbits <= BIPEB)
tailbits = nbits;
else
tailbits = BIPEB - tobit;
/* We might be able to get all we need from the first source word */
if (frombit + tailbits <= BIPEB) {
CopyInWord(toblock, frombit, frombit + tailbits - 1, *fromblock,
(tobit - frombit));
frombit += tailbits;
}
else {
CopyInWord(toblock, frombit, BIPEB - 1, *fromblock,
(tobit - frombit));
fromblock++;
CopyInWord(toblock, 0, tailbits + frombit - 1 - BIPEB,
fromblock[1], tobit + BIPEB - frombit);
frombit += tailbits - BIPEB;
}
toblock++;
nbits -= tailbits;
tobit = 0;
}
/* Main loop for long copies fills whole blocks of destination */
UInt m1 = MaskForCopyBits(frombit, BIPEB - 1);
while (nbits >= BIPEB) {
x = (*fromblock++ & m1) >> frombit;
x |= (*fromblock & ~m1) << (BIPEB - frombit);
*toblock++ = x;
nbits -= BIPEB;
}
/* Finally we may need to fill up a partial block at destination */
if (nbits) {
if (frombit + nbits <= BIPEB) {
CopyInWord(toblock, frombit, frombit + nbits - 1, *fromblock,
-frombit);
}
else {
CopyInWord(toblock, frombit, BIPEB - 1, *fromblock, -frombit);
fromblock++;
nbits -= BIPEB - frombit;
tobit = BIPEB - frombit;
frombit = 0;
CopyInWord(toblock, 0, nbits - 1, *fromblock, tobit);
}
}
}
/****************************************************************************
**
*F * * * * * * * * * * * * * initialize module * * * * * * * * * * * * * * *
*/
/****************************************************************************
**
*F InitInfoBlist() . . . . . . . . . . . . . . . . . table of init functions
*/
StructInitInfo * InitInfoBlist ( void );
#endif // GAP_BLISTER_H