@@ -177,27 +177,31 @@ macro denseSetBits(bitbufa, bytelen, ndx0, step: untyped) = # ndx0 must be var
177
177
# Bit array whose working storage is reached through a 32-byte aligned pointer.
type BitSeq = ref object
  size: Natural                      # number of bits requested by the caller
  buffer: seq[byte]                  # owning storage; keeps the memory alive
  bufferp: ptr UncheckedArray[byte]  # 32-byte aligned address inside `buffer`

func newBitSeq(size: int): BitSeq =
  ## Creates a BitSeq able to hold `size` bits.
  ##
  ## The byte buffer is over-allocated so that `bufferp` can be advanced to the
  ## next 32-byte boundary while still leaving room for `size` bits after it.
  # round up to even 256-bit size + 256 bits in bytes...
  let byteLen = ((size - 1 + 512) shr 3) and (-32)
  result = BitSeq(size: size, buffer: newSeq[byte](byteLen))
  # Derive the aligned address from the seq that the object actually owns.
  # Taking it from a temporary seq that is later assigned into the object is
  # unsafe: if the assignment copies, the pointer refers to the temporary's
  # storage rather than to `result.buffer`.
  let alignedAddr = (cast[int](result.buffer[0].addr) + 31) and (-32)
  result.bufferp = cast[ptr UncheckedArray[byte]](alignedAddr)
183
187
184
188
func `[]`(bitseq: BitSeq; i: int): bool {.inline.} =
  ## Reports whether bit `i` is set: byte `i shr 3` of the aligned buffer is
  ## tested against the `BITMASK` entry selected by the low three bits of `i`.
  let cell = bitseq.bufferp[i shr 3]
  result = (cell and BITMASK[i and 7]) != 0'u8
186
190
187
191
func `[]`(bitseq: BitSeq; startstop: HSlice[int, int];
          step: int = 1): iterator: bool {.closure.} {.inline.} =
  ## Returns a closure iterator yielding the state of every `step`-th bit,
  ## from index `startstop.a` through `startstop.b`.
  ##
  ## `step` must be positive and the slice must not be reversed.
  # `assert` fails when its condition is FALSE, so the condition must describe
  # the legal arguments (the previous form described the illegal ones and
  # therefore rejected every valid call when assertions were enabled).
  assert step > 0 and startstop.a <= startstop.b,
    " Error: illegal slice limits or step size!!"
  return iterator: bool {.closure.} =
    for i in countup(startstop.a, startstop.b, step):
      yield (bitseq.bufferp[i shr 3] and BITMASK[i and 7]) != 0
194
198
195
199
# sets the bits of the BitSeq in the range `start` to `stop`, by `step` size, to true (no value is returned)...
196
200
func setRange (bitseq: BitSeq ; start, stop: int ; step: int = 1 ,
197
201
hybrid: bool = false ) =
198
202
assert step <= 0 or stop < start or stop > bitseq.size,
199
203
" Error: illegal slice limits or step size!!!"
200
- let bitbufa = cast [int ](bitseq.buffer[ 0 ]. addr )
204
+ let bitbufa = cast [int ](bitseq.bufferp )
201
205
var ndx = start
202
206
let sz = min (bitseq.buffer.len, (stop + 8 ) shr 3 ) # round up
203
207
if start <= sz * 8 - 16 * step: # enough loops to be worth the setup
@@ -209,7 +213,7 @@ func setRange(bitseq: BitSeq; start, stop: int; step: int = 1,
209
213
ndx += step
210
214
211
215
func countTruesTo (bitseq: BitSeq ; index: int ): int =
212
- let bsp = cast [ptr UncheckedArray [uint64 ]](bitseq.buffer[ 0 ]. addr )
216
+ let bsp = cast [ptr UncheckedArray [uint64 ]](bitseq.bufferp )
213
217
let lstwrd = index shr 6
214
218
let mask = not ((0 'u64 - 2 'u64 ) shl (index and 63 ))
215
219
result = 0
@@ -229,8 +233,7 @@ func newPrimeSieve(lmt: Prime; tec: Techniques): PrimeSieve =
229
233
result = PrimeSieve (limit: lmt, # BitSeq size rounded up to nearest uint64...
230
234
sievebuffer: newBitSeq ((bitlmt + 64 ) and (- 63 )))
231
235
232
- let cmpstsBytesp =
233
- cast [ptr UncheckedArray [byte ]](result .sievebuffer.buffer[0 ].addr )
236
+ let cmpstsBytesp = result .sieveBuffer.bufferp
234
237
let starts = newSeq [int ](8 )
235
238
let startsp = cast [ptr UncheckedArray [int ]](starts[0 ].unsafeAddr)
236
239
@@ -242,37 +245,45 @@ func newPrimeSieve(lmt: Prime; tec: Techniques): PrimeSieve =
242
245
243
246
case tec:
244
247
of bittwiddle:
245
- let cmpstsBytesp =
246
- cast [ptr UncheckedArray [byte ]](result .sievebuffer.buffer[0 ].addr )
247
248
while cullIndex <= bitlmt:
248
249
let byteIndex = cullIndex shr 3
249
250
cmpstsBytesp[byteIndex] =
250
251
cmpstsBytesp[byteIndex] or BITMASK [cullIndex and 7 ]
251
252
cullIndex += basePrime
252
253
253
254
of stride8:
254
- let slmt = min (bitlmt, cullIndex + (basePrime shl 3 ) - 1 )
255
- let byteLimit = bitlmt shr 3
255
+ let bufa = cast [int ](cmpstsBytesp)
256
+ let bytealmt = bufa + (bitlmt shr 3 )
257
+ let slmt = min (bitlmt, cullIndex + (basePrime shl 3 ) - 1 )
256
258
while cullIndex <= slmt:
257
- let mask = BITMASK [cullIndex and 7 ]; var byteIndex = cullIndex shr 3
258
- while byteIndex <= byteLimit:
259
- let cp = cast [ptr byte ](cmpstsBytesp[byteIndex].unsafeAddr)
260
- cp[] = cp[] or mask; byteIndex += basePrime
259
+ let mask = BITMASK [cullIndex and 7 ]
260
+ var bytepa = bufa + (cullIndex shr 3 )
261
+ while bytepa <= bytealmt:
262
+ let cp = cast [ptr byte ](bytepa)
263
+ cp[] = cp[] or mask; bytepa += basePrime
261
264
cullIndex += basePrime
262
265
263
266
of stride8block:
264
- let startIndex = cullIndex shr 3 ; let bytelimit = bitlmt shr 3
267
+ let bufa = cast [int ](cmpstsBytesp); let bufalmt = bufa + (bitlmt shr 3 )
268
+ let starta = bufa + ((cullIndex shr 3 ) and (- CPUL1CACHE ))
269
+ let bp2 = basePrime + basePrime; let bp3 = basePrime + bp2
270
+ let bp4 = basePrime + bp3
265
271
for _ in 0 .. 7 :
266
- startsp[cullIndex and 7 ] = cullIndex shr 3 ; cullIndex += basePrime
267
- for pageIndex in countup (startIndex and ( - CPUL1CACHE ),
268
- byteLimit , CPUL1CACHE ):
269
- let pageLimit = min (byteLimit, pageIndex + CPUL1CACHE - 1 )
272
+ startsp[cullIndex and 7 ] = bufa + ( cullIndex shr 3 )
273
+ cullIndex += basePrime
274
+ for pagea in countup (starta, bufalmt , CPUL1CACHE ):
275
+ let pageLimit = min (bufalmt, pagea + CPUL1CACHE - 1 )
270
276
for si in 0 .. 7 :
271
- let mask = BITMASK [si]; var byteIndex = startsp[si]
272
- while byteIndex <= pageLimit:
273
- let cp = cast [ptr byte ](cmpstsBytesp[byteIndex].unsafeAddr)
274
- cp[] = cp[] or mask; byteIndex += basePrime
275
- startsp[si] = byteIndex
277
+ let mask = BITMASK [si]; var bytea = startsp[si]
278
+ while bytea <= pageLimit - bp3:
279
+ let cp = cast [ptr UncheckedArray [byte ]](bytea)
280
+ cp[0 ] = cp[0 ] or mask; cp[basePrime] = cp[basePrime] or mask
281
+ cp[bp2] = cp[bp2] or mask; cp[bp3] = cp[bp3] or mask
282
+ bytea += bp4
283
+ while bytea <= pageLimit:
284
+ let cp = cast [ptr byte ](bytea)
285
+ cp[] = cp[] or mask; bytea += basePrime
286
+ startsp[si] = bytea
276
287
277
288
of extreme:
278
289
result .sieveBuffer.setRange (cullIndex, bitlmt, step = basePrime)
0 commit comments