Skip to content

Commit c60a0f7

Browse files
committed
initial commit
0 parents  commit c60a0f7

File tree

13 files changed

+892
-0
lines changed

13 files changed

+892
-0
lines changed

bin/bench.ml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
open! Core
2+
open Core_bench
3+
open Bitarray
4+
5+
(* Benchmark driver.

   Builds Core_bench tests that run the same operations through the [Native]
   and [Vectorized] bit-array modules on randomly initialised inputs of
   [64 * 4] bits, and exposes them as two sub-commands: "hash" and "other". *)
let () =
  Random.self_init ();
  let nbits = 64 * 4 in
  let coin _ = Random.bool () in
  let random_native () = Native.init ~f:coin nbits in
  let random_vect () = Vectorized.init ~f:coin nbits in
  let n1 = random_native () and n2 = random_native () in
  let v1 = random_vect () and v2 = random_vect () in
  (* Small local shorthands so the test tables below stay readable. *)
  let bench name thunk = Bench.Test.create ~name thunk in
  let group name tests = Bench.Test.create_group ~name tests in
  (* NOTE(review): the "native" hash benchmark calls [Vectorized.hash], not a
     function from [Native] -- confirm this is the intended baseline. *)
  let hash_tests =
    [
      bench "native" (fun () -> Vectorized.hash v1);
      bench "vect" (fun () -> Vectorized.vec_hash v1);
    ]
  in
  let replicate_tests =
    [
      bench "vect" (fun () ->
          Vectorized.replicate ~w:16 ~h:16 v1 ~dx:1 ~dy:2 ~ct:5);
      bench "native" (fun () ->
          Native.replicate ~w:16 ~h:16 n1 ~dx:1 ~dy:2 ~ct:5);
    ]
  in
  let and_tests =
    [
      bench "and-native" (fun () -> Native.and_ n1 n2);
      bench "and-vect" (fun () -> Vectorized.and_ v1 v2);
    ]
  in
  let hamming_weight_tests =
    [
      bench "native" (fun () -> Native.hamming_weight n1);
      bench "vect" (fun () -> Vectorized.hamming_weight v1);
    ]
  in
  let hamming_dist_tests =
    [
      bench "native" (fun () -> Native.hamming_distance n1 n2);
      bench "vect" (fun () -> Vectorized.hamming_distance v1 v2);
    ]
  in
  let other_tests =
    [
      group "replicate" replicate_tests;
      group "and" and_tests;
      group "hamming-weight" hamming_weight_tests;
      group "hamming-dist" hamming_dist_tests;
    ]
  in
  Command_unix.run
    (Command.group ~summary:"bitvector benchmarks"
       [
         ("hash", Bench.make_command hash_tests);
         ("other", Bench.make_command other_tests);
       ])

bin/dune

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
; Benchmark executable built from bench.ml.
; It links the bitarray library together with Core, Core_bench and the
; Command_unix runner, and is preprocessed with ppx_jane.
(executable
 (name bench)
 (preprocess
  (pps ppx_jane))
 (libraries bitarray core core_bench core_unix.command_unix))

lib/bitarray.ispc

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
typedef int32 word_t;
2+
3+
// Returns true when at least one of the first `len` words of `a` has a bit
// set, and false otherwise (including when `len` is 0).
export uniform bool bitarray_any(uniform word_t a[], uniform int len) {
  // Per-lane flag: turns true once this lane has seen a non-zero word.
  bool lane_hit = false;
  foreach (idx = 0 ... len) {
    // A word has a positive population count exactly when it is non-zero.
    if (a[idx] != 0) {
      lane_hit = true;
    }
  }
  // Combine the per-lane flags into a single uniform answer.
  return any(lane_hit);
}
8+
9+
// Returns the number of set bits in the first `len` words of `a`.
// The words are visited one at a time with a uniform (scalar) loop.
export uniform int bitarray_hamming_weight(uniform word_t a[],
                                           uniform int len) {
  uniform int total = 0;
  uniform int idx = 0;
  while (idx < len) {
    total += popcnt(a[idx]);
    idx += 1;
  }
  return total;
}
17+
18+
// Reads bit number `b` of the bit array stored in `x`.
//
// Bits are numbered least-significant-bit first inside each 32-bit word: bit
// `b` lives in word `b >> 5` at position `b & 31`.
//
//   x   : the packed words
//   b   : bit index (may differ per program instance)
//   len : number of words in `x`
//
// Returns the bit's value, or false when `b` falls outside `[0, 32 * len)`.
// Callers such as bitarray_corners probe neighbouring positions that can lie
// before the first word or after the last one; without this range check those
// probes would read memory outside the array.
inline bool read_bit(const uniform word_t x[], const int b,
                     const uniform int len) {
  bool bit = false;
  if (b >= 0 && b < len * 32) {
    int i = b >> 5;       // div by word size = 32
    int j = b & (32 - 1); // mod by word size
    bit = ((x[i] >> j) & 1) != 0;
  }
  return bit;
}
24+
25+
// Maps bitmap coordinates (x, y) to a flat bit index for a bitmap that is `w`
// bits wide and `h` rows tall. Row `h - 1` is stored first, so the row index
// in storage order is `h - 1 - y`.
inline int offset(int x, int y, uniform int w, uniform int h) {
  int stored_row = h - 1 - y;
  return (stored_row * w) + x;
}
28+
29+
// Computes, for every bit position of a `w` x `h` bitmap, the OR of up to
// `ct` source bits taken at (x, y), (x - dx, y - dy), (x - 2dx, y - 2dy), ...
// and ORs that into the corresponding input word; the result goes to `r`.
//
//   a          : input words (read only)
//   dx, dy     : step between successive source positions
//   ct         : maximum number of source positions examined per output bit
//   w, h       : bitmap width and height in bits
//   r          : output words, one per input word
//   len        : number of words in `a` and `r`
//
// The walk for one output bit stops early as soon as the source position
// leaves the bitmap.
export void bitarray_replicate(const uniform word_t a[], const uniform int dx,
                               const uniform int dy, const uniform int ct,
                               const uniform int w, const uniform int h,
                               uniform word_t r[], const uniform int len) {
  foreach (wi = 0 ... len) {
    const int first_bit = 32 * wi;
    // Coordinates of the output bit currently being produced.
    int col = first_bit % w;
    int row = h - 1 - (first_bit / w);
    // Start from the input word; replicated bits are OR-ed on top of it.
    word_t acc = a[wi];
    for (uniform int k = 0; k < 32; k++) {
      bool hit = false;
      int src_x = col;
      int src_y = row;
      for (uniform int step = 0; step < ct && src_x >= 0 && src_x < w &&
                                 src_y >= 0 && src_y < h;
           step++) {
        hit |= read_bit(a, offset(src_x, src_y, w, h), len);
        src_x -= dx;
        src_y -= dy;
      }
      acc |= ((int)hit) << k;
      // Advance to the next bit in storage order: next column, or the start
      // of the next stored row (one lower y) after the last column.
      if (col == w - 1) {
        row -= 1;
        col = 0;
      } else {
        col += 1;
      }
    }
    r[wi] = acc;
  }
}
55+
56+
// TODO: wrapping translation
//
// Writes to `r` the bitmap `a` shifted by (dx, dy): the output bit at (x, y)
// is the input bit at (x - dx, y - dy), using the same least-significant-bit
// first layout as read_bit.
//
//   a      : input words (read only)
//   dx, dy : translation in bits / rows
//   w, h   : bitmap width and height in bits
//   r      : output words, one per input word
//   len    : number of words in `a` and `r`
//
// The shift is applied to the flat bit sequence, 32 output bits at a time, so
// bits can carry over from one row into the neighbouring row
// (NOTE(review): presumably what the TODO above refers to). Source words
// outside `[0, len)` are treated as zero.
export void bitarray_translate(const uniform word_t a[], const uniform int dx,
                               const uniform int dy, const uniform int w,
                               const uniform int h, uniform word_t r[],
                               const uniform int len) {
  foreach (word_idx = 0 ... len) {
    int start_bit = 32 * word_idx;
    // get the x,y coords of the least significant bit in the word
    int x = start_bit % w;
    int y = h - 1 - (start_bit / w);
    // get the shifted coords of the lsb
    int xx = x - dx;
    int yy = y - dy;
    // what is the bit offset of the shifted coords? the word that contains
    // this offset (and maybe the next word) will be the source for the
    // shift
    int read_word_bit = offset(xx, yy, w, h);
    // Arithmetic shift and mask give floor division and a remainder in
    // [0, 31] even when the source offset is negative; `/` and `%` would
    // round toward zero and could yield a negative shift count.
    int read_word1 = read_word_bit >> 5;
    int read_word2 = read_word1 + 1;
    int k = read_word_bit & (32 - 1);

    word_t s1 = (read_word1 >= 0 && read_word1 < len) ? a[read_word1] : 0;
    word_t s2 = (read_word2 >= 0 && read_word2 < len) ? a[read_word2] : 0;
    // Shift as unsigned values so the right shift does not smear the sign bit.
    unsigned int32 w1 = (unsigned int32)s1;
    unsigned int32 w2 = (unsigned int32)s2;

    // Output bit j is source bit (read_word_bit + j): the high 32-k bits of
    // w1 become the low bits of the result and the low k bits of w2 become
    // its high bits. The left shift is split in two so that the count never
    // reaches 32 (k == 0 must contribute nothing from w2).
    unsigned int32 lo = w1 >> k;
    unsigned int32 hi = (w2 << 1) << (31 - k);
    r[word_idx] = (word_t)(lo | hi);
  }
}
84+
85+
// Marks "corner" bits of a `w` x `h` bitmap: an output bit is set when the
// input bit is set and at least one of the four groups of three neighbours
// surrounding it diagonally is entirely clear.
//
//   a    : input words (read only)
//   w, h : bitmap width and height in bits
//   r    : output words, one per input word
//   len  : number of words in `a` and `r`
//
// Bits with x == 0 or y == 0 are never marked.
// NOTE(review): the `col < w` and `row < h` tests below can never fail for a
// position inside the bitmap, so the last column and the first stored row are
// not excluded -- confirm whether `w - 1` / `h - 1` was intended.
export void bitarray_corners(const uniform word_t a[], const uniform int w,
                             const uniform int h, uniform word_t r[],
                             const uniform int len) {
  foreach (wi = 0 ... len) {
    const int first_bit = 32 * wi;
    int col = first_bit % w;
    int row = h - 1 - (first_bit / w);

    word_t acc = 0;
    for (uniform int k = 0; k < 32; k++) {
      const int pos = first_bit + k;

      // 3x3 neighbourhood in flat bit order: the row stored before this one,
      // this row, and the row stored after it.
      const bool prev_l = read_bit(a, pos - w - 1, len);
      const bool prev_c = read_bit(a, pos - w, len);
      const bool prev_r = read_bit(a, pos - w + 1, len);

      const bool cur_l = read_bit(a, pos - 1, len);
      const bool cur_c = read_bit(a, pos, len);
      const bool cur_r = read_bit(a, pos + 1, len);

      const bool next_l = read_bit(a, pos + w - 1, len);
      const bool next_c = read_bit(a, pos + w, len);
      const bool next_r = read_bit(a, pos + w + 1, len);

      // One flag per quadrant: true when its three neighbours are all clear.
      const bool q_prev_l = !(prev_l || prev_c || cur_l);
      const bool q_prev_r = !(prev_c || prev_r || cur_r);
      const bool q_next_l = !(cur_l || next_l || next_c);
      const bool q_next_r = !(cur_r || next_c || next_r);

      const bool in_range = col > 0 && col < w && row > 0 && row < h;
      const bool corner =
          in_range && cur_c && (q_prev_l || q_prev_r || q_next_l || q_next_r);

      acc |= ((int)corner) << k;

      // Advance to the next bit in storage order.
      if (col == w - 1) {
        row -= 1;
        col = 0;
      } else {
        col += 1;
      }
    }
    r[wi] = acc;
  }
}

lib/bitarray.ml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
(** Operations provided by every bit-array implementation in this library.

    This signature only fixes names and types; the exact behaviour of each
    operation is defined by the implementing module. *)
module type S = sig
  type t [@@deriving compare, hash, sexp]

  (** {2 Size} *)

  val length : t -> int
  val bits_per_word : int
  val nwords : int -> int

  (** {2 Construction} *)

  val create : int -> bool -> t
  val init_fold : f:('a -> int -> 'a * bool) -> init:'a -> int -> t
  val init : f:(int -> bool) -> int -> t
  val init_bitmap : w:int -> h:int -> f:(i:int -> x:int -> y:int -> bool) -> t
  val of_list : bool list -> t

  (** {2 Access and traversal} *)

  val get : t -> int -> bool
  val to_list : t -> bool list
  val fold : t -> f:('a -> bool -> 'a) -> init:'a -> 'a
  val iteri : t -> f:(int -> bool -> unit) -> unit
  val is_empty : t -> bool

  (** {2 Bitwise operations} *)

  val not : t -> t
  val and_ : t -> t -> t
  val or_ : t -> t -> t
  val xor : t -> t -> t

  (** {2 Measures} *)

  val hamming_weight : t -> int
  val hamming_distance : t -> t -> int
  val jaccard : t -> t -> float

  (** {2 Bitmap operations}

      These take the bitmap width [w] (and height [h]) explicitly. *)

  val replicate : w:int -> h:int -> t -> dx:int -> dy:int -> ct:int -> t
  val corners : w:int -> h:int -> t -> t

  (** {2 Printing} *)

  val pp_bitmap : w:int -> Format.formatter -> t -> unit
end
28+
29+
(* The top level of this module re-exports everything from [Vectorized]. *)
include Vectorized

(* Both implementations also stay reachable under their own names. *)
module Native = Native
module Vectorized = Vectorized

lib/bitarray.mli

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
(** Operations provided by every bit-array implementation in this library.

    This signature only fixes names and types; the exact behaviour of each
    operation is defined by the implementing module. *)
module type S = sig
  type t [@@deriving compare, hash, sexp]

  (** {2 Size} *)

  val length : t -> int
  val bits_per_word : int
  val nwords : int -> int

  (** {2 Construction} *)

  val create : int -> bool -> t
  val init_fold : f:('a -> int -> 'a * bool) -> init:'a -> int -> t
  val init : f:(int -> bool) -> int -> t
  val init_bitmap : w:int -> h:int -> f:(i:int -> x:int -> y:int -> bool) -> t
  val of_list : bool list -> t

  (** {2 Access and traversal} *)

  val get : t -> int -> bool
  val to_list : t -> bool list
  val fold : t -> f:('a -> bool -> 'a) -> init:'a -> 'a
  val iteri : t -> f:(int -> bool -> unit) -> unit
  val is_empty : t -> bool

  (** {2 Bitwise operations} *)

  val not : t -> t
  val and_ : t -> t -> t
  val or_ : t -> t -> t
  val xor : t -> t -> t

  (** {2 Measures} *)

  val hamming_weight : t -> int
  val hamming_distance : t -> t -> int
  val jaccard : t -> t -> float

  (** {2 Bitmap operations}

      These take the bitmap width [w] (and height [h]) explicitly. *)

  val replicate : w:int -> h:int -> t -> dx:int -> dy:int -> ct:int -> t
  val corners : w:int -> h:int -> t -> t

  (** {2 Printing} *)

  val pp_bitmap : w:int -> Format.formatter -> t -> unit
end
28+
29+
(** The library's top level exposes the operations of {!S} directly. *)
include S

(** An implementation of {!S}. Its type [t] is abstract and is not declared
    equal to the top-level [t] or to [Vectorized.t]. *)
module Native : S

(** An implementation of {!S} with one extra hashing entry point. *)
module Vectorized : sig
  include S

  (** A hash of the bit array, offered alongside the [hash] functions that
      [S] derives. *)
  val vec_hash : t -> int
end

lib/bitarray_stubs.c

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
#include <caml/alloc.h>
2+
#include <caml/memory.h>
3+
#include <caml/mlvalues.h>
4+
#include <stdbool.h>
5+
#include <stdio.h>
6+
7+
#include "bitarray.h"
8+
9+
typedef int32_t word_t;
10+
11+
#define len(s) (caml_string_length(s) / (sizeof(word_t)))
12+
13+
/* Word-wise AND of two bit arrays, written into a third.
 *
 * b1, b2: inputs, read as packed word_t data.
 * b3:     output buffer, overwritten in place.
 *
 * The number of words processed is taken from b1 alone.
 * NOTE(review): b2 and b3 are assumed to hold at least as many words as b1;
 * nothing here checks that -- confirm the OCaml caller guarantees it.
 * Always returns unit. */
CAMLprim value bitarray_and_stub(value b1, value b2, value b3) {
  const word_t *lhs = (const word_t *)String_val(b1);
  const word_t *rhs = (const word_t *)String_val(b2);
  word_t *out = (word_t *)Bytes_val(b3);
  const mlsize_t nwords = len(b1);

  for (mlsize_t i = 0; i < nwords; ++i) {
    out[i] = lhs[i] & rhs[i];
  }
  return Val_unit;
}
23+
24+
/* Word-wise OR of two bit arrays, written into a third.
 *
 * b1, b2: inputs, read as packed word_t data.
 * b3:     output buffer, overwritten in place.
 *
 * The number of words processed is taken from b1 alone.
 * NOTE(review): b2 and b3 are assumed to hold at least as many words as b1;
 * nothing here checks that -- confirm the OCaml caller guarantees it.
 * Always returns unit. */
CAMLprim value bitarray_or_stub(value b1, value b2, value b3) {
  const word_t *lhs = (const word_t *)String_val(b1);
  const word_t *rhs = (const word_t *)String_val(b2);
  word_t *out = (word_t *)Bytes_val(b3);
  const mlsize_t nwords = len(b1);

  for (mlsize_t i = 0; i < nwords; ++i) {
    out[i] = lhs[i] | rhs[i];
  }
  return Val_unit;
}
34+
35+
/* Word-wise XOR of two bit arrays, written into a third.
 *
 * b1, b2: inputs, read as packed word_t data.
 * b3:     output buffer, overwritten in place.
 *
 * The number of words processed is taken from b1 alone.
 * NOTE(review): b2 and b3 are assumed to hold at least as many words as b1;
 * nothing here checks that -- confirm the OCaml caller guarantees it.
 * Always returns unit. */
CAMLprim value bitarray_xor_stub(value b1, value b2, value b3) {
  const word_t *lhs = (const word_t *)String_val(b1);
  const word_t *rhs = (const word_t *)String_val(b2);
  word_t *out = (word_t *)Bytes_val(b3);
  const mlsize_t nwords = len(b1);

  for (mlsize_t i = 0; i < nwords; ++i) {
    out[i] = lhs[i] ^ rhs[i];
  }
  return Val_unit;
}
45+
46+
/* Returns an OCaml bool: the result of bitarray_any over all words of b. */
CAMLprim value bitarray_any_stub(value b) {
  word_t *words = (word_t *)(String_val(b));
  const bool found = bitarray_any(words, len(b));
  return Val_bool(found);
}
49+
50+
/* Word-wise complement of a bit array.
 *
 * b1: input, read as packed word_t data.
 * b2: output buffer, overwritten in place.
 *
 * Every bit of every word is flipped, including any bits in the last word
 * that lie past the logical end of the array.
 * NOTE(review): b2 is assumed to hold at least as many words as b1 -- confirm
 * against the caller. Always returns unit. */
CAMLprim value bitarray_not_stub(value b1, value b2) {
  const word_t *src = (const word_t *)String_val(b1);
  word_t *dst = (word_t *)Bytes_val(b2);
  const mlsize_t nwords = len(b1);

  for (mlsize_t i = 0; i < nwords; ++i) {
    dst[i] = ~src[i];
  }
  return Val_unit;
}
59+
60+
/* Returns, as an untagged integer, the result of bitarray_hamming_weight over
 * all words of b. */
CAMLprim intnat bitarray_hamming_weight_stub(value b) {
  word_t *words = (word_t *)(String_val(b));
  return bitarray_hamming_weight(words, len(b));
}
63+
64+
/* Same as bitarray_hamming_weight_stub, but returns the count as a tagged
 * OCaml int. */
CAMLprim value bitarray_hamming_weight_stub_byte(value b) {
  const intnat weight = bitarray_hamming_weight_stub(b);
  return Val_int(weight);
}
67+
68+
/* Counts the bit positions in which b1 and b2 differ.
 *
 * Both arguments are read as packed word_t data; the number of words compared
 * is taken from b1 alone.
 * NOTE(review): b2 is assumed to hold at least as many words as b1 -- confirm
 * against the caller.
 * Returns the count as an untagged integer. */
CAMLprim intnat bitarray_hamming_distance_stub(value b1, value b2) {
  const word_t *lhs = (const word_t *)String_val(b1);
  const word_t *rhs = (const word_t *)String_val(b2);
  const mlsize_t nwords = len(b1);
  int differing = 0;

#pragma GCC unroll 8
  for (mlsize_t i = 0; i < nwords; i++) {
    /* A bit is set in the XOR exactly where the two words disagree. */
    const word_t diff = lhs[i] ^ rhs[i];
    differing += __builtin_popcount(diff);
  }
  return differing;
}
79+
80+
/* Same as bitarray_hamming_distance_stub, but returns the count as a tagged
 * OCaml int. */
CAMLprim value bitarray_hamming_distance_stub_byte(value b1, value b2) {
  const intnat distance = bitarray_hamming_distance_stub(b1, b2);
  return Val_int(distance);
}
83+
84+
/* Computes 1 - |b1 AND b2| / |b1 OR b2| over the set bits of two bit arrays.
 *
 * Both arguments are read as packed word_t data; the number of words examined
 * is taken from b1 alone.
 * Returns 0.0 when no bit is set in either input (which includes the case of
 * an empty b1), so the division never sees a zero denominator. */
CAMLprim double bitarray_jaccard_stub(value b1, value b2) {
  const word_t *lhs = (const word_t *)String_val(b1);
  const word_t *rhs = (const word_t *)String_val(b2);
  const int nwords = len(b1);
  int in_either = 0;
  int in_both = 0;

  for (int i = 0; i < nwords; i++) {
    in_either += __builtin_popcount(lhs[i] | rhs[i]);
    in_both += __builtin_popcount(lhs[i] & rhs[i]);
  }

  if (in_either == 0) {
    return 0.0;
  }
  return 1.0 - ((double)in_both / (double)in_either);
}
99+
100+
/* Same as bitarray_jaccard_stub, but returns the result as a freshly
 * allocated boxed OCaml float. */
CAMLprim value bitarray_jaccard_stub_byte(value b1, value b2) {
  const double result = bitarray_jaccard_stub(b1, b2);
  return caml_copy_double(result);
}
103+
104+
/* Forwards to bitarray_replicate with untagged integer arguments.
 *
 * b1:       input bit array (read only)
 * x, y, ct: passed as the dx, dy and ct arguments of bitarray_replicate
 * w, h:     bitmap width and height
 * b2:       output buffer, overwritten in place
 *
 * The word count handed to bitarray_replicate is taken from b1.
 * Always returns unit. */
CAMLprim value bitarray_replicate_stub(value b1, intnat x, intnat y, intnat ct,
                                       intnat w, intnat h, value b2) {
  word_t *src = (word_t *)(String_val(b1));
  word_t *dst = (word_t *)(Bytes_val(b2));

  bitarray_replicate(src, x, y, ct, w, h, dst, len(b1));
  return Val_unit;
}
110+
111+
/* Bytecode entry point for the replicate primitive.
 *
 * The primitive takes seven arguments. For primitives with more than five
 * arguments the bytecode interpreter does not pass them individually: it
 * passes a pointer to an array of values plus the argument count. Declaring
 * this function with seven `value` parameters would therefore read garbage.
 *
 * argv: the seven arguments, in order: b1, x, y, ct, w, h, b2
 * argn: number of arguments (unused; always 7 for this primitive)
 *
 * Untags the integer arguments and forwards to bitarray_replicate_stub.
 * Returns unit. */
CAMLprim value bitarray_replicate_stub_byte(value *argv, int argn) {
  (void)argn;
  return bitarray_replicate_stub(argv[0], Int_val(argv[1]), Int_val(argv[2]),
                                 Int_val(argv[3]), Int_val(argv[4]),
                                 Int_val(argv[5]), argv[6]);
}
117+
118+
/* Hashes the data in d with the key material in k.
 *
 * Both arguments are read as arrays of 32-bit words. Words are consumed in
 * pairs: for each pair the data words are added to the matching key words
 * (wrapping at 32 bits), the two sums are multiplied as signed 64-bit values,
 * and the products are accumulated (wrapping at 64 bits).
 *
 * All wrapping arithmetic is done on unsigned types: signed overflow is
 * undefined behaviour in C, and hash inputs overflow routinely. Fixed-width
 * 64-bit types are used for the accumulator because `long` is only 32 bits
 * wide on some 64-bit platforms.
 *
 * NOTE(review): k is assumed to hold at least as many words as d (rounded up
 * to an even count); nothing here checks that. When d has an odd number of
 * words the last pair reads one word past d's data -- confirm callers only
 * pass even word counts.
 *
 * Returns the accumulated sum as an untagged integer. */
CAMLprim intnat bitarray_hash_stub(value d, value k) {
  const word_t *dd = (const word_t *)String_val(d);
  const word_t *kk = (const word_t *)String_val(k);
  uint64_t sum = 0;
  for (int i = 0; i < len(d); i += 2) {
    /* 32-bit wrapping additions, reinterpreted as signed values. */
    const int32_t s0 = (int32_t)((uint32_t)dd[i] + (uint32_t)kk[i]);
    const int32_t s1 = (int32_t)((uint32_t)dd[i + 1] + (uint32_t)kk[i + 1]);
    /* The product of two 32-bit values always fits in 64 bits. */
    sum += (uint64_t)((int64_t)s0 * (int64_t)s1);
  }
  return (intnat)sum;
}
127+
128+
/* Same as bitarray_hash_stub, but returns the hash as a tagged OCaml int. */
CAMLprim value bitarray_hash_stub_byte(value d, value k) {
  const intnat hash = bitarray_hash_stub(d, k);
  return Val_int(hash);
}
131+
132+
/* Forwards to bitarray_corners with untagged integer arguments.
 *
 * b1:   input bit array (read only)
 * w, h: bitmap width and height
 * b2:   output buffer, overwritten in place
 *
 * The word count handed to bitarray_corners is taken from b1.
 * Always returns unit. */
CAMLprim value bitarray_corners_stub(value b1, intnat w, intnat h, value b2) {
  word_t *src = (word_t *)(String_val(b1));
  word_t *dst = (word_t *)(Bytes_val(b2));

  bitarray_corners(src, w, h, dst, len(b1));
  return Val_unit;
}
137+
138+
/* Bytecode entry point for the corners primitive: untags the integer
 * arguments and forwards to bitarray_corners_stub. Returns unit. */
CAMLprim value bitarray_corners_stub_byte(value b1, value w, value h,
                                          value b2) {
  const intnat width = Int_val(w);
  const intnat height = Int_val(h);
  return bitarray_corners_stub(b1, width, height, b2);
}

0 commit comments

Comments
 (0)