-
Notifications
You must be signed in to change notification settings - Fork 412
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature: switch to xxHash for digests
xxHash is a far faster algorithm with md5. Signed-off-by: Rudi Grinberg <me@rgrinberg.com> <!-- ps-id: 4b7a67ae-5e18-49aa-b9ea-12bdf7ca649f -->
- Loading branch information
Showing
13 changed files
with
6,696 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
(library | ||
(name dune_digest) | ||
(libraries dune_metrics stdune) | ||
(libraries dune_metrics dune_xxh stdune) | ||
(instrumentation | ||
(backend bisect_ppx))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
open Stdune | ||
|
||
(** Digests (MD5) *) | ||
(** Digests *) | ||
|
||
type t | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash | ||
|
||
version=5dba1d3c2e1e5e1c6083f62eaed672c85944d7f7 | ||
|
||
pkg=xxh | ||
|
||
set -e -o pipefail | ||
|
||
TMP="$(mktemp -d)" | ||
trap "rm -rf $TMP" EXIT | ||
|
||
rm -rf $pkg | ||
mkdir -p $pkg | ||
|
||
( | ||
cd $TMP | ||
git clone https://github.com/rgrinberg/$pkg.git | ||
cd $pkg | ||
git checkout $version | ||
) | ||
|
||
SRC=$TMP/$pkg | ||
|
||
cp -v $SRC/src/*.{ml,mli} $SRC/src/{xxhash.c,xxhash.h,xxh_stubs.c} $pkg/ | ||
|
||
git checkout $pkg/dune | ||
git add -A . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
(library | ||
(name dune_xxh) | ||
(libraries unix) | ||
(foreign_stubs | ||
(language c) | ||
(names xxh_stubs xxhash) | ||
(flags -O3))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
module type S = sig | ||
type hash | ||
|
||
val string : string -> hash | ||
val file : Unix.file_descr -> string | ||
|
||
module Stream : sig | ||
type t | ||
|
||
val create : unit -> t | ||
val feed_bytes : t -> Bytes.t -> pos:int -> len:int -> unit | ||
val hash : t -> hash | ||
val reset : t -> unit | ||
end | ||
end | ||
|
||
module XXH3_128bits = struct | ||
external string : string -> string = "xxh3_128_string" | ||
(* external hash_64bits : string -> Int64.t = "xxh3_64_string" *) | ||
|
||
external file : Unix.file_descr -> string = "xxh_128_fd" | ||
|
||
module Stream = struct | ||
type t | ||
|
||
external create : unit -> t = "xxh_create" | ||
|
||
external feed_bytes : t -> Bytes.t -> pos:int -> len:int -> unit | ||
= "xxh_feed_bytes" | ||
|
||
external hash : t -> string = "xxh_128bits" | ||
external reset : t -> unit = "xxh_reset" | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
(** Bindings to the xxHash non-cryptographic hash algorithm | ||
https://cyan4973.github.io/xxHash/ | ||
*) | ||
|
||
module type S = sig | ||
type hash | ||
|
||
val string : string -> hash | ||
val file : Unix.file_descr -> string | ||
|
||
module Stream : sig | ||
type t | ||
|
||
val create : unit -> t | ||
val feed_bytes : t -> Bytes.t -> pos:int -> len:int -> unit | ||
val hash : t -> hash | ||
val reset : t -> unit | ||
end | ||
end | ||
|
||
module XXH3_128bits : S with type hash := string | ||
(* TODO *) | ||
(* module XXH3_64bit : S with type hash := int64 *) | ||
(* module XXH64 : S with type hash := int64 *) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
#include <caml/alloc.h> | ||
#include <caml/custom.h> | ||
#include <caml/fail.h> | ||
#include <caml/memory.h> | ||
#include <caml/mlvalues.h> | ||
#include <caml/threads.h> | ||
#include <caml/unixsupport.h> | ||
|
||
#include <unistd.h> | ||
|
||
#include "xxhash.h" | ||
|
||
static inline value alloc_128(XXH128_hash_t hash) { | ||
value v_ret = caml_alloc_string(16); | ||
uint64_t *ret = (uint64_t *)&Byte(v_ret, 0); | ||
uint64_t low = hash.low64; | ||
uint64_t high = hash.high64; | ||
ret[0] = hash.low64; | ||
ret[1] = hash.high64; | ||
return v_ret; | ||
} | ||
|
||
CAMLprim value xxh3_128_string(value v_s) { | ||
CAMLparam1(v_s); | ||
CAMLlocal1(v_ret); | ||
int len = caml_string_length(v_s); | ||
XXH128_hash_t hash = XXH3_128bits(&Byte(v_s, 0), len); | ||
v_ret = alloc_128(hash); | ||
CAMLreturn(v_ret); | ||
} | ||
|
||
CAMLprim value xxh3_64_string(value v_s) { | ||
CAMLparam1(v_s); | ||
CAMLlocal1(v_ret); | ||
int len = caml_string_length(v_s); | ||
uint64_t hash = XXH3_64bits(&Byte(v_s, 0), len); | ||
v_ret = caml_copy_int64(hash); | ||
CAMLreturn(v_ret); | ||
} | ||
|
||
CAMLprim value xxh_128_fd(value v_fd) { | ||
CAMLparam1(v_fd); | ||
CAMLlocal1(v_ret); | ||
int fd = Int_val(v_fd); | ||
caml_release_runtime_system(); | ||
XXH3_state_t *state = XXH3_createState(); | ||
if (state == NULL) | ||
caml_raise_out_of_memory(); | ||
if (XXH3_128bits_reset(state) == XXH_ERROR) { | ||
caml_failwith("xxh_128_fd: failed to reset"); | ||
} | ||
char buffer[UNIX_BUFFER_SIZE]; | ||
size_t count; | ||
while ((count = read(fd, buffer, sizeof(buffer))) != 0) { | ||
XXH3_128bits_update(state, buffer, count); | ||
} | ||
XXH128_hash_t hash = XXH3_128bits_digest(state); | ||
XXH3_freeState(state); | ||
caml_acquire_runtime_system(); | ||
v_ret = alloc_128(hash); | ||
CAMLreturn(v_ret); | ||
} | ||
|
||
#define Stream_val(v) (*(XXH3_state_t **)Data_custom_val(v)) | ||
|
||
static void xxh_stream_finalize(value v_state) { | ||
XXH3_state_t *state = Stream_val(v_state); | ||
XXH3_freeState(state); | ||
} | ||
|
||
static struct custom_operations xxh_stream_t_ops = { | ||
"xxh.stream", | ||
xxh_stream_finalize, | ||
custom_compare_default, | ||
custom_hash_default, | ||
custom_serialize_default, | ||
custom_deserialize_default, | ||
custom_compare_ext_default, | ||
custom_fixed_length_default}; | ||
|
||
CAMLprim value xxh_create(value v_unit) { | ||
CAMLparam1(v_unit); | ||
XXH3_state_t *state = XXH3_createState(); | ||
if (XXH3_128bits_reset(state) == XXH_ERROR) { | ||
caml_failwith("xxh_128bits_reset: failed to reset"); | ||
} | ||
value v_state = | ||
caml_alloc_custom(&xxh_stream_t_ops, sizeof(XXH3_state_t *), 0, 1); | ||
Stream_val(v_state) = state; | ||
CAMLreturn(v_state); | ||
} | ||
|
||
CAMLprim value xxh_feed_bytes(value v_state, value v_bytes, value v_pos, | ||
value v_len) { | ||
CAMLparam4(v_state, v_bytes, v_pos, v_len); | ||
XXH3_state_t *state = Stream_val(v_state); | ||
XXH3_128bits_update(state, &Byte(v_bytes, Int_val(v_pos)), Int_val(v_len)); | ||
CAMLreturn(Val_unit); | ||
} | ||
|
||
CAMLprim value xxh_128bits(value v_state) { | ||
CAMLparam1(v_state); | ||
CAMLlocal1(v_ret); | ||
XXH3_state_t *state = Stream_val(v_state); | ||
XXH128_hash_t hash = XXH3_128bits_digest(state); | ||
v_ret = alloc_128(hash); | ||
CAMLreturn(v_ret); | ||
} | ||
|
||
CAMLprim value xxh_reset(value v_state) { | ||
CAMLparam1(v_state); | ||
XXH3_state_t *state = Stream_val(v_state); | ||
if (XXH3_128bits_reset(state) == XXH_ERROR) { | ||
caml_failwith("xxh_128bits_reset: failed to reset"); | ||
} | ||
CAMLreturn(Val_unit); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* xxHash - Extremely Fast Hash algorithm | ||
* Copyright (C) 2012-2021 Yann Collet | ||
* | ||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) | ||
* | ||
* Redistribution and use in source and binary forms, with or without | ||
* modification, are permitted provided that the following conditions are | ||
* met: | ||
* | ||
* * Redistributions of source code must retain the above copyright | ||
* notice, this list of conditions and the following disclaimer. | ||
* * Redistributions in binary form must reproduce the above | ||
* copyright notice, this list of conditions and the following disclaimer | ||
* in the documentation and/or other materials provided with the | ||
* distribution. | ||
* | ||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
* | ||
* You can contact the author at: | ||
* - xxHash homepage: https://www.xxhash.com | ||
* - xxHash source repository: https://github.com/Cyan4973/xxHash | ||
*/ | ||
|
||
|
||
/* | ||
* xxhash.c instantiates functions defined in xxhash.h | ||
*/ | ||
|
||
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ | ||
#define XXH_IMPLEMENTATION /* access definitions */ | ||
|
||
#include "xxhash.h" |
Oops, something went wrong.