diff --git a/Changes b/Changes index ae0ba00428b..41f45b190b5 100644 --- a/Changes +++ b/Changes @@ -252,6 +252,10 @@ Working version Guillaume Munch-Maccagnoni, KC Sivaramakrishnan, Stefan Muenzel, Xavier Leroy) +- #12307: Add BLAKE2b hashing and an MD5 submodule to the Digest module. + (Xavier Leroy, review by Olivier Nicole, Gabriel Scherer, Wiktor Kuchta, + Daniel Bünzli, David Allsopp) + - #12365: Add In_channel.input_bigarray, In_channel.really_input_bigarray, Out_channel.output_bigarray, Unix.read_bigarray, Unix.write_bigarray, Unix.single_write_bigarray. diff --git a/Makefile b/Makefile index c8f2f5bfd51..b25deb19257 100644 --- a/Makefile +++ b/Makefile @@ -1086,6 +1086,7 @@ runtime_COMMON_C_SOURCES = \ array \ backtrace \ bigarray \ + blake2 \ callback \ codefrag \ compare \ diff --git a/runtime/blake2.c b/runtime/blake2.c new file mode 100644 index 00000000000..dde68ee0f00 --- /dev/null +++ b/runtime/blake2.c @@ -0,0 +1,245 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, Collège de France and Inria Paris */ +/* */ +/* Copyright 2022 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +#define CAML_INTERNALS + +#include +#include "caml/alloc.h" +#include "caml/blake2.h" +#include "caml/custom.h" +#include "caml/memory.h" +#include "caml/mlvalues.h" + +/* BLAKE2 message digest */ + +static inline uint64_t U8TO64LE(const unsigned char * src) +{ + return (uint64_t) src[0] | ((uint64_t) src[1] << 8) + | ((uint64_t) src[2] << 16) | ((uint64_t) src[3] << 24) + | ((uint64_t) src[4] << 32) | ((uint64_t) src[5] << 40) + | ((uint64_t) src[6] << 48) | ((uint64_t) src[7] << 56); +} + +static inline uint64_t ROTR64(uint64_t x, int amount) +{ + return (x >> amount) | (x << (64 - amount)); +} + +static const uint64_t caml_BLAKE2_iv[8] = { + 0x6a09e667f3bcc908, + 0xbb67ae8584caa73b, + 0x3c6ef372fe94f82b, + 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, + 0x9b05688c2b3e6c1f, + 0x1f83d9abfb41bd6b, + 0x5be0cd19137e2179 +}; + +static const uint8_t BLAKE2_sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + +#define MIX2B(a,b,c,d,x,y) \ + do { \ + a += b + x; \ + d = ROTR64(d ^ a, 32); \ + c += d; \ + b = ROTR64(b ^ c, 24); \ + a += b + y; \ + d = ROTR64(d ^ a, 16); \ + c += d; \ + b = ROTR64(b ^ c, 63); \ + } while(0) \ + +static void +caml_BLAKE2Compress(struct BLAKE2_context * s, + const unsigned char * data, size_t numbytes, + int is_last_block) +{ + uint64_t v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, v15; + uint64_t m[16]; + + /* Update the length */ + s->len[0] += numbytes; + if (s->len[0] < numbytes) s->len[1]++; /* carry */ + /* Initialize work space */ + v0 = s->h[0]; v1 = s->h[1]; + v2 = s->h[2]; v3 = s->h[3]; + v4 = s->h[4]; v5 = s->h[5]; + v6 = s->h[6]; v7 = s->h[7]; + v8 = caml_BLAKE2_iv[0]; v9 = caml_BLAKE2_iv[1]; + v10 = caml_BLAKE2_iv[2]; v11 = caml_BLAKE2_iv[3]; + v12 = caml_BLAKE2_iv[4] ^ s->len[0]; v13 = caml_BLAKE2_iv[5] ^ s->len[1]; + v14 = is_last_block ? ~ caml_BLAKE2_iv[6] : caml_BLAKE2_iv[6]; + v15 = caml_BLAKE2_iv[7]; + /* Convert data to 16 64-bit words */ + for (int i = 0; i < 16; i++) { + m[i] = U8TO64LE(data + i * 8); + } + /* Twelve rounds of mixing */ + for (int i = 0; i < 12; i++) { + const uint8_t * sigma = BLAKE2_sigma[i]; + MIX2B(v0, v4, v8, v12, m[sigma[0]], m[sigma[1]]); + MIX2B(v1, v5, v9, v13, m[sigma[2]], m[sigma[3]]); + MIX2B(v2, v6, v10, v14, m[sigma[4]], m[sigma[5]]); + MIX2B(v3, v7, v11, v15, m[sigma[6]], m[sigma[7]]); + MIX2B(v0, v5, v10, v15, m[sigma[8]], m[sigma[9]]); + MIX2B(v1, v6, v11, v12, m[sigma[10]], m[sigma[11]]); + MIX2B(v2, v7, v8, v13, m[sigma[12]], m[sigma[13]]); + MIX2B(v3, v4, v9, v14, m[sigma[14]], m[sigma[15]]); + } + /* Update state */ + s->h[0] ^= v0 ^ v8; s->h[1] ^= v1 ^ v9; + s->h[2] ^= v2 ^ v10; s->h[3] ^= v3 ^ v11; + s->h[4] ^= v4 ^ v12; s->h[5] ^= v5 ^ v13; + s->h[6] ^= v6 ^ v14; s->h[7] ^= v7 ^ v15; +} + +CAMLexport void +caml_BLAKE2Init(struct BLAKE2_context * s, + size_t hashlen, + size_t keylen, const unsigned char * key) +{ + CAMLassert (hashlen <= 64); + for (int i = 0; i < 8; i++) s->h[i] = caml_BLAKE2_iv[i]; + s->h[0] ^= 0x01010000 | (keylen << 8) | hashlen; + s->len[0] = s->len[1] = 0; + s->numbytes = 0; + /* If key was supplied, pad to 128 bytes and prepend to message */ + if (keylen > 0) { + if (keylen > 64) keylen = 64; + memcpy(s->buffer, key, keylen); + memset(s->buffer + keylen, 0, BLAKE2_BLOCKSIZE - keylen); + s->numbytes = BLAKE2_BLOCKSIZE; + } +} + +CAMLexport void +caml_BLAKE2Update(struct BLAKE2_context * s, + const unsigned char * data, size_t len) +{ + /* If data was left in buffer, pad it with fresh data and compress */ + if (s->numbytes > 0) { + size_t n = BLAKE2_BLOCKSIZE - s->numbytes; + if (len <= n) { + /* Not enough fresh data to compress. Buffer the data. */ + memcpy(s->buffer + s->numbytes, data, len); + s->numbytes += len; + return; + } + memcpy(s->buffer + s->numbytes, data, n); + caml_BLAKE2Compress(s, s->buffer, BLAKE2_BLOCKSIZE, 0); + data += n; len -= n; + } + /* Process data by blocks of BLAKE2_BLOCKSIZE */ + while (len > BLAKE2_BLOCKSIZE) { + caml_BLAKE2Compress(s, data, BLAKE2_BLOCKSIZE, 0); + data += BLAKE2_BLOCKSIZE; len -= BLAKE2_BLOCKSIZE; + } + /* Save remaining data, up to one full block. This is because the + last block is treated specially in caml_BLAKE2Final. */ + memcpy(s->buffer, data, len); + s->numbytes = len; +} + +CAMLexport void +caml_BLAKE2Final(struct BLAKE2_context * s, + size_t hashlen, unsigned char * hash) +{ + CAMLassert (0 < hashlen && hashlen <= 64); + /* The final block is composed of the remaining data padded with zeros. */ + memset(s->buffer + s->numbytes, 0, BLAKE2_BLOCKSIZE - s->numbytes); + caml_BLAKE2Compress(s, s->buffer, s->numbytes, 1); + /* Extract the hash */ + for (unsigned int i = 0; i < hashlen; i++) { + hash[i] = s->h[i / 8] >> (8 * (i % 8)); + } +} + +/* OCaml wrappers */ + +#define BLAKE2_context_val(v) (*((struct BLAKE2_context **) Data_custom_val(v))) + +static void caml_blake2_finalize(value ctx) +{ + caml_stat_free(BLAKE2_context_val(ctx)); +} + +static struct custom_operations caml_blake2_operations = { + "_blake2", + caml_blake2_finalize, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default, + custom_compare_ext_default, + custom_fixed_length_default +}; + +CAMLprim value caml_blake2_create(value hashlen, value key) +{ + CAMLparam1(key); + struct BLAKE2_context * ctx = + caml_stat_alloc(sizeof(struct BLAKE2_context)); + value res = + caml_alloc_custom_mem(&caml_blake2_operations, + sizeof(struct BLAKE2_context *), + sizeof(struct BLAKE2_context)); + caml_BLAKE2Init(ctx, Long_val(hashlen), + caml_string_length(key), &Byte_u(key, 0)); + BLAKE2_context_val(res) = ctx; + CAMLreturn(res); +} + +CAMLprim value caml_blake2_update(value ctx, value buf, value ofs, value len) +{ + caml_BLAKE2Update(BLAKE2_context_val(ctx), + &Byte_u(buf, Long_val(ofs)), Long_val(len)); + return Val_unit; +} + +CAMLprim value caml_blake2_final(value ctx, value hashlen) +{ + CAMLparam1(ctx); + size_t len = Long_val(hashlen); + value hash = caml_alloc_string(len); + caml_BLAKE2Final(BLAKE2_context_val(ctx), len, &Byte_u(hash, 0)); + CAMLreturn(hash); +} + +CAMLprim value caml_blake2_string(value hashlen, value key, + value buf, value ofs, value len) +{ + struct BLAKE2_context ctx; + size_t hlen = Long_val(hashlen); + caml_BLAKE2Init(&ctx, hlen, caml_string_length(key), &Byte_u(key, 0)); + caml_BLAKE2Update(&ctx, &Byte_u(buf, Long_val(ofs)), Long_val(len)); + value hash = caml_alloc_string(hlen); + caml_BLAKE2Final(&ctx, hlen, &Byte_u(hash, 0)); + return hash; +} diff --git a/runtime/caml/blake2.h b/runtime/caml/blake2.h new file mode 100644 index 00000000000..e8bdcffaf4a --- /dev/null +++ b/runtime/caml/blake2.h @@ -0,0 +1,46 @@ +/**************************************************************************/ +/* */ +/* OCaml */ +/* */ +/* Xavier Leroy, Collège de France and Inria Paris */ +/* */ +/* Copyright 2022 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* All rights reserved. This file is distributed under the terms of */ +/* the GNU Lesser General Public License version 2.1, with the */ +/* special exception on linking described in the file LICENSE. */ +/* */ +/**************************************************************************/ + +/* BLAKE2 message digest */ + +#ifndef CAML_BLAKE2_H +#define CAML_BLAKE2_H + +#ifdef CAML_INTERNALS + +#include "misc.h" + +#define BLAKE2_BLOCKSIZE 128 + +struct BLAKE2_context { + uint64_t h[8]; + uint64_t len[2]; + size_t numbytes; + unsigned char buffer[BLAKE2_BLOCKSIZE]; +}; + +CAMLextern void +caml_BLAKE2Init(struct BLAKE2_context * context, + size_t hashlen, size_t keylen, const unsigned char * key); +CAMLextern void +caml_BLAKE2Update(struct BLAKE2_context * context, + const unsigned char * data, size_t len); +CAMLextern void +caml_BLAKE2Final(struct BLAKE2_context * context, + size_t hashlen, unsigned char * hash); + +#endif /* CAML_INTERNALS */ + +#endif /* CAML_BLAKE2_H */ diff --git a/stdlib/.depend b/stdlib/.depend index 6441fc1ba90..bc8a9d7ad4a 100644 --- a/stdlib/.depend +++ b/stdlib/.depend @@ -198,11 +198,15 @@ stdlib__Condition.cmi : condition.mli \ stdlib__Mutex.cmi stdlib__Digest.cmo : digest.ml \ stdlib__String.cmi \ + stdlib__Int.cmi \ + stdlib__In_channel.cmi \ stdlib__Char.cmi \ stdlib__Bytes.cmi \ stdlib__Digest.cmi stdlib__Digest.cmx : digest.ml \ stdlib__String.cmx \ + stdlib__Int.cmx \ + stdlib__In_channel.cmx \ stdlib__Char.cmx \ stdlib__Bytes.cmx \ stdlib__Digest.cmi diff --git a/stdlib/StdlibModules b/stdlib/StdlibModules index efd654069e0..37575c1cfa7 100644 --- a/stdlib/StdlibModules +++ b/stdlib/StdlibModules @@ -78,6 +78,8 @@ STDLIB_MODULE_BASENAMES = \ printexc \ fun \ gc \ + in_channel \ + out_channel \ digest \ bigarray \ random \ @@ -98,8 +100,6 @@ STDLIB_MODULE_BASENAMES = \ stringLabels \ moreLabels \ stdLabels \ - in_channel \ - out_channel \ effect STDLIB_PREFIXED_MODULES = \ diff --git a/stdlib/digest.ml b/stdlib/digest.ml index 408194b501c..f816bf5e12a 100644 --- a/stdlib/digest.ml +++ b/stdlib/digest.ml @@ -13,64 +13,179 @@ (* *) (**************************************************************************) -(* Message digest (MD5) *) +(* Utility functions *) -type t = string - -let compare = String.compare -let equal = String.equal - -external unsafe_string: string -> int -> int -> t = "caml_md5_string" -external channel: in_channel -> int -> t = "caml_md5_chan" - -let string str = - unsafe_string str 0 (String.length str) - -let bytes b = string (Bytes.unsafe_to_string b) - -let substring str ofs len = - if ofs < 0 || len < 0 || ofs > String.length str - len - then invalid_arg "Digest.substring" - else unsafe_string str ofs len - -let subbytes b ofs len = substring (Bytes.unsafe_to_string b) ofs len - -let file filename = - let ic = open_in_bin filename in - match channel ic (-1) with - | d -> close_in ic; d - | exception e -> close_in ic; raise e - -let output chan digest = - output_string chan digest - -let input chan = really_input_string chan 16 - -let char_hex n = - Char.unsafe_chr (n + if n < 10 then Char.code '0' else (Char.code 'a' - 10)) - -let to_hex d = - if String.length d <> 16 then invalid_arg "Digest.to_hex"; - let result = Bytes.create 32 in - for i = 0 to 15 do +let hex_of_string d = + let char_hex n = + Char.chr (if n < 10 then Char.code '0' + n + else Char.code 'a' + n - 10) in + let len = String.length d in + let result = Bytes.create (len * 2) in + for i = 0 to len - 1 do let x = Char.code d.[i] in Bytes.unsafe_set result (i*2) (char_hex (x lsr 4)); Bytes.unsafe_set result (i*2+1) (char_hex (x land 0x0f)); done; Bytes.unsafe_to_string result -let from_hex s = - if String.length s <> 32 then invalid_arg "Digest.from_hex"; +let string_of_hex s = let digit c = match c with | '0'..'9' -> Char.code c - Char.code '0' | 'A'..'F' -> Char.code c - Char.code 'A' + 10 | 'a'..'f' -> Char.code c - Char.code 'a' + 10 - | _ -> raise (Invalid_argument "Digest.from_hex") - in + | _ -> invalid_arg "Digest.of_hex" in let byte i = digit s.[i] lsl 4 + digit s.[i+1] in - let result = Bytes.create 16 in - for i = 0 to 15 do - Bytes.set result i (Char.chr (byte (2 * i))); - done; - Bytes.unsafe_to_string result + String.init (String.length s / 2) (fun i -> Char.chr (byte (2 * i))) + +(* Generic interface *) + +module type S = sig + type t = string + val hash_length : int + val compare : t -> t -> int + val equal : t -> t -> bool + val string : string -> t + val bytes : bytes -> t + val substring : string -> int -> int -> t + val subbytes : bytes -> int -> int -> t + val channel : in_channel -> int -> t + val file : string -> t + val output : out_channel -> t -> unit + val input : in_channel -> t + val to_hex : t -> string + val of_hex : string -> t +end + +(* BLAKE2 hashing, parameterized by hash size *) + +module BLAKE2 (X: sig val hash_length : int end) : S = struct + + type t = string + + let hash_length = + if X.hash_length < 1 || X.hash_length > 64 + then invalid_arg "Digest.BLAKE2: wrong hash size"; + X.hash_length + + let compare = String.compare + let equal = String.equal + + type state + + external create_gen: int -> string -> state = "caml_blake2_create" + external update: state -> string -> int -> int -> unit = "caml_blake2_update" + external final: state -> int -> t = "caml_blake2_final" + external unsafe_string: int -> string -> string -> int -> int -> t + = "caml_blake2_string" + + let create () = create_gen hash_length "" + + let string str = + unsafe_string hash_length "" str 0 (String.length str) + + let bytes b = + string (Bytes.unsafe_to_string b) + + let substring str ofs len = + if ofs < 0 || len < 0 || ofs > String.length str - len + then invalid_arg "Digest.substring"; + unsafe_string hash_length "" str ofs len + + let subbytes b ofs len = + substring (Bytes.unsafe_to_string b) ofs len + + let channel ic toread = + let buf_size = 4096 in + let buf = Bytes.create buf_size in + let ctx = create () in + if toread < 0 then begin + let rec do_read () = + let n = In_channel.input ic buf 0 buf_size in + if n = 0 + then final ctx hash_length + else (update ctx (Bytes.unsafe_to_string buf) 0 n; do_read ()) + in do_read () + end else begin + let rec do_read toread = + if toread = 0 then final ctx hash_length else begin + let n = In_channel.input ic buf 0 (Int.min buf_size toread) in + if n = 0 + then raise End_of_file + else begin + update ctx (Bytes.unsafe_to_string buf) 0 n; + do_read (toread - n) + end + end + in do_read toread + end + + let file filename = + In_channel.with_open_bin filename (fun ic -> channel ic (-1)) + + let output chan digest = output_string chan digest + + let input chan = really_input_string chan hash_length + + let to_hex d = + if String.length d <> hash_length then invalid_arg "Digest.to_hex"; + hex_of_string d + + let of_hex s = + if String.length s <> hash_length * 2 then invalid_arg "Digest.of_hex"; + string_of_hex s + +end + +module BLAKE128 = BLAKE2(struct let hash_length = 16 end) +module BLAKE256 = BLAKE2(struct let hash_length = 32 end) +module BLAKE512 = BLAKE2(struct let hash_length = 64 end) + +(* MD5 hashing *) + +module MD5 = struct + + type t = string + + let hash_length = 16 + + let compare = String.compare + let equal = String.equal + + external unsafe_string: string -> int -> int -> t = "caml_md5_string" + external channel: in_channel -> int -> t = "caml_md5_chan" + + let string str = + unsafe_string str 0 (String.length str) + + let bytes b = string (Bytes.unsafe_to_string b) + + let substring str ofs len = + if ofs < 0 || len < 0 || ofs > String.length str - len + then invalid_arg "Digest.substring" + else unsafe_string str ofs len + + let subbytes b ofs len = substring (Bytes.unsafe_to_string b) ofs len + + let file filename = + In_channel.with_open_bin filename (fun ic -> channel ic (-1)) + + let output chan digest = output_string chan digest + + let input chan = really_input_string chan 16 + + let to_hex d = + if String.length d <> 16 then invalid_arg "Digest.to_hex"; + hex_of_string d + + let of_hex s = + if String.length s <> 32 then invalid_arg "Digest.from_hex"; + string_of_hex s + +end + +(* Default exported implementation is MD5 *) + +include MD5 + +let from_hex = of_hex diff --git a/stdlib/digest.mli b/stdlib/digest.mli index 4d31dae346d..13a77c8d5eb 100644 --- a/stdlib/digest.mli +++ b/stdlib/digest.mli @@ -13,22 +13,24 @@ (* *) (**************************************************************************) -(** MD5 message digest. +(** Message digest. - This module provides functions to compute 128-bit 'digests' of - arbitrary-length strings or files. The algorithm used is MD5. + This module provides functions to compute 'digests', also known as + 'hashes', of arbitrary-length strings or files. + The supported hashing algorithms are BLAKE2 and MD5. *) - The MD5 hash function is not cryptographically secure. - Hence, this module should not be used for security-sensitive - applications. More recent, stronger cryptographic primitives - should be used instead. -*) +(** {1 Basic functions} *) + +(** The functions in this section use the MD5 hash function to produce + 128-bit digests (16 bytes). MD5 is not cryptographically secure. + Hence, these functions should not be used for security-sensitive + applications. The BLAKE2 functions below are cryptographically secure. *) type t = string -(** The type of digests: 16-character strings. *) +(** The type of digests: 16-byte strings. *) val compare : t -> t -> int -(** The comparison function for 16-character digest, with the same +(** The comparison function for 16-byte digests, with the same specification as {!Stdlib.compare} and the implementation shared with {!String.compare}. Along with the type [t], this function [compare] allows the module [Digest] to be passed as @@ -36,7 +38,7 @@ val compare : t -> t -> int @since 4.00 *) val equal : t -> t -> bool -(** The equal function for 16-character digest. +(** The equal function for 16-byte digests. @since 4.03 *) val string : string -> t @@ -55,7 +57,7 @@ val subbytes : bytes -> int -> int -> t of [s] starting at index [ofs] and containing [len] bytes. @since 4.02 *) -external channel : in_channel -> int -> t = "caml_md5_chan" +val channel : in_channel -> int -> t (** If [len] is nonnegative, [Digest.channel ic len] reads [len] characters from channel [ic] and returns their digest, or raises [End_of_file] if end-of-file is reached before [len] characters @@ -77,8 +79,98 @@ val to_hex : t -> string @raise Invalid_argument if the argument is not exactly 16 bytes. *) -val from_hex : string -> t +val of_hex : string -> t (** Convert a hexadecimal representation back into the corresponding digest. @raise Invalid_argument if the argument is not exactly 32 hexadecimal - characters. - @since 4.00 *) + characters. + @since 5.2 *) + +val from_hex : string -> t +(** Same function as {!Digest.of_hex}. + @since 4.00 *) + +(** {1 Generic interface} *) + +module type S = sig + + type t = string + (** The type of digests. *) + + val hash_length : int + (** The length of digests, in bytes. *) + + val compare : t -> t -> int + (** Compare two digests, with the same specification as + {!Stdlib.compare}. *) + + val equal : t -> t -> bool + (** Test two digests for equality. *) + + val string : string -> t + (** Return the digest of the given string. *) + + val bytes : bytes -> t + (** Return the digest of the given byte sequence. *) + + val substring : string -> int -> int -> t + (** [substring s ofs len] returns the digest of the substring + of [s] starting at index [ofs] and containing [len] characters. *) + + val subbytes : bytes -> int -> int -> t + (** [subbytes s ofs len] returns the digest of the subsequence + of [s] starting at index [ofs] and containing [len] bytes. *) + + val channel : in_channel -> int -> t + (** Read characters from the channel and return their digest. + See {!Digest.channel} for the full specification. *) + + val file : string -> t + (** Return the digest of the file whose name is given. *) + + val output : out_channel -> t -> unit + (** Write a digest on the given output channel. *) + + val input : in_channel -> t + (** Read a digest from the given input channel. *) + + val to_hex : t -> string + (** Return the printable hexadecimal representation of the given digest. + @raise Invalid_argument if the length of the argument + is not [hash_length], *) + + val of_hex : string -> t + (** Convert a hexadecimal representation back into the corresponding digest. + @raise Invalid_argument if the length of the argument + is not [2 * hash_length], or if the arguments contains non-hexadecimal + characters. *) +end + (** The signature for a hash function that produces digests of length + [hash_length] from character strings, byte arrays, and files. + @since 5.2 *) + +(** {1 Specific hash functions} *) + +module BLAKE128 : S + (** [BLAKE128] is the BLAKE2b hash function producing + 128-bit (16-byte) digests. It is cryptographically secure. + However, the small size of the digests enables brute-force attacks + in [2{^64}] attempts. + @since 5.2 *) + +module BLAKE256 : S + (** [BLAKE256] is the BLAKE2b hash function producing + 256-bit (32-byte) digests. It is cryptographically secure, + and the digests are large enough to thwart brute-force attacks. + @since 5.2 *) + +module BLAKE512 : S + (** [BLAKE512] is the BLAKE2b hash function producing + 512-bit (64-byte) digests. It is cryptographically secure, + and the digests are large enough to thwart brute-force attacks. + @since 5.2 *) + +module MD5 : S + (** [MD5] is the MD5 hash function. It produces 128-bit (16-byte) digests + and is not cryptographically secure at all. It should be used only + for compatibility with earlier designs that mandate the use of MD5. + @since 5.2 *) diff --git a/testsuite/tests/lib-digest/blake2b_self_test.ml b/testsuite/tests/lib-digest/blake2b_self_test.ml new file mode 100644 index 00000000000..c1e5a662d4b --- /dev/null +++ b/testsuite/tests/lib-digest/blake2b_self_test.ml @@ -0,0 +1,7 @@ +(* TEST + modules = "blake2b_self_test_stubs.c"; +*) + +external self_test_main : unit -> unit = "self_test_main" + +let () = self_test_main () diff --git a/testsuite/tests/lib-digest/blake2b_self_test.reference b/testsuite/tests/lib-digest/blake2b_self_test.reference new file mode 100644 index 00000000000..2ff9b9752d3 --- /dev/null +++ b/testsuite/tests/lib-digest/blake2b_self_test.reference @@ -0,0 +1 @@ +blake2b_selftest() = OK diff --git a/testsuite/tests/lib-digest/blake2b_self_test_stubs.c b/testsuite/tests/lib-digest/blake2b_self_test_stubs.c new file mode 100644 index 00000000000..bd471dd1922 --- /dev/null +++ b/testsuite/tests/lib-digest/blake2b_self_test_stubs.c @@ -0,0 +1,101 @@ +// Adapted from RFC 7693 https://datatracker.ietf.org/doc/html/rfc7693 +// Self test Module for BLAKE2b + +#define CAML_NAME_SPACE + +#include "caml/mlvalues.h" +#include "caml/memory.h" +#define CAML_INTERNALS +#include "caml/blake2.h" +#undef CAML_INTERNALS + +#include +#include + +// Deterministic sequences (Fibonacci generator). + +static void selftest_seq(uint8_t *out, size_t len, uint32_t seed) +{ + size_t i; + uint32_t t, a , b; + + a = 0xDEAD4BAD * seed; // prime + b = 1; + + for (i = 0; i < len; i++) { // fill the buf + t = a + b; + a = b; + b = t; + out[i] = (t >> 24) & 0xFF; + } +} + +// All-in-one convenience function. +static int blake2b(void *out, size_t outlen, // return buffer for digest + const void *key, size_t keylen, // optional secret key + const void *in, size_t inlen) // data to be hashed +{ + struct BLAKE2_context ctx; + + caml_BLAKE2Init(&ctx, outlen, keylen, key); + caml_BLAKE2Update(&ctx, in, inlen); + caml_BLAKE2Final(&ctx, outlen, out); + + return 0; +} + +// BLAKE2b self-test validation. Return 0 when OK. + +int blake2b_selftest() +{ + // grand hash of hash results + const uint8_t blake2b_res[32] = { + 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD, + 0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56, + 0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73, + 0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75 + }; + // parameter sets + const size_t b2b_md_len[4] = { 20, 32, 48, 64 }; + const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 }; + + size_t i, j, outlen, inlen; + uint8_t in[1024], md[64], key[64]; + struct BLAKE2_context ctx; + + // 256-bit hash for testing + caml_BLAKE2Init(&ctx, 32, 0, NULL); + + for (i = 0; i < 4; i++) { + outlen = b2b_md_len[i]; + for (j = 0; j < 6; j++) { + inlen = b2b_in_len[j]; + + selftest_seq(in, inlen, inlen); // unkeyed hash + blake2b(md, outlen, NULL, 0, in, inlen); + caml_BLAKE2Update(&ctx, md, outlen); // hash the hash + + selftest_seq(key, outlen, outlen); // keyed hash + blake2b(md, outlen, key, outlen, in, inlen); + caml_BLAKE2Update(&ctx, md, outlen); // hash the hash + } + } + + // compute and compare the hash of hashes + caml_BLAKE2Final(&ctx, 32, md); + for (i = 0; i < 32; i++) { + if (md[i] != blake2b_res[i]) + return -1; + } + + return 0; +} + +int self_test_main(value unused) +{ + CAMLparam1(unused); + printf("blake2b_selftest() = %s\n", + blake2b_selftest() ? "FAIL" : "OK"); + + CAMLreturn (Val_unit); +} diff --git a/testsuite/tests/lib-digest/digests.ml b/testsuite/tests/lib-digest/digests.ml new file mode 100644 index 00000000000..67d9ed428d5 --- /dev/null +++ b/testsuite/tests/lib-digest/digests.ml @@ -0,0 +1,73 @@ +(* TEST +*) + +module Test(H: Digest.S) = struct + + let string (msg, hh) = + assert (H.(equal (string msg) (of_hex hh))) + + let file wlen rlen = + let data = String.init wlen Char.unsafe_chr in + Out_channel.with_open_bin "data.tmp" + (fun oc -> Out_channel.output_string oc data); + let h1 = H.file "data.tmp" in + assert (H.equal h1 (H.string data)); + let h2 = + In_channel.with_open_bin "data.tmp" + (fun ic -> H.channel ic rlen) in + assert (H.equal h2 (H.substring data 0 rlen)); + Sys.remove "data.tmp" + + let run_tests tests = + List.iter string tests; + file 100 99; + file 100_000 10_000 +end + +(* Test inputs *) + +let in1 = "" +let in2 = "a" +let in3 = "abc" +let in4 = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno\ + ijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu" +let in5 = String.make 100_000 'a' + +(* Test vectors *) + +module TestMD5 = Test(Digest.MD5) +let _ = TestMD5.run_tests + [in1, "d41d8cd98f00b204e9800998ecf8427e"; + in2, "0cc175b9c0f1b6a831c399e269772661"; + in3, "900150983cd24fb0d6963f7d28e17f72"; + in4, "03dd8807a93175fb062dfb55dc7d359c"; + in5, "1af6d6f2f682f76f80e606aeaaee1680"] + +module TestBLAKE512 = Test(Digest.BLAKE512) +let _ = TestBLAKE512.run_tests + [in1, "786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419\ + d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce"; + in2, "333fcb4ee1aa7c115355ec66ceac917c8bfd815bf7587d325aec1864edd24e34\ + d5abe2c6b1b5ee3face62fed78dbef802f2a85cb91d455a8f5249d330853cb3c"; + in3, "ba80a53f981c4d0d6a2797b69f12f6e94c212f14685ac4b74b12bb6fdbffa2d1\ + 7d87c5392aab792dc252d5de4533cc9518d38aa8dbf1925ab92386edd4009923"; + in4, "ce741ac5930fe346811175c5227bb7bfcd47f42612fae46c0809514f9e0e3a11\ + ee1773287147cdeaeedff50709aa716341fe65240f4ad6777d6bfaf9726e5e52"; + in5, "fe89a110a412012e7cc5c0e05b03b48a6b9d0ba108187826c5ac82ce7aa45e7e\ + 31b054979ec8ca5acd0bcc85f379d848f90f9d1593358cba8d88c7cd94ea8eee"] + +module TestBLAKE256 = Test(Digest.BLAKE256) +let _ = TestBLAKE256.run_tests + [in1, "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8"; + in2, "8928aae63c84d87ea098564d1e03ad813f107add474e56aedd286349c0c03ea4"; + in3, "bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319"; + in4, "90a0bcf5e5a67ac1578c2754617994cfc248109275a809a0721feebd1e918738"; + in5, "b717c86cf745507ec5373f12f21350eb8550039b4263f7ba6e8df9030b5673c6"] + +module TestBLAKE128 = Test(Digest.BLAKE128) +let _ = TestBLAKE128.run_tests + [in1, "cae66941d9efbd404e4d88758ea67670"; + in2, "27c35e6e9373877f29e562464e46497e"; + in3, "cf4ab791c62b8d2b2109c90275287816"; + in4, "8fa81cd08c10a6e4dd94583e6fb48c2f"; + in5, "5c4b4b762807b3290e7eee0aa9b18655"]