This repository has been archived by the owner on Aug 4, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Bug 781627 - Copy security/nss/lib/freebl/sha_fast.c to mfbt. r=jlebar.
- Loading branch information
Showing
6 changed files
with
591 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,342 @@ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
#include <string.h> | ||
#include "mozilla/SHA1.h" | ||
#include "mozilla/Assertions.h" | ||
|
||
// FIXME: We should probably create a more complete mfbt/Endian.h. This assumes | ||
// that any compiler that doesn't define these macros is little endian. | ||
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) | ||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ | ||
#define MOZ_IS_LITTLE_ENDIAN | ||
#endif | ||
#else | ||
#define MOZ_IS_LITTLE_ENDIAN | ||
#endif | ||
|
||
using namespace mozilla; | ||
|
||
/*
 * Rotate the 32-bit value t left by n bit positions and return the result.
 *
 * Both shift counts are reduced modulo 32. For n in 1..31 this is exactly
 * (t << n) | (t >> (32 - n)); for n == 0 (or any multiple of 32) it returns t
 * unchanged instead of shifting a 32-bit value by 32 bits, which is undefined.
 */
static inline uint32_t SHA_ROTL(uint32_t t, uint32_t n)
{
  const uint32_t left = n & 31;
  const uint32_t right = (32 - n) & 31;
  return (t << left) | (t >> right);
}
|
||
#ifdef MOZ_IS_LITTLE_ENDIAN
/*
 * Little-endian build: return x with its four bytes in reverse order.
 * Each term below moves one byte of x to its mirrored position.
 */
static inline unsigned SHA_HTONL(unsigned x) {
  const unsigned lowToTop = x << 24;
  const unsigned lowMidUp = (x << 8) & 0x00FF0000;
  const unsigned highMidDown = (x >> 8) & 0x0000FF00;
  const unsigned topToLow = x >> 24;
  return lowToTop | lowMidUp | highMidDown | topToLow;
}
#else
/*
 * MOZ_IS_LITTLE_ENDIAN is not defined: the value is returned unchanged.
 */
static inline unsigned SHA_HTONL(unsigned x) {
  return x;
}
#endif
|
||
static void shaCompress(volatile unsigned *X, const uint32_t * datain); | ||
|
||
#define SHA_F1(X,Y,Z) ((((Y)^(Z))&(X))^(Z)) | ||
#define SHA_F2(X,Y,Z) ((X)^(Y)^(Z)) | ||
#define SHA_F3(X,Y,Z) (((X)&(Y))|((Z)&((X)|(Y)))) | ||
#define SHA_F4(X,Y,Z) ((X)^(Y)^(Z)) | ||
|
||
#define SHA_MIX(n,a,b,c) XW(n) = SHA_ROTL(XW(a)^XW(b)^XW(c)^XW(n), 1) | ||
|
||
/*
 * Start a new hash: no bytes counted yet, not finished, and the first five
 * entries of H set to the initial constants from FIPS180-1. The remaining
 * entries of H and the input buffer u are not initialized here.
 */
SHA1Sum::SHA1Sum()
  : size(0),
    mDone(false)
{
  static const uint32_t kInitialState[5] = {
    0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
  };

  for (unsigned i = 0; i < 5; ++i) {
    H[i] = kInitialState[i];
  }
}
|
||
/* Explanation of H array and index values: | ||
* The context's H array is actually the concatenation of two arrays | ||
* defined by SHA1, the H array of state variables (5 elements), | ||
* and the W array of intermediate values, of which there are 16 elements. | ||
* The W array starts at H[5], that is W[0] is H[5]. | ||
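* Although these values are defined as 32-bit values, this comment was | ||
* apparently written for a version that uses 64-bit variables to hold them, | ||
* because the AMD64 stores 64 bit values in memory MUCH faster than it | ||
* stores any smaller values. | ||
* NOTE: in this file H is declared as "unsigned H[22]" (see SHA1.h) and | ||
* shaCompress takes a "volatile unsigned *", so the element size, and the | ||
* byte offsets discussed below, depend on sizeof(unsigned) rather than | ||
* being 8 bytes. | ||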
* | ||
* Rather than passing the context structure to shaCompress, we pass | ||
* this combined array of H and W values. We do not pass the address | ||
* of the first element of this array, but rather pass the address of an | ||
* element in the middle of the array, element X. Presently X[0] is H[11]. | ||
* So we pass the address of H[11] as the address of array X to shaCompress. | ||
* Then shaCompress accesses the members of the array using positive AND | ||
* negative indexes. | ||
* | ||
* Pictorially: (each element is 8 bytes) | ||
* H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf | | ||
* X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 | | ||
* | ||
* The byte offset from X[0] to any member of H and W is always | ||
* representable in a signed 8-bit value, which will be encoded | ||
* as a single byte offset in the X86-64 instruction set. | ||
* If we didn't pass the address of H[11], and instead passed the | ||
* address of H[0], the offsets to elements H[16] and above would be | ||
* greater than 127, not representable in a signed 8-bit value, and the | ||
* x86-64 instruction set would encode every such offset as a 32-bit | ||
* signed number in each instruction that accessed element H[16] or | ||
* higher. This results in much bigger and slower code. | ||
*/ | ||
#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */ | ||
#define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */ | ||
|
||
/* | ||
* SHA: Add data to context. | ||
*/ | ||
void SHA1Sum::update(const uint8_t *dataIn, uint32_t len) | ||
{ | ||
MOZ_ASSERT(!mDone); | ||
register unsigned int lenB; | ||
register unsigned int togo; | ||
|
||
if (!len) | ||
return; | ||
|
||
/* accumulate the byte count. */ | ||
lenB = (unsigned int)(size) & 63U; | ||
|
||
size += len; | ||
|
||
/* | ||
* Read the data into W and process blocks as they get full | ||
*/ | ||
if (lenB > 0) { | ||
togo = 64U - lenB; | ||
if (len < togo) | ||
togo = len; | ||
memcpy(u.b + lenB, dataIn, togo); | ||
len -= togo; | ||
dataIn += togo; | ||
lenB = (lenB + togo) & 63U; | ||
if (!lenB) { | ||
shaCompress(&H[H2X], u.w); | ||
} | ||
} | ||
while (len >= 64U) { | ||
len -= 64U; | ||
shaCompress(&H[H2X], (uint32_t *)dataIn); | ||
dataIn += 64U; | ||
} | ||
if (len) { | ||
memcpy(u.b, dataIn, len); | ||
} | ||
} | ||
|
||
|
||
/* | ||
* SHA: Generate hash value | ||
*/ | ||
void SHA1Sum::finish(uint8_t hashout[20]) | ||
{ | ||
MOZ_ASSERT(!mDone); | ||
register uint64_t size2 = size; | ||
register uint32_t lenB = (uint32_t)size2 & 63; | ||
|
||
static const uint8_t bulk_pad[64] = { 0x80,0,0,0,0,0,0,0,0,0, | ||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, | ||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; | ||
|
||
/* | ||
* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits | ||
*/ | ||
|
||
update(bulk_pad, (((55+64) - lenB) & 63) + 1); | ||
MOZ_ASSERT(((uint32_t)size & 63) == 56); | ||
/* Convert size from bytes to bits. */ | ||
size2 <<= 3; | ||
u.w[14] = SHA_HTONL((uint32_t)(size2 >> 32)); | ||
u.w[15] = SHA_HTONL((uint32_t)size2); | ||
shaCompress(&H[H2X], u.w); | ||
|
||
/* | ||
* Output hash | ||
*/ | ||
u.w[0] = SHA_HTONL(H[0]); | ||
u.w[1] = SHA_HTONL(H[1]); | ||
u.w[2] = SHA_HTONL(H[2]); | ||
u.w[3] = SHA_HTONL(H[3]); | ||
u.w[4] = SHA_HTONL(H[4]); | ||
memcpy(hashout, u.w, 20); | ||
mDone = true; | ||
} | ||
|
||
/* | ||
* SHA: Compression function, unrolled. | ||
* | ||
* Some operations in shaCompress are done as 5 groups of 16 operations. | ||
* Others are done as 4 groups of 20 operations. | ||
* The code below shows that structure. | ||
* | ||
* The functions that compute the new values of the 5 state variables | ||
* A-E are done in 4 groups of 20 operations (or you may also think | ||
* of them as being done in 16 groups of 5 operations). They are | ||
* done by the SHA_RNDx macros below, in the right column. | ||
* | ||
* The functions that set the 16 values of the W array are done in | ||
* 5 groups of 16 operations. The first group is done by the | ||
* LOAD macros below, the latter 4 groups are done by SHA_MIX below, | ||
* in the left column. | ||
* | ||
* gcc's optimizer observes that each member of the W array is assigned | ||
* a value 5 times in this code. It reduces the number of store | ||
* operations done to the W array in the context (that is, in the X array) | ||
* by creating a W array on the stack, and storing the W values there for | ||
* the first 4 groups of operations on W, and storing the values in the | ||
* context's W array only in the fifth group. This is undesirable. | ||
* It is MUCH bigger code than simply using the context's W array, because | ||
* all the offsets to the W array in the stack are 32-bit signed offsets, | ||
* and it is no faster than storing the values in the context's W array. | ||
* | ||
* The original code for sha_fast.c prevented this creation of a separate | ||
* W array in the stack by creating a W array of 80 members, each of | ||
* whose elements is assigned only once. It also separated the computations | ||
* of the W array values and the computations of the values for the 5 | ||
* state variables into two separate passes, W's, then A-E's so that the | ||
* second pass could be done all in registers (except for accessing the W | ||
* array) on machines with fewer registers. The method is suboptimal | ||
* for machines with enough registers to do it all in one pass, and it | ||
* necessitates using many instructions with 32-bit offsets. | ||
* | ||
* This code eliminates the separate W array on the stack by a completely | ||
* different means: by declaring the X array volatile. This prevents | ||
* the optimizer from trying to reduce the use of the X array by the | ||
* creation of a MORE expensive W array on the stack. The result is | ||
* that all instructions use signed 8-bit offsets and not 32-bit offsets. | ||
* | ||
* The combination of this code and the -O3 optimizer flag on GCC 3.4.3 | ||
* results in code that is 3 times faster than the previous NSS sha_fast | ||
* code on AMD64. | ||
*/ | ||
static void | ||
shaCompress(volatile unsigned *X, const uint32_t *inbuf) | ||
{ | ||
register unsigned A, B, C, D, E; | ||
|
||
|
||
#define XH(n) X[n-H2X] | ||
#define XW(n) X[n-W2X] | ||
|
||
#define K0 0x5a827999L | ||
#define K1 0x6ed9eba1L | ||
#define K2 0x8f1bbcdcL | ||
#define K3 0xca62c1d6L | ||
|
||
#define SHA_RND1(a,b,c,d,e,n) \ | ||
a = SHA_ROTL(b,5)+SHA_F1(c,d,e)+a+XW(n)+K0; c=SHA_ROTL(c,30) | ||
#define SHA_RND2(a,b,c,d,e,n) \ | ||
a = SHA_ROTL(b,5)+SHA_F2(c,d,e)+a+XW(n)+K1; c=SHA_ROTL(c,30) | ||
#define SHA_RND3(a,b,c,d,e,n) \ | ||
a = SHA_ROTL(b,5)+SHA_F3(c,d,e)+a+XW(n)+K2; c=SHA_ROTL(c,30) | ||
#define SHA_RND4(a,b,c,d,e,n) \ | ||
a = SHA_ROTL(b,5)+SHA_F4(c,d,e)+a+XW(n)+K3; c=SHA_ROTL(c,30) | ||
|
||
#define LOAD(n) XW(n) = SHA_HTONL(inbuf[n]) | ||
|
||
A = XH(0); | ||
B = XH(1); | ||
C = XH(2); | ||
D = XH(3); | ||
E = XH(4); | ||
|
||
LOAD(0); SHA_RND1(E,A,B,C,D, 0); | ||
LOAD(1); SHA_RND1(D,E,A,B,C, 1); | ||
LOAD(2); SHA_RND1(C,D,E,A,B, 2); | ||
LOAD(3); SHA_RND1(B,C,D,E,A, 3); | ||
LOAD(4); SHA_RND1(A,B,C,D,E, 4); | ||
LOAD(5); SHA_RND1(E,A,B,C,D, 5); | ||
LOAD(6); SHA_RND1(D,E,A,B,C, 6); | ||
LOAD(7); SHA_RND1(C,D,E,A,B, 7); | ||
LOAD(8); SHA_RND1(B,C,D,E,A, 8); | ||
LOAD(9); SHA_RND1(A,B,C,D,E, 9); | ||
LOAD(10); SHA_RND1(E,A,B,C,D,10); | ||
LOAD(11); SHA_RND1(D,E,A,B,C,11); | ||
LOAD(12); SHA_RND1(C,D,E,A,B,12); | ||
LOAD(13); SHA_RND1(B,C,D,E,A,13); | ||
LOAD(14); SHA_RND1(A,B,C,D,E,14); | ||
LOAD(15); SHA_RND1(E,A,B,C,D,15); | ||
|
||
SHA_MIX( 0, 13, 8, 2); SHA_RND1(D,E,A,B,C, 0); | ||
SHA_MIX( 1, 14, 9, 3); SHA_RND1(C,D,E,A,B, 1); | ||
SHA_MIX( 2, 15, 10, 4); SHA_RND1(B,C,D,E,A, 2); | ||
SHA_MIX( 3, 0, 11, 5); SHA_RND1(A,B,C,D,E, 3); | ||
|
||
SHA_MIX( 4, 1, 12, 6); SHA_RND2(E,A,B,C,D, 4); | ||
SHA_MIX( 5, 2, 13, 7); SHA_RND2(D,E,A,B,C, 5); | ||
SHA_MIX( 6, 3, 14, 8); SHA_RND2(C,D,E,A,B, 6); | ||
SHA_MIX( 7, 4, 15, 9); SHA_RND2(B,C,D,E,A, 7); | ||
SHA_MIX( 8, 5, 0, 10); SHA_RND2(A,B,C,D,E, 8); | ||
SHA_MIX( 9, 6, 1, 11); SHA_RND2(E,A,B,C,D, 9); | ||
SHA_MIX(10, 7, 2, 12); SHA_RND2(D,E,A,B,C,10); | ||
SHA_MIX(11, 8, 3, 13); SHA_RND2(C,D,E,A,B,11); | ||
SHA_MIX(12, 9, 4, 14); SHA_RND2(B,C,D,E,A,12); | ||
SHA_MIX(13, 10, 5, 15); SHA_RND2(A,B,C,D,E,13); | ||
SHA_MIX(14, 11, 6, 0); SHA_RND2(E,A,B,C,D,14); | ||
SHA_MIX(15, 12, 7, 1); SHA_RND2(D,E,A,B,C,15); | ||
|
||
SHA_MIX( 0, 13, 8, 2); SHA_RND2(C,D,E,A,B, 0); | ||
SHA_MIX( 1, 14, 9, 3); SHA_RND2(B,C,D,E,A, 1); | ||
SHA_MIX( 2, 15, 10, 4); SHA_RND2(A,B,C,D,E, 2); | ||
SHA_MIX( 3, 0, 11, 5); SHA_RND2(E,A,B,C,D, 3); | ||
SHA_MIX( 4, 1, 12, 6); SHA_RND2(D,E,A,B,C, 4); | ||
SHA_MIX( 5, 2, 13, 7); SHA_RND2(C,D,E,A,B, 5); | ||
SHA_MIX( 6, 3, 14, 8); SHA_RND2(B,C,D,E,A, 6); | ||
SHA_MIX( 7, 4, 15, 9); SHA_RND2(A,B,C,D,E, 7); | ||
|
||
SHA_MIX( 8, 5, 0, 10); SHA_RND3(E,A,B,C,D, 8); | ||
SHA_MIX( 9, 6, 1, 11); SHA_RND3(D,E,A,B,C, 9); | ||
SHA_MIX(10, 7, 2, 12); SHA_RND3(C,D,E,A,B,10); | ||
SHA_MIX(11, 8, 3, 13); SHA_RND3(B,C,D,E,A,11); | ||
SHA_MIX(12, 9, 4, 14); SHA_RND3(A,B,C,D,E,12); | ||
SHA_MIX(13, 10, 5, 15); SHA_RND3(E,A,B,C,D,13); | ||
SHA_MIX(14, 11, 6, 0); SHA_RND3(D,E,A,B,C,14); | ||
SHA_MIX(15, 12, 7, 1); SHA_RND3(C,D,E,A,B,15); | ||
|
||
SHA_MIX( 0, 13, 8, 2); SHA_RND3(B,C,D,E,A, 0); | ||
SHA_MIX( 1, 14, 9, 3); SHA_RND3(A,B,C,D,E, 1); | ||
SHA_MIX( 2, 15, 10, 4); SHA_RND3(E,A,B,C,D, 2); | ||
SHA_MIX( 3, 0, 11, 5); SHA_RND3(D,E,A,B,C, 3); | ||
SHA_MIX( 4, 1, 12, 6); SHA_RND3(C,D,E,A,B, 4); | ||
SHA_MIX( 5, 2, 13, 7); SHA_RND3(B,C,D,E,A, 5); | ||
SHA_MIX( 6, 3, 14, 8); SHA_RND3(A,B,C,D,E, 6); | ||
SHA_MIX( 7, 4, 15, 9); SHA_RND3(E,A,B,C,D, 7); | ||
SHA_MIX( 8, 5, 0, 10); SHA_RND3(D,E,A,B,C, 8); | ||
SHA_MIX( 9, 6, 1, 11); SHA_RND3(C,D,E,A,B, 9); | ||
SHA_MIX(10, 7, 2, 12); SHA_RND3(B,C,D,E,A,10); | ||
SHA_MIX(11, 8, 3, 13); SHA_RND3(A,B,C,D,E,11); | ||
|
||
SHA_MIX(12, 9, 4, 14); SHA_RND4(E,A,B,C,D,12); | ||
SHA_MIX(13, 10, 5, 15); SHA_RND4(D,E,A,B,C,13); | ||
SHA_MIX(14, 11, 6, 0); SHA_RND4(C,D,E,A,B,14); | ||
SHA_MIX(15, 12, 7, 1); SHA_RND4(B,C,D,E,A,15); | ||
|
||
SHA_MIX( 0, 13, 8, 2); SHA_RND4(A,B,C,D,E, 0); | ||
SHA_MIX( 1, 14, 9, 3); SHA_RND4(E,A,B,C,D, 1); | ||
SHA_MIX( 2, 15, 10, 4); SHA_RND4(D,E,A,B,C, 2); | ||
SHA_MIX( 3, 0, 11, 5); SHA_RND4(C,D,E,A,B, 3); | ||
SHA_MIX( 4, 1, 12, 6); SHA_RND4(B,C,D,E,A, 4); | ||
SHA_MIX( 5, 2, 13, 7); SHA_RND4(A,B,C,D,E, 5); | ||
SHA_MIX( 6, 3, 14, 8); SHA_RND4(E,A,B,C,D, 6); | ||
SHA_MIX( 7, 4, 15, 9); SHA_RND4(D,E,A,B,C, 7); | ||
SHA_MIX( 8, 5, 0, 10); SHA_RND4(C,D,E,A,B, 8); | ||
SHA_MIX( 9, 6, 1, 11); SHA_RND4(B,C,D,E,A, 9); | ||
SHA_MIX(10, 7, 2, 12); SHA_RND4(A,B,C,D,E,10); | ||
SHA_MIX(11, 8, 3, 13); SHA_RND4(E,A,B,C,D,11); | ||
SHA_MIX(12, 9, 4, 14); SHA_RND4(D,E,A,B,C,12); | ||
SHA_MIX(13, 10, 5, 15); SHA_RND4(C,D,E,A,B,13); | ||
SHA_MIX(14, 11, 6, 0); SHA_RND4(B,C,D,E,A,14); | ||
SHA_MIX(15, 12, 7, 1); SHA_RND4(A,B,C,D,E,15); | ||
|
||
XH(0) += A; | ||
XH(1) += B; | ||
XH(2) += C; | ||
XH(3) += D; | ||
XH(4) += E; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | ||
/* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | ||
|
||
/* Simple class for computing SHA1. */ | ||
|
||
/* | ||
* To compute the SHA1 of a buffer using this class you should write something | ||
* like: | ||
* void SHA1(const uint8_t* buf, unsigned size, uint8_t hash[20]) | ||
* { | ||
* SHA1Sum S; | ||
* S.update(buf, size); | ||
* S.finish(hash); | ||
* } | ||
* If there are multiple buffers or chunks, the update method can be called | ||
* multiple times and the SHA1 is computed on the concatenation of all the | ||
* buffers passed to it. | ||
* The finish method may only be called once and cannot be followed by calls | ||
* to update. | ||
*/ | ||
|
||
#ifndef mozilla_SHA1_h_ | ||
#define mozilla_SHA1_h_ | ||
|
||
#include <stdint.h> | ||
namespace mozilla { | ||
/*
 * Incremental SHA1 computation. Construct, call update() any number of
 * times, then call finish() exactly once to obtain the 20-byte hash.
 */
class SHA1Sum {
private:
  /* Input buffer for one 64-byte block, viewable as words or as bytes. */
  union {
    uint32_t w[16];
    uint8_t b[64];
  } u;

  /* Count of hashed bytes. */
  uint64_t size;

  /* 5 state variables, 16 tmp values, 1 extra. */
  unsigned H[22];

  /* Set by finish(); update() and finish() assert that it is still false. */
  bool mDone;

public:
  /* Size in bytes of the hash written by finish(). */
  static const unsigned int HashSize = 20;

  /* Starts a new, empty hash computation. */
  SHA1Sum();

  /* Appends len bytes starting at dataIn to the data being hashed. */
  void update(const uint8_t *dataIn, uint32_t len);

  /* Completes the computation and writes the 20-byte hash to hashout. */
  void finish(uint8_t hashout[20]);
};
} | ||
|
||
#endif /* mozilla_SHA1_h_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.