Skip to content

Commit

Permalink
feat: use wasm content hash implementation (microsoft#288)
Browse files Browse the repository at this point in the history
The hash speed was bugging me, particularly since we're soon going to
need to hash files twice (as of microsoft#277) so last night I wanted to see if a
webassembly version would fare better. Turns out it does. Hashing 1KB:

```
              7,520 ops/sec > current (1x)
             72,500 ops/sec > wasm (9.64x)

  Benches: 2
  Fastest: wasm
  Elapsed: 10.9s
```

The current JS algorithm is only hashing 7.5MB/s, which may sound like
a decent amount, but remember some node_modules can be massive. This
brings that up a bit.

![](https://i.redd.it/tfugj4n3l6ez.png)
  • Loading branch information
connor4312 authored Jan 31, 2020
1 parent ce91b48 commit 96fccf1
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 111 deletions.
4 changes: 3 additions & 1 deletion gulpfile.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,9 @@ gulp.task('package:copy-extension-files', () =>
base: buildDir,
},
),
gulp.src('node_modules/source-map/lib/*.wasm').pipe(rename({ dirname: 'src' })),
gulp
.src(['node_modules/source-map/lib/*.wasm', 'node_modules/@c4312/chromehash/pkg/*.wasm'])
.pipe(rename({ dirname: 'src' })),
gulp.src(`${buildDir}/src/**/*.sh`).pipe(rename({ dirname: 'src' })),
).pipe(gulp.dest(distDir)),
);
Expand Down
10 changes: 5 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@
"intTest": "mocha --exit --timeout 20000 -s 3500 -u tdd --colors --reporter out/src/int-chrome/testSupport/loggingReporter.js \"./out/src/int-chrome/**/*.test.js\""
},
"dependencies": {
"@c4312/chromehash": "^0.2.0",
"color": "^3.1.2",
"glob-stream": "^6.1.0",
"js-beautify": "^1.10.0",
"jsonc-parser": "^2.2.0",
"long": "^4.0.0",
"micromatch": "^4.0.2",
"source-map": "^0.7.3",
"split2": "^3.1.1",
Expand Down
107 changes: 3 additions & 104 deletions src/common/hash/hash.ts
Original file line number Diff line number Diff line change
@@ -1,110 +1,9 @@
/*---------------------------------------------------------
* Copyright (C) Microsoft Corporation. All rights reserved.
*--------------------------------------------------------*/
import Long from 'long';
import { hash } from '@c4312/chromehash';
import { readFileRaw } from '../fsUtils';

/**
* An implementation of the Chrome content hashing algorithm used to verify
* whether files on disk are the same as those in the debug session.
*
* The input is first passed through `normalize`, then consumed as a sequence
* of 32-bit words that are distributed round-robin over five independent
* hash lanes. Each lane has its own prime modulus, multiplier (`random`) and
* odd multiplier (`randomOdd`).
*
* @param input Raw file contents.
* @returns The five lane values, each rendered as 8 zero-padded lowercase
* hex digits and concatenated in lane order.
*/
function calculateHash(input: Buffer): string {
// Per-lane prime moduli; every lane value is reduced modulo its prime.
const prime = [
new Long(0x3fb75161, 0, true),
new Long(0xab1f4e4f, 0, true),
new Long(0x82675bc5, 0, true),
new Long(0xcd924d35, 0, true),
new Long(0x81abe279, 0, true),
];
// Per-lane multipliers used to advance `zi` after each consumed word.
const random = [
new Long(0x67452301, 0, true),
new Long(0xefcdab89, 0, true),
new Long(0x98badcfe, 0, true),
new Long(0x10325476, 0, true),
new Long(0xc3d2e1f0, 0, true),
];
// Per-lane odd multipliers applied to each input word before masking.
const randomOdd = [
new Long(0xb4663807, 0, true),
new Long(0xcc322bf5, 0, true),
new Long(0xd4f91bbd, 0, true),
new Long(0xa7bea11d, 0, true),
new Long(0x8f462907, 0, true),
];

// Running hash value of each lane, starting at zero.
const hashes = [
new Long(0, 0, true),
new Long(0, 0, true),
new Long(0, 0, true),
new Long(0, 0, true),
new Long(0, 0, true),
];
// Running weight of each lane, starting at one.
const zi = [
new Long(1, 0, true),
new Long(1, 0, true),
new Long(1, 0, true),
new Long(1, 0, true),
new Long(1, 0, true),
];
// Mask that keeps only the low 31 bits of each multiplied word.
const k0x7FFFFFFF = new Long(0x7fffffff);

const buffer = normalize(input);
// Number of bytes consumed per word.
const inc = 4;

// First pass reads 4 bytes at a time
// `current` is the index of the lane that receives the next word.
let current = 0;
for (let i = 0; i < buffer.byteLength - (buffer.byteLength % inc); i += inc) {
const d = buffer.readUInt32LE(i);
const v = d;

const xi = new Long(v).mul(randomOdd[current]).and(k0x7FFFFFFF);
hashes[current] = hashes[current].add(zi[current].mul(xi)).mod(prime[current]);
zi[current] = zi[current].mul(random[current]).mod(prime[current]);
// Advance to the next lane, wrapping back to lane 0 after the last one.
current = current === hashes.length - 1 ? 0 : current + 1;
}

// If the byte length is not a multiple of 4, fold the 1-3 leftover bytes
// into one final word and hash it like any other word.
if (buffer.byteLength % inc) {
let v = 0;
// Unlike the little-endian reads above, the leftover bytes are packed
// with the earliest byte in the most significant position.
for (let i = buffer.byteLength - (buffer.byteLength % inc); i < buffer.byteLength; ++i) {
v <<= 8;
v |= buffer.readUInt8(i);
}
const xi = new Long(v).mul(randomOdd[current]).and(k0x7FFFFFFF);
hashes[current] = hashes[current].add(zi[current].mul(xi)).mod(prime[current]);
zi[current] = zi[current].mul(random[current]).mod(prime[current]);
current = current === hashes.length - 1 ? 0 : current + 1;
}

// Finalization: mix each lane's final weight, scaled by (prime - 1), into
// that lane's hash.
for (let i = 0; i < hashes.length; ++i) {
hashes[i] = hashes[i].add(zi[i].mul(prime[i].sub(1))).mod(prime[i]);
}

// Render each lane as 8 zero-padded hex digits and concatenate them.
let hash = '';
for (let i = 0; i < hashes.length; ++i) {
hash += hashes[i].toString(16).padStart(8, '0');
}
return hash;
}

/** Whether the buffer begins with the UTF-8 byte order mark (EF BB BF). */
const hasUTF8BOM = (buffer: Buffer) =>
  buffer.byteLength >= 3 && buffer[0] === 0xef && buffer[1] === 0xbb && buffer[2] === 0xbf;

/** Whether the buffer begins with the UTF-16 little-endian byte order mark (FF FE). */
const hasUtf16LEBOM = (buffer: Buffer) =>
  buffer.byteLength >= 2 && buffer[0] === 0xff && buffer[1] === 0xfe;

/** Whether the buffer begins with the UTF-16 big-endian byte order mark (FE FF). */
const hasUtf16BEBOM = (buffer: Buffer) =>
  buffer.byteLength >= 2 && buffer[0] === 0xfe && buffer[1] === 0xff;

/**
 * Converts raw file bytes into BOM-less UTF-16LE bytes so that the same text
 * hashes identically regardless of its on-disk encoding.
 *
 * The input buffer is never modified. For UTF-16LE input the result is a view
 * that shares memory with `buffer`; in every other case it is a new buffer.
 *
 * @param buffer Raw file contents, optionally prefixed with a byte order mark.
 * @returns The contents as UTF-16LE bytes with the byte order mark removed.
 * @throws RangeError if the input is UTF-16BE with an odd number of payload
 * bytes (raised by `Buffer#swap16`).
 */
function normalize(buffer: Buffer): Buffer {
  // Strip the UTF-8 BOM, then run the remainder through the same detection.
  if (hasUTF8BOM(buffer)) return normalize(buffer.subarray(3));
  if (hasUtf16LEBOM(buffer)) return buffer.subarray(2);
  // swap16() swaps bytes in place, and a subarray shares memory with its
  // parent, so copy first to avoid corrupting the caller's buffer.
  if (hasUtf16BEBOM(buffer)) return Buffer.from(buffer.subarray(2)).swap16();
  // if no byte order mark, assume it's utf8
  return utf8ToUtf16(buffer);
}

/** Decodes the buffer as UTF-8 and re-encodes the resulting text as UTF-16LE. */
function utf8ToUtf16(buffer: Buffer) {
  return Buffer.from(buffer.toString('utf8'), 'utf16le');
}

/**
* Message sent to the hash worker.
*/
Expand All @@ -120,12 +19,12 @@ function startWorker(send: (message: HashResponse) => void) {
if ('file' in msg) {
const file = msg.file;
readFileRaw(file)
.then(data => send({ id: msg.id, hash: calculateHash(data) }))
.then(data => send({ id: msg.id, hash: hash(data) }))
.catch(() => send({ id: msg.id }));
} else if ('data' in msg) {
send({
id: msg.id,
hash: calculateHash(msg.data instanceof Buffer ? msg.data : Buffer.from(msg.data, 'utf-8')),
hash: hash(msg.data instanceof Buffer ? msg.data : Buffer.from(msg.data, 'utf-8')),
});
}
});
Expand Down

0 comments on commit 96fccf1

Please sign in to comment.