Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move file hashing to a worker thread, add async method #21

Merged
merged 14 commits into from
Sep 22, 2019
30 changes: 30 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ declare const hasha: {
): string;
(input: hasha.HashaInput, options: hasha.Options<'buffer'>): Buffer;

/**
Asynchronously calculate the hash for a `string`, `Buffer`, or an array thereof.

In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.

@param input - Data you want to hash.

While strings are supported, you should prefer buffers as they're faster to hash. However, if you already have a string, you should not convert it to a buffer.

Pass an array instead of concatenating strings and/or buffers. The output is the same, but arrays do not incur the overhead of concatenation.

@returns A promise for the hash: a `Buffer` when `options.encoding` is `'buffer'`, otherwise a `string`.

@example
```
import hasha = require('hasha');

(async () => {
console.log(await hasha.async('unicorn'));
//=> 'e233b19aabc7d5e53826fb734d1222f1f0444c3a3fc67ff4af370a66e7cadd2cb24009f1bc86f0bed12ca5fcb226145ad10fc5f650f6ef0959f8aadc5a594b27'
})();
```
*/
async(input: hasha.HashaInput): Promise<string>;
async(
input: hasha.HashaInput,
options: hasha.Options<hasha.ToStringEncoding>
): Promise<string>;
async(input: hasha.HashaInput, options: hasha.Options<'buffer'>): Promise<Buffer>;

/**
Create a [hash transform stream](https://nodejs.org/api/crypto.html#crypto_class_hash).

Expand Down
93 changes: 92 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,65 @@ const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const isStream = require('is-stream');

/**
Load a module that may not exist in the current runtime.

@param name - Module identifier passed to `require`.
@param defaultValue - Value to hand back when requiring the module throws.
@returns The loaded module, or `defaultValue` if it could not be loaded.
*/
const requireOptional = (name, defaultValue) => {
	let resolved;

	try {
		resolved = require(name);
	} catch (_) {
		// The module is unavailable; use the caller-supplied fallback instead.
		resolved = defaultValue;
	}

	return resolved;
};

// `Worker` is `undefined` when `worker_threads` cannot be required (the fallback is an empty object).
const {Worker} = requireOptional('worker_threads', {});

// The single shared worker; stays `undefined` until the first task needs it.
let worker; // Lazy
// Monotonically increasing counter used to give every task a unique id.
let taskIdCounter = 0;
// Pending tasks keyed by id; each value holds the `resolve`/`reject` pair of the task's promise.
const tasks = new Map();

const recreateWorkerError = sourceError => {
const error = new Error(sourceError.message);

for (const key of Object.keys(sourceError)) {
sindresorhus marked this conversation as resolved.
Show resolved Hide resolved
if (key !== 'message') {
error[key] = sourceError[key];
}
}

return error;
};

/**
Spawn the shared hashing worker and attach its event handlers.

Assigns the module-level `worker`. Each message from the worker settles the pending task with the matching `id`: the task is resolved with `message.value`, or rejected with an `Error` rebuilt from `message.error`.
*/
const createWorker = () => {
	// A bare relative filename is resolved against the process working directory, so anchor it to this module's directory instead.
	worker = new Worker(path.join(__dirname, 'thread.js'));

	worker.on('message', message => {
		const task = tasks.get(message.id);
		tasks.delete(message.id);

		if (tasks.size === 0) {
			// Nothing is pending anymore, so the idle worker must not keep the process alive.
			worker.unref();
		}

		if (message.error === undefined) {
			task.resolve(message.value);
		} else {
			task.reject(recreateWorkerError(message.error));
		}
	});

	worker.on('error', error => {
		// Any error here is effectively the equivalent of a segfault and has no scope, so we just throw it at the callback level
		throw error;
	});
};

/**
Run a task on the shared worker, creating the worker on first use.

@param method - Name of the worker-side handler to invoke.
@param args - Arguments forwarded to that handler.
@param transferList - Optional list of objects to transfer (rather than clone) to the worker.
@returns A promise that is settled when the worker answers the message carrying this task's id.
*/
const taskWorker = (method, args, transferList) => new Promise((resolve, reject) => {
	const id = taskIdCounter++;
	tasks.set(id, {resolve, reject});

	try {
		if (worker === undefined) {
			createWorker();
		}

		// Keep the process alive while this task is in flight.
		worker.ref();
		worker.postMessage({id, method, args}, transferList);
	} catch (error) {
		// The message never reached the worker, so no reply will ever settle this task.
		// Forget it and drop the ref again, otherwise the process would be kept alive forever.
		tasks.delete(id);

		if (worker !== undefined && tasks.size === 0) {
			worker.unref();
		}

		reject(error);
	}
});

const hasha = (input, options = {}) => {
let outputEncoding = options.encoding || 'hex';

Expand Down Expand Up @@ -55,7 +114,39 @@ hasha.fromStream = async (stream, options = {}) => {
});
};

hasha.fromFile = async (filePath, options) => hasha.fromStream(fs.createReadStream(filePath), options);
if (Worker === undefined) {
	// `worker_threads` is not available: do the hashing on the main thread.
	hasha.fromFile = async (filePath, options) => hasha.fromStream(fs.createReadStream(filePath), options);
	hasha.async = async (input, options) => hasha(input, options);
} else {
	// Turn the digest bytes sent back by the worker into the requested output:
	// a `Buffer` for the `'buffer'` encoding, otherwise a string in that encoding.
	const encodeWorkerHash = (hash, encoding) => {
		const buffer = Buffer.from(hash);
		return encoding === 'buffer' ? buffer : buffer.toString(encoding);
	};

	/**
	Hash the contents of a file on the worker thread.

	@param filePath - Path of the file to hash.
	@param options - Optional `algorithm` (default `'sha512'`) and `encoding` (default `'hex'`).
	@returns The hash as a `Buffer` when `encoding` is `'buffer'`, otherwise as a string.
	*/
	hasha.fromFile = async (filePath, options = {}) => {
		const {algorithm = 'sha512', encoding = 'hex'} = options;
		const hash = await taskWorker('hashFile', [algorithm, filePath]);
		return encodeWorkerHash(hash, encoding);
	};

	/**
	Hash a string, buffer, or array thereof on the worker thread.

	@param input - Data to hash.
	@param options - Optional `algorithm` (default `'sha512'`) and `encoding` (default `'hex'`).
	@returns The hash as a `Buffer` when `encoding` is `'buffer'`, otherwise as a string.
	*/
	hasha.async = async (input, options = {}) => {
		const {algorithm = 'sha512', encoding = 'hex'} = options;
		const hash = await taskWorker('hashInput', [algorithm, input]);
		return encodeWorkerHash(hash, encoding);
	};
}

// Synchronous variant: read the whole file into memory, then hash its contents with `hasha`.
hasha.fromFileSync = (filePath, options) => {
	const contents = fs.readFileSync(filePath);
	return hasha(contents, options);
};

Expand Down
9 changes: 9 additions & 0 deletions index.test-d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ expectType<string>(hasha(['unicorn']));
// Synchronous API: array inputs yield a string.
expectType<string>(hasha([Buffer.from('unicorn', 'utf8')]));
expectType<string>(hasha(['unicorn', Buffer.from('unicorn', 'utf8')]));

// Asynchronous API: a string promise by default and for string encodings, a Buffer promise for the 'buffer' encoding.
expectType<Promise<string>>(hasha.async('unicorn'));
expectType<Promise<string>>(hasha.async('unicorn', {algorithm: 'md5'}));
expectType<Promise<string>>(hasha.async('unicorn', {encoding: 'latin1'}));
expectType<Promise<Buffer>>(hasha.async('unicorn', {encoding: 'buffer'}));

// Asynchronous API: array inputs (strings, buffers, or a mix of both).
expectType<Promise<string>>(hasha.async(['unicorn']));
expectType<Promise<string>>(hasha.async([Buffer.from('unicorn', 'utf8')]));
expectType<Promise<string>>(hasha.async(['unicorn', Buffer.from('unicorn', 'utf8')]));

// The hash stream can sit in the middle of a pipe chain.
process.stdin.pipe(hasha.stream()).pipe(process.stdout);

// Hashing a stream yields a promise for a string or null.
expectType<Promise<string | null>>(hasha.fromStream(process.stdin));
Expand Down
5 changes: 5 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,10 @@
"proxyquire": "^2.1.0",
"tsd": "^0.7.2",
"xo": "^0.24.0"
},
"xo": {
"rules": {
"import/no-unresolved": "off"
}
}
}
13 changes: 13 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ hasha('unicorn');
```js
const hasha = require('hasha');

(async () => {
console.log(await hasha.async('unicorn'));
//=> 'e233b19aabc7d5e53826fb734d1222f1f0444c3a3fc67ff4af370a66e7cadd2cb24009f1bc86f0bed12ca5fcb226145ad10fc5f650f6ef0959f8aadc5a594b27'
})();
```

```js
const hasha = require('hasha');

// Hash the process input and output the hash sum
process.stdin.pipe(hasha.stream()).pipe(process.stdout);
```
Expand Down Expand Up @@ -91,6 +100,10 @@ Values: `md5` `sha1` `sha256` `sha512` *([Platform dependent](https://nodejs.org

*The `md5` algorithm is good for [file revving](https://github.com/sindresorhus/rev-hash), but you should never use `md5` or `sha1` for anything sensitive. [They're insecure.](http://googleonlinesecurity.blogspot.no/2014/09/gradually-sunsetting-sha-1.html)*

### hasha.async(input, [options])

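Returns a hash asynchronously.

In Node.js 12 or later, the operation is executed using `worker_threads`. A thread is lazily spawned on the first operation and lives until the end of the program execution. It's unrefed, so it won't keep the process alive.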

### hasha.stream([options])

Returns a [hash transform stream](https://nodejs.org/api/crypto.html#crypto_class_hash).
Expand Down
9 changes: 9 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ test('hasha()', t => {
t.is(hasha(fixture, {algorithm: 'md5'}).length, 32);
});

// Exercises `hasha.async()` with buffer, string and array inputs plus the `encoding` and `algorithm` options.
test('hasha.async()', async t => {
	const sha512HexLength = 128;
	const md5HexLength = 32;

	const fromBuffer = await hasha.async(Buffer.from('unicorn'));
	t.is(fromBuffer.length, sha512HexLength);

	const fromString = await hasha.async('unicorn');
	t.is(fromString.length, sha512HexLength);

	const fromStringArray = await hasha.async(['foo', 'bar']);
	t.is(fromStringArray.length, sha512HexLength);

	// A mixed array must hash to the same value as the concatenated input.
	const fromMixedArray = await hasha.async(['foo', Buffer.from('bar')]);
	t.is(fromMixedArray, hasha('foobar'));

	const asBuffer = await hasha.async(Buffer.from('unicorn'), {encoding: 'buffer'});
	t.true(Buffer.isBuffer(asBuffer));

	const md5Hash = await hasha.async(Buffer.from('unicorn'), {algorithm: 'md5'});
	t.is(md5Hash.length, md5HexLength);
});

// `hasha.stream()` must return something `is-stream` recognises as a stream.
test('hasha.stream()', t => {
t.true(isStream(hasha.stream()));
});
Expand Down
56 changes: 56 additions & 0 deletions thread.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
'use strict';
const fs = require('fs');
const crypto = require('crypto');
const {parentPort} = require('worker_threads');

const handlers = {
hashFile: (algorithm, filePath) => new Promise((resolve, reject) => {
const hasher = crypto.createHash(algorithm);
fs.createReadStream(filePath)
.on('error', reject)
.pipe(hasher)
.on('error', reject)
.on('finish', () => {
const {buffer} = hasher.read();
resolve({value: buffer, transferList: [buffer]});
});
}),
hashInput: async (algorithm, input) => {
sindresorhus marked this conversation as resolved.
Show resolved Hide resolved
const hasher = crypto.createHash(algorithm);

if (Array.isArray(input)) {
for (const part of input) {
hasher.update(part);
}
} else {
hasher.update(input);
}

const hash = hasher.digest().buffer;
return {value: hash, transferList: [hash]};
}
};

// Dispatch each incoming task to its handler and post the outcome back, tagged with the task's `id`.
// Success is reported as `{id, value}`; failure as `{id, error}` where `error` is a plain, cloneable description.
parentPort.on('message', async message => {
	try {
		const {method, args} = message;
		const handler = handlers[method];

		if (handler === undefined) {
			throw new Error(`Unknown method '${method}'`);
		}

		const {value, transferList} = await handler(...args);
		parentPort.postMessage({id: message.id, value}, transferList);
	} catch (error) {
		const newError = {message: error.message, stack: error.stack};

		for (const key of Object.keys(error)) {
			const valueType = typeof error[key];

			// Copy primitive extras only. Objects are skipped as before; functions and symbols
			// cannot be structured-cloned and would make the `postMessage` below throw.
			if (valueType !== 'object' && valueType !== 'function' && valueType !== 'symbol') {
				newError[key] = error[key];
			}
		}

		parentPort.postMessage({id: message.id, error: newError});
	}
});