Skip to content

Commit

Permalink
Two improvements to make hashing large files much faster
Browse files Browse the repository at this point in the history
1. Use an index when updating the hasher instead of recopying the input array every time
2. Load the file into the hasher one piece at a time instead of all at once, to avoid memory issues
  • Loading branch information
rctcwyvrn committed May 14, 2020
1 parent 4bcbc6f commit 1632b85
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 15 deletions.
38 changes: 26 additions & 12 deletions src/com/github/rctcwyvrn/blake3/Blake3.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package com.github.rctcwyvrn.blake3;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;

/**
* Translation of the Blake3 reference implemenetation from Rust to Java
* Translation of the Blake3 reference implementation from Rust to Java
* BLAKE3 Source: https://github.com/BLAKE3-team/BLAKE3
* Translator: rctcwyvrn
*/
Expand Down Expand Up @@ -207,7 +208,8 @@ private int startFlag(){
}

private void update(byte[] input) {
while (input.length != 0) {
int currPos = 0;
while (currPos < input.length) {

// Chain the next 64 byte block into this chunk/node
if (blockLen == BLOCK_LEN) {
Expand All @@ -222,11 +224,11 @@ private void update(byte[] input) {

// Take bytes out of the input and update
int want = BLOCK_LEN - this.blockLen; // How many bytes we need to fill up the current block
int canTake = Math.min(want, input.length);
if (canTake >= 0) System.arraycopy(input, 0, block, blockLen, canTake);
int canTake = Math.min(want, input.length - currPos);

System.arraycopy(input, currPos, block, blockLen, canTake);
blockLen += canTake;
input = Arrays.copyOfRange(input, canTake, input.length);
currPos+=canTake;
}
}

Expand Down Expand Up @@ -283,17 +285,29 @@ public Blake3(String context){
* @throws IOException If the file does not exist or cannot be read
*/
public void update(File file) throws IOException {
    // Stream the file through the hasher 4kb at a time so that large files are never
    // held in memory all at once. The file must be fed in exactly once: reading it
    // eagerly with Files.readAllBytes and then streaming it again would hash the
    // contents twice and bring back the memory problem this loop exists to avoid.
    try (InputStream ios = new FileInputStream(file)) {
        byte[] buffer = new byte[4096];
        int read;
        while ((read = ios.read(buffer)) != -1) {
            if (read == buffer.length) {
                // Full buffer: pass it straight through without an extra copy.
                // NOTE(review): assumes update(byte[]) does not retain a reference to
                // the array, since the buffer is overwritten by the next read - confirm.
                update(buffer);
            } else {
                // Short read (typically the tail of the file): hand over only the
                // bytes that were actually filled, not stale data from a previous pass.
                update(Arrays.copyOfRange(buffer, 0, read));
            }
        }
    }
}

/**
* Appends new data to the hash tree
* @param input Data to be added
*/
public void update(byte[] input){
while(input.length != 0) {
int currPos = 0;
while(currPos < input.length) {

// If this chunk has chained in 16 64 bytes of input, add it's CV to the stack
// If this chunk has chained in 16 64 bytes of input, add its CV to the stack
if (chunkState.len() == CHUNK_LEN) {
int[] chunkCV = chunkState.createNode().chainingValue();
long totalChunks = chunkState.chunkCounter + 1;
Expand All @@ -302,9 +316,9 @@ public void update(byte[] input){
}

int want = CHUNK_LEN - chunkState.len();
int take = Math.min(want, input.length);
chunkState.update(Arrays.copyOfRange(input, 0, take));
input = Arrays.copyOfRange(input, take, input.length);
int take = Math.min(want, input.length - currPos);
chunkState.update(Arrays.copyOfRange(input, currPos, currPos + take));
currPos+=take;
}
}

Expand Down
5 changes: 2 additions & 3 deletions src/com/github/rctcwyvrn/blake3/Examples.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@ public static void main(String[] args){
System.out.println("Success: " + hexhash);

// Hashing files
// Warning: Very slow for large files due to lack of optimizations :c
String filename = "LICENSE";
String filename = "License";
try {
Blake3 fileHasher = new Blake3();
fileHasher.update(new File(filename));
String filehash = fileHasher.hexdigest();
if (!filehash.equals("381f3baeddb0ce5202ac9528ecc787c249901e74528ba2cbc5546567a2e0bd33"))
throw new AssertionError();
throw new AssertionError(filehash);
System.out.println("Success: " + filehash);
} catch (IOException e) {
System.err.println("File not found: " + filename);
Expand Down

0 comments on commit 1632b85

Please sign in to comment.