Skip to content

Commit

Permalink
Merge pull request #79 from zimagen/main
Browse files Browse the repository at this point in the history
Fix binary detection for text files containing emoji
  • Loading branch information
gjtorikian authored Oct 23, 2024
2 parents 45bd029 + e2bb5ec commit 856dfd9
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 4 deletions.
14 changes: 10 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,17 +223,23 @@ function isBinaryCheck(fileBuffer: Buffer, bytesRead: number): boolean {
 return true;
 } else if ((fileBuffer[i] < 7 || fileBuffer[i] > 14) && (fileBuffer[i] < 32 || fileBuffer[i] > 127)) {
 // UTF-8 detection
-if (fileBuffer[i] > 193 && fileBuffer[i] < 224 && i + 1 < totalBytes) {
+if (fileBuffer[i] >= 0xc0 && fileBuffer[i] <= 0xdf && i + 1 < totalBytes) {
 i++;
-if (fileBuffer[i] > 127 && fileBuffer[i] < 192) {
+if (fileBuffer[i] >= 0x80 && fileBuffer[i] <= 0xbf) {
 continue;
 }
-} else if (fileBuffer[i] > 223 && fileBuffer[i] < 240 && i + 2 < totalBytes) {
+} else if (fileBuffer[i] >= 0xe0 && fileBuffer[i] <= 0xef && i + 2 < totalBytes) {
 i++;
-if (fileBuffer[i] > 127 && fileBuffer[i] < 192 && fileBuffer[i + 1] > 127 && fileBuffer[i + 1] < 192) {
+if (fileBuffer[i] >= 0x80 && fileBuffer[i] <= 0xbf && fileBuffer[i + 1] >= 0x80 && fileBuffer[i + 1] <= 0xbf) {
 i++;
 continue;
 }
+} else if (fileBuffer[i] >= 0xf0 && fileBuffer[i] <= 0xf7 && i + 3 < totalBytes) {
+i++;
+if (fileBuffer[i] >= 0x80 && fileBuffer[i] <= 0xbf && fileBuffer[i + 1] >= 0x80 && fileBuffer[i + 1] <= 0xbf && fileBuffer[i + 2] >= 0x80 && fileBuffer[i + 2] <= 0xbf) {
+i += 2;
+continue;
+}
 }

 suspiciousBytes++;
Expand Down
1 change: 1 addition & 0 deletions test/fixtures/emoji.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8 emoji 📦
6 changes: 6 additions & 0 deletions test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,9 @@ describe('sync', () => {
});
});
});

// Regression test: the emoji.txt fixture is plain text containing an emoji,
// so the synchronous check must report it as non-binary.
it("should return false on a UTF-8 file with emoji", () => {
  const emojiFixture = path.join(FIXTURE_PATH, "emoji.txt");
  expect(isBinaryFileSync(emojiFixture)).toBe(false);
});

0 comments on commit 856dfd9

Please sign in to comment.