Skip to content

Commit 640068c

Browse files
Detect binary file by NULL byte (#219)
* Detect binary file by NULL byte
* Add description where the binary check and values come from
---------
Co-authored-by: Patrick Beuks <code@beuks.net>
1 parent ba22bb2 commit 640068c

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

src/Gitonomy/Git/Blob.php

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
*/
2020
class Blob
2121
{
22+
/**
23+
* @var int Number of leading bytes that git scans for a NUL byte when deciding whether content is binary: https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193
24+
*/
25+
private const FIRST_FEW_BYTES = 8000;
26+
2227
/**
2328
* @var Repository
2429
*/
@@ -39,6 +44,11 @@ class Blob
3944
*/
4045
protected $mimetype;
4146

47+
/**
48+
* @var bool
49+
*/
50+
protected $text;
51+
4252
/**
4353
* @param Repository $repository Repository where the blob is located
4454
* @param string $hash Hash of the blob
@@ -89,6 +99,9 @@ public function getMimetype()
8999
/**
90100
* Determines if file is binary.
91101
*
102+
* Uses the same heuristic as git, which treats content as binary when a NUL byte appears within its first 8000 bytes; see:
103+
* https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193
104+
*
92105
* @return bool
93106
*/
94107
public function isBinary()
@@ -99,10 +112,17 @@ public function isBinary()
99112
/**
100113
* Determines if file is text.
101114
*
115+
* Uses the same heuristic as git: content is text when no NUL byte appears within its first 8000 bytes (FIRST_FEW_BYTES); see:
116+
* https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.44.0#n193
117+
*
102118
* @return bool
103119
*/
104120
public function isText()
105121
{
106-
return (bool) preg_match('#^text/|^application/xml#', $this->getMimetype());
122+
if (null === $this->text) {
123+
$this->text = !str_contains(substr($this->getContent(), 0, self::FIRST_FEW_BYTES), chr(0));
124+
}
125+
126+
return $this->text;
107127
}
108128
}

tests/Gitonomy/Git/Tests/BlobTest.php

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ public function getReadmeBlob($repository)
2323
return $repository->getCommit(self::LONGFILE_COMMIT)->getTree()->resolvePath('README.md');
2424
}
2525

26+
public function getImageBlob($repository)
27+
{
28+
return $repository->getCommit(self::LONGFILE_COMMIT)->getTree()->resolvePath('image.jpg');
29+
}
30+
2631
/**
2732
* @dataProvider provideFoobar
2833
*/
@@ -67,16 +72,20 @@ public function testGetMimetype($repository)
6772
*/
6873
public function testIsText($repository)
6974
{
70-
$blob = $this->getReadmeBlob($repository);
71-
$this->assertTrue($blob->isText());
75+
$readmeBlob = $this->getReadmeBlob($repository);
76+
$this->assertTrue($readmeBlob->isText());
77+
$imageBlob = $this->getImageBlob($repository);
78+
$this->assertFalse($imageBlob->isText());
7279
}
7380

7481
/**
7582
* @dataProvider provideFoobar
7683
*/
7784
public function testIsBinary($repository)
7885
{
79-
$blob = $this->getReadmeBlob($repository);
80-
$this->assertFalse($blob->isBinary());
86+
$readmeBlob = $this->getReadmeBlob($repository);
87+
$this->assertFalse($readmeBlob->isBinary());
88+
$imageBlob = $this->getImageBlob($repository);
89+
$this->assertTrue($imageBlob->isBinary());
8190
}
8291
}

0 commit comments

Comments
 (0)