Skip to content

Commit

Permalink
fix: Handle Response::utf8Data with unsupported encodings
Browse files Browse the repository at this point in the history
Sometimes, a response declares an unsupported encoding.
It is not very practical to check for supported encodings up front, because
mb_list_encodings doesn't return all the supported encodings.

My current solution is "try and see": if the conversion fails, force the
conversion from UTF-8 (which replaces unsupported characters with "?").
  • Loading branch information
marienfressinaud committed Aug 3, 2023
1 parent 45d60fd commit a82bb53
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 2 deletions.
7 changes: 5 additions & 2 deletions lib/SpiderBits/src/Response.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,11 @@ public function encoding(): string
*/
public function utf8Data(): string
{
    // Returns the response data converted to UTF-8.
    //
    // "Try and see" strategy: the declared encoding is handed to mbstring
    // directly rather than being validated beforehand.
    try {
        return mb_convert_encoding($this->data, 'utf-8', $this->encoding());
    } catch (\ValueError $exception) {
        // The declared encoding is not supported by mbstring. Fall back to
        // treating the data as UTF-8: bytes that are not valid UTF-8 are
        // replaced by the substitute character ("?" by default).
        return mb_convert_encoding($this->data, 'utf-8', 'utf-8');
    }
}

/**
Expand Down
38 changes: 38 additions & 0 deletions tests/lib/SpiderBits/ResponseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,44 @@ public function testHeaderReturnsDefaultValueIfMissing(): void
$this->assertSame('text/html', $header);
}

public function testUtf8Data(): void
{
    // Arrange: a body that is really ISO-8859-1 encoded, declared as such
    // in the Content-Type header.
    $content = mb_convert_encoding('Test ëéàçï', 'ISO-8859-1', 'UTF-8');
    $rawResponse = <<<TEXT
HTTP/2 200 OK\r
Content-Type: text/plain; charset="ISO-8859-1"\r
\r
{$content}
TEXT;
    $response = Response::fromText($rawResponse);

    // Sanity check: the declared charset is the one detected.
    $this->assertSame('ISO-8859-1', $response->encoding());

    // Act
    $utf8 = $response->utf8Data();

    // Assert: the accented characters survive the conversion to UTF-8.
    $this->assertSame('Test ëéàçï', $utf8);
}

public function testUtf8DataWithUnsupportedEncoding(): void
{
    // Arrange: an ISO-8859-1 encoded body served with a charset name that
    // is not a real encoding.
    $content = mb_convert_encoding('Test ëéàçï', 'ISO-8859-1', 'UTF-8');
    $rawResponse = <<<TEXT
HTTP/2 200 OK\r
Content-Type: text/plain; charset="Bad-Encoding"\r
\r
{$content}
TEXT;
    $response = Response::fromText($rawResponse);

    // Sanity check: the bogus charset is reported as declared.
    $this->assertSame('Bad-Encoding', $response->encoding());

    // Act
    $utf8 = $response->utf8Data();

    // Assert: each of the five accented characters comes back as "?".
    $this->assertSame('Test ?????', $utf8);
}

public function testEncodingWithNoSpecifiedEncoding(): void
{
$text = <<<TEXT
Expand Down

0 comments on commit a82bb53

Please sign in to comment.