From a82bb5342954655f681253ea0aacda36568d3d60 Mon Sep 17 00:00:00 2001 From: Marien Fressinaud Date: Thu, 3 Aug 2023 22:28:29 +0200 Subject: [PATCH] fix: Handle Response::utf8Data with unsupported encodings Sometimes, a response declares an unsupported encoding. It is not very practical to check for supported encodings beforehand, as mb_list_encodings doesn't return all the supported encodings. My current solution is "try and see". If the conversion fails, force the conversion from UTF-8 (and replace unsupported characters with "?"). --- lib/SpiderBits/src/Response.php | 7 +++-- tests/lib/SpiderBits/ResponseTest.php | 38 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/lib/SpiderBits/src/Response.php b/lib/SpiderBits/src/Response.php index 3e737417..7e07bef2 100644 --- a/lib/SpiderBits/src/Response.php +++ b/lib/SpiderBits/src/Response.php @@ -138,8 +138,11 @@ public function encoding(): string */ public function utf8Data(): string { - $encoding = $this->encoding(); - return mb_convert_encoding($this->data, 'utf-8', $encoding); + try { + return mb_convert_encoding($this->data, 'utf-8', $this->encoding()); + } catch (\ValueError $exception) { + return mb_convert_encoding($this->data, 'utf-8', 'utf-8'); + } } /** diff --git a/tests/lib/SpiderBits/ResponseTest.php b/tests/lib/SpiderBits/ResponseTest.php index 3a871e92..e0a5c4dd 100644 --- a/tests/lib/SpiderBits/ResponseTest.php +++ b/tests/lib/SpiderBits/ResponseTest.php @@ -178,6 +178,44 @@ public function testHeaderReturnsDefaultValueIfMissing(): void $this->assertSame('text/html', $header); } + public function testUtf8Data(): void + { + $content = 'Test ëéàçï'; + $content = mb_convert_encoding($content, 'ISO-8859-1', 'UTF-8'); + $text = <<encoding(); + $this->assertSame('ISO-8859-1', $encoding); + + $data = $response->utf8Data(); + + $this->assertSame('Test ëéàçï', $data); + } + + public function testUtf8DataWithUnsupportedEncoding(): void + { + $content = 'Test ëéàçï'; + $content = 
mb_convert_encoding($content, 'ISO-8859-1', 'UTF-8'); + $text = <<encoding(); + $this->assertSame('Bad-Encoding', $encoding); + + $data = $response->utf8Data(); + + $this->assertSame('Test ?????', $data); + } + public function testEncodingWithNoSpecifiedEncoding(): void { $text = <<