Skip to content

Commit 6bdb539

Browse files
committed
Merge pull request #12 from clue-labs/transcode
Transcode mixed encodings to UTF-8
2 parents 44c82eb + 34587a7 commit 6bdb539

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
"php": ">=5.3",
1818
"react/event-loop": "~0.4.0|~0.3.0",
1919
"clue/buzz-react": "~0.2.0",
20-
"ext-simplexml": "*"
20+
"ext-simplexml": "*",
21+
"neitanod/forceutf8": "~1.4"
2122
},
2223
"require-dev": {
2324
"clue/block-react": "~0.1.0"

src/Io/Loader.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ public function loadXmlFile($path)
2626

2727
public function loadXmlString($html)
2828
{
29+
// log output often mixes ISO-8859-1 and UTF-8 byte sequences, so normalize everything to UTF-8 first
30+
$html = \ForceUTF8\Encoding::toUTF8($html);
31+
2932
// fix invalid markup of outdated ViewVC versions
3033
// - help link in footer not terminated
3134
// - selected branch/tag in CVS "sticky tag" dropdown has no attribute value

tests/Io/LoaderTest.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ public function testHtmlEntities()
3939
$this->assertEquals('ä… ©', (string)$xml);
4040
}
4141

42+
public function testMixedEncodings()
43+
{
44+
// input mixes a UTF-8 encoded "ä" with a raw ISO-8859-1 byte (\xFC = "ü")
45+
$str = "<p>ä and \xFC</p>";
46+
$xml = $this->loader->loadXmlString($str);
47+
48+
$this->assertEquals('ä and ü', (string)$xml);
49+
}
50+
4251
public function testLoadInvalidMarkupInputNotClosed()
4352
{
4453
$str = '<input type="hidden">';

0 commit comments

Comments
 (0)