Skip to content

Commit

Permalink
Merge pull request #49 from thephpleague/utf8-support
Browse files Browse the repository at this point in the history
UTF-8 support
  • Loading branch information
colinodell committed Jan 6, 2015
2 parents 824243b + 8c9d004 commit 3c0a3bc
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 1,237 deletions.
6 changes: 2 additions & 4 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
"colinodell/commonmark-php": "*"
},
"require": {
"php": ">=5.3.3"
},
"suggest": {
"ext-mbstring": "Enables faster performance when normalizing link references"
"php": ">=5.3.3",
"ext-mbstring": "*"
},
"require-dev": {
"erusev/parsedown": "~1.0",
Expand Down
18 changes: 7 additions & 11 deletions src/Cursor.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Cursor
public function __construct($line)
{
$this->line = $line;
$this->length = strlen($line);
$this->length = mb_strlen($line, 'utf-8');
}

/**
Expand Down Expand Up @@ -103,7 +103,7 @@ public function getCharacter($index = null)
return null;
}

return $this->line[$index];
return mb_substr($this->line, $index, 1, 'utf-8');
}

/**
Expand All @@ -115,11 +115,7 @@ public function getCharacter($index = null)
*/
public function peek($offset = 1)
{
if (!isset($this->line[$this->currentPosition + $offset])) {
return null;
}

return $this->line[$this->currentPosition + $offset];
return $this->getCharacter($this->currentPosition + $offset);
}

/**
Expand Down Expand Up @@ -195,7 +191,7 @@ public function advanceWhileMatches($character, $maximumCharactersToAdvance = nu

$max = min($start + $maximumCharactersToAdvance, $this->length);

while ($newIndex < $max && $this->line[$newIndex] === $character) {
while ($newIndex < $max && $this->getCharacter($newIndex) === $character) {
++$newIndex;
}

Expand Down Expand Up @@ -242,7 +238,7 @@ public function getRemainder()
if ($this->isAtEnd()) {
return '';
} else {
return substr($this->line, $this->currentPosition);
return mb_substr($this->line, $this->currentPosition, $this->length, 'utf-8');
}
}

Expand Down Expand Up @@ -280,7 +276,7 @@ public function match($regex)

// [0][0] contains the matched text
// [0][1] contains the index of that match
$this->advanceBy($matches[0][1] + strlen($matches[0][0]));
$this->advanceBy($matches[0][1] + mb_strlen($matches[0][0], 'utf-8'));

return $matches[0][0];
}
Expand Down Expand Up @@ -318,6 +314,6 @@ public function getPosition()
*/
public function getPreviousText()
{
return substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition);
return mb_substr($this->line, $this->previousPosition, $this->currentPosition - $this->previousPosition, 'utf-8');
}
}
4 changes: 2 additions & 2 deletions src/Inline/Parser/CloseBracketParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,9 @@ protected function tryParseReference(Cursor $cursor, ReferenceMap $referenceMap,
$n = LinkParserHelper::parseLinkLabel($cursor);
if ($n === 0 || $n === 2) {
// Empty or missing second label
$reflabel = substr($cursor->getLine(), $opener->getIndex(), $startPos - $opener->getIndex());
$reflabel = mb_substr($cursor->getLine(), $opener->getIndex(), $startPos - $opener->getIndex(), 'utf-8');
} else {
$reflabel = substr($cursor->getLine(), $beforeLabel + 1, $n - 2);
$reflabel = mb_substr($cursor->getLine(), $beforeLabel + 1, $n - 2, 'utf-8');
}

if ($n === 0) {
Expand Down
8 changes: 0 additions & 8 deletions src/Reference/Reference.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

namespace League\CommonMark\Reference;

use League\CommonMark\Util\UnicodeCaseFolder;

/**
* Link reference
*/
Expand Down Expand Up @@ -89,12 +87,6 @@ public static function normalizeReference($string)
// leading/trailing whitespace
$string = preg_replace('/\s+/', '', trim($string));

// Convert to upper-case using Unicode case folding
// Use an alternate method if mb_strtoupper isn't available
if (!function_exists('mb_strtoupper')) {
return UnicodeCaseFolder::toUpperCase($string);
}

return mb_strtoupper($string, 'UTF-8');
}
}
2 changes: 1 addition & 1 deletion src/Util/Html5Entities.php
Original file line number Diff line number Diff line change
Expand Up @@ -2283,7 +2283,7 @@ public static function fromDecimal($number)

$entity = '&#' . $number . ';';

$converted = html_entity_decode($entity, ENT_COMPAT, 'UTF-8');
$converted = mb_decode_numericentity($entity, array(0x0, 0x2FFFF, 0, 0xFFFF), 'UTF-8');

if ($converted === $entity) {
return self::fromHex('fffd');
Expand Down
2 changes: 1 addition & 1 deletion src/Util/LinkParserHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static function parseLinkLabel(Cursor $cursor)
{
$match = $cursor->match('/^\[(?:[^\\\\\[\]]|\\\\[\[\]]){0,750}\]/');

return $match === null ? 0 : strlen($match);
return $match === null ? 0 : mb_strlen($match, 'utf-8');
}

/**
Expand Down
3 changes: 1 addition & 2 deletions src/Util/TextHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ public static function detabLine($string)

foreach ($parts as $part) {
// Calculate number of spaces; insert them followed by the non-tab contents
$lineLength = strlen(utf8_decode($line));
$amount = 4 - $lineLength % 4;
$amount = 4 - mb_strlen($line, 'UTF-8') % 4;
$line .= str_repeat(' ', $amount) . $part;
}

Expand Down
Loading

0 comments on commit 3c0a3bc

Please sign in to comment.