Skip to content

Commit

Permalink
Merge pull request overtrue#31 from xjchengo/refine-dictionary-property
Browse files: browse the repository at this point in the history
加快拼音转文字的速度
  • Loading branch information
overtrue committed Jan 18, 2016
2 parents 67369b3 + 93e3980 commit 17cd2c7
Showing 1 changed file with 31 additions and 14 deletions.
45 changes: 31 additions & 14 deletions src/Pinyin/Pinyin.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,7 @@ class Pinyin
*
* @var array
*/
protected static $dictionary;

/**
* Appends words.
*
* @var array
*/
protected static $appends = array();
protected static $dictionary = array();

/**
* Settings.
Expand All @@ -52,6 +45,12 @@ class Pinyin
'uppercase' => false,
'charset' => 'UTF-8' // GB2312,UTF-8
);
/**
* Internal charset used by this package.
*
* @var string
*/
protected static $internalCharset = 'UTF-8';

/**
* The instance.
Expand All @@ -67,8 +66,9 @@ class Pinyin
*/
private function __construct()
{
if (is_null(static::$dictionary)) {
self::$dictionary = json_decode(file_get_contents(dirname(__DIR__).'/data/dict.php'), true);
if (empty(static::$dictionary)) {
$list = json_decode(file_get_contents(dirname(__DIR__).'/data/dict.php'), true);
static::appends($list);
}
}

Expand Down Expand Up @@ -164,8 +164,8 @@ public static function parse($string, array $settings = array())
$settings = array_merge(self::$settings, $settings);

// add charset set
if (!empty($settings['charset']) && $settings['charset'] != 'UTF-8') {
$string = iconv($settings['charset'], 'UTF-8', $string);
if (!empty($settings['charset']) && $settings['charset'] != static::$internalCharset) {
$string = iconv($settings['charset'], static::$internalCharset, $string);
}

// remove non-Chinese char.
Expand Down Expand Up @@ -201,7 +201,11 @@ public static function parse($string, array $settings = array())
*/
public static function appends(array $appends)
{
// NOTE(review): this page is a rendered diff without +/- markers. Going by the
// hunk header (-201,7 +201,11), the next line is presumably the removed
// pre-commit implementation (a flat array_merge into the dictionary) -- confirm
// against the repository before treating this span as runnable code.
static::$dictionary = array_merge(self::$dictionary, static::formatWords($appends));
// Presumably the added post-commit implementation starts here: format the
// incoming words, then file every entry under the first character of its key.
$list = static::formatWords($appends);
foreach ($list as $key => $value) {
// First character of the key, measured in the package's internal charset.
$firstChar = mb_substr($key, 0, 1, static::$internalCharset);
// Two-level layout: dictionary[first character][full key] = value.
self::$dictionary[$firstChar][$key] = $value;
}
}

/**
Expand Down Expand Up @@ -242,7 +246,18 @@ protected function getFirstLetters($pinyin, $settings)
*/
protected function string2pinyin($string)
{
// NOTE(review): this page is a rendered diff without +/- markers. Going by the
// hunk header (-242,7 +246,18), the next line is presumably the removed
// pre-commit implementation (a single strtr() over the whole dictionary) --
// confirm against the repository.
$pinyin = strtr($this->prepare($string), self::$dictionary);
// Presumably the added post-commit implementation starts here.
$preparedString = $this->prepare($string);
// Length in characters (not bytes) of the prepared string, in the internal charset.
$count = mb_strlen($preparedString, static::$internalCharset);
// Replacement table built only from the groups of characters that occur in the input.
$dictionary = array();

$i = 0;
while ($i < $count) {
// Read the character at position $i, then advance to the next one.
$char = mb_substr($preparedString, $i++, 1, static::$internalCharset);
// Entries filed under this character; an empty array when there are none.
$pinyinGroup = isset(self::$dictionary[$char]) ? self::$dictionary[$char] : array();
$dictionary = array_merge($dictionary, $pinyinGroup);
}

// Apply the reduced replacement table to the prepared string.
$pinyin = strtr($preparedString, $dictionary);

// NOTE(review): as rendered here, the search and replacement arguments are both
// a single space, which makes str_replace a no-op. Presumably the original
// collapses a double space and the page extraction squeezed the whitespace --
// confirm against the repository.
return trim(str_replace(' ', ' ', $pinyin));
}
Expand Down Expand Up @@ -272,6 +287,8 @@ public static function formatWords($words)
*/
protected static function formatDictPinyin($pinyin)
{
$pinyin = trim($pinyin);

return preg_replace_callback('/[a-z]{1,}:?\d{1}\s?/i', function ($matches) {
return strtolower($matches[0]);
}, " {$pinyin} ");
Expand Down

0 comments on commit 17cd2c7

Please sign in to comment.