diff --git a/README.md b/README.md index de91432..b156700 100644 --- a/README.md +++ b/README.md @@ -20,13 +20,21 @@ To include it for use in your project, please install via composer: composer require zbateson/mb-wrapper ``` +## PHP 7 Support Dropped + +As of mb-wrapper 2.0, support for PHP 7 has been dropped. + ## Requirements -mb-wrapper requires PHP 7.1 or newer. Tested on PHP 7.1, 7.2, 7.3, 7.4, 8.0, 8.1, and 8.2 on GitHub Actions. +mb-wrapper requires PHP 8.0 or newer. Tested on PHP 8.0, 8.1, 8.2, and 8.3 on GitHub Actions. + +## New in 2.0 + +If converting or performing an operation on a string fails in iconv, an UnsupportedCharsetException is now thrown. ## Description -MbWrapper is intended for use wherever mb_* or iconv_* is used. It scans supported charsets returned by mb_list_encodings(), and prefers mb_* functions, but will fallback to iconv if a charset isn't supported. +MbWrapper is intended for use wherever mb_* or iconv_* is used. It scans supported charsets returned by mb_list_encodings(), and prefers mb_* functions, but will fall back to iconv if a charset isn't supported by the mb_* functions. A list of aliased charsets is maintained for both mb_* and iconv, where a supported charset exists for an alias. This is useful for mail and http parsing as other systems may report encodings not recognized by mb_* or iconv. 
diff --git a/composer.json b/composer.json index 65a2f1f..da20235 100644 --- a/composer.json +++ b/composer.json @@ -9,7 +9,7 @@ } ], "require": { - "php": ">=7.1", + "php": ">=8.0", "symfony/polyfill-mbstring": "^1.9", "symfony/polyfill-iconv": "^1.9" }, diff --git a/src/MbWrapper.php b/src/MbWrapper.php index 4cd390d..34830de 100644 --- a/src/MbWrapper.php +++ b/src/MbWrapper.php @@ -4,7 +4,6 @@ * * @license http://opensource.org/licenses/bsd-license.php BSD */ - namespace ZBateson\MbWrapper; /** @@ -325,6 +324,38 @@ private function getNormalizedCharset($charset) return \preg_replace('/[^A-Z0-9]+/', '', $upper); } + private function iconv(string $fromCharset, string $toCharset, string $str) : string + { + $ret = @\iconv($fromCharset, $toCharset . '//TRANSLIT//IGNORE', $str); + if ($ret === false) { + throw new UnsupportedCharsetException("Unable to convert from charsets: $fromCharset to $toCharset"); + } + return $ret; + } + + private function iconvStrlen(string $str, string $charset) : int + { + $ret = @\iconv_strlen($str, $charset . '//TRANSLIT//IGNORE'); + if ($ret === false) { + throw new UnsupportedCharsetException("Charset $charset is not supported"); + } + return $ret; + } + + private function iconvSubstr(string $str, string $charset, int $start, ?int $length = null) : string + { + $ret = @\iconv_substr($str, $start, $length, $charset . '//TRANSLIT//IGNORE'); + if ($ret === false) { + $strLength = $this->iconvStrlen($str, $charset); + if ($start > $strLength) { + // returns empty to keep in line with mb_substr functionality + return ''; + } + throw new UnsupportedCharsetException("Charset $charset is not supported"); + } + return $ret; + } + /** * Converts the passed string's charset from the passed $fromCharset to the * passed $toCharset @@ -333,6 +364,7 @@ private function getNormalizedCharset($charset) * back to iconv if not. If the source or destination character sets aren't * supported, a blank string is returned. 
* + * @throws UnsupportedCharsetException if iconv fails */ public function convert(string $str, string $fromCharset, string $toCharset) : string { @@ -347,16 +379,16 @@ public function convert(string $str, string $fromCharset, string $toCharset) : s if ($str !== '') { if ($from !== false && $to === false) { $str = \mb_convert_encoding($str, 'UTF-8', $from); - return \iconv('UTF-8', $this->getIconvAlias($toCharset) . '//TRANSLIT//IGNORE', $str); + return $this->iconv('UTF-8', $this->getIconvAlias($toCharset), $str); } elseif ($from === false && $to !== false) { - $str = \iconv($this->getIconvAlias($fromCharset), 'UTF-8//TRANSLIT//IGNORE', $str); + $str = $this->iconv($this->getIconvAlias($fromCharset), 'UTF-8', $str); return \mb_convert_encoding($str, $to, 'UTF-8'); } elseif ($from !== false && $to !== false) { return \mb_convert_encoding($str, $to, $from); } - return \iconv( + return $this->iconv( $this->getIconvAlias($fromCharset), - $this->getIconvAlias($toCharset) . '//TRANSLIT//IGNORE', + $this->getIconvAlias($toCharset), $str ); } @@ -376,12 +408,14 @@ public function checkEncoding(string $str, string $charset) : bool return \mb_check_encoding($str, $mb); } $ic = $this->getIconvAlias($charset); - return (@\iconv($ic, $ic, $str) !== false); + return (@\iconv($ic, $ic . '//TRANSLIT//IGNORE', $str) !== false); } /** * Uses either mb_strlen or iconv_strlen to return the number of characters * in the passed $str for the given $charset + * + * @throws UnsupportedCharsetException if iconv fails */ public function getLength(string $str, string $charset) : int { @@ -389,12 +423,17 @@ public function getLength(string $str, string $charset) : int if ($mb !== false) { return \mb_strlen($str, $mb); } - return \iconv_strlen($str, $this->getIconvAlias($charset) . '//TRANSLIT//IGNORE'); + return $this->iconvStrlen($str, $this->getIconvAlias($charset)); } /** * Uses either mb_substr or iconv_substr to create and return a substring of * the passed $str. 
+ * + * If the offset provided in $start is greater than the length of the + * string, an empty string is returned. + * + * @throws UnsupportedCharsetException if iconv fails */ public function getSubstr(string $str, string $charset, int $start, ?int $length = null) : string { @@ -409,10 +448,7 @@ public function getSubstr(string $str, string $charset, int $start, ?int $length $str = $this->convert($str, $ic, 'UTF-8'); return $this->convert($this->getSubstr($str, 'UTF-8', $start, $length), 'UTF-8', $ic); } - if ($length === null) { - $length = \iconv_strlen($str, $ic . '//TRANSLIT//IGNORE'); - } - return \iconv_substr($str, $start, $length, $ic . '//TRANSLIT//IGNORE'); + return $this->iconvSubstr($str, $ic, $start, $length); } /** diff --git a/src/UnsupportedCharsetException.php b/src/UnsupportedCharsetException.php new file mode 100644 index 0000000..a8634fa --- /dev/null +++ b/src/UnsupportedCharsetException.php @@ -0,0 +1,19 @@ +expectException(UnsupportedCharsetException::class); + $test = 'This is my string'; + $converter = new MbWrapper(); + $converter->convert($converter->convert($test, 'UTF-8', 'ASDF-ABC-123'), 'ASDF-ABC-123', 'UTF-8'); + } + + public function testLengthInvalidCharset() : void + { + $this->expectException(UnsupportedCharsetException::class); + $test = 'This is my string'; + $converter = new MbWrapper(); + $converter->getLength($test, 'ASDF-ABC-123'); + } + + public function testSubstrInvalidCharset() : void + { + $this->expectException(UnsupportedCharsetException::class); + $test = 'This is my string'; + $converter = new MbWrapper(); + $converter->getSubstr($test, 'ASDF-ABC-123', 0); + } + + public function testSubstrInvalidOffset() : void + { + $test = 'Test'; + $converter = new MbWrapper(); + $this->assertEquals('', $converter->getSubstr($test, 'CP1250', 10)); + } }