44
55use PHPStan \PhpDocParser \Ast ;
66use PHPStan \PhpDocParser \Lexer \Lexer ;
7+ use function chr ;
8+ use function hexdec ;
9+ use function octdec ;
10+ use function preg_replace_callback ;
11+ use function str_replace ;
712use function strtolower ;
8- use function trim ;
13+ use function substr ;
914
1015class ConstExprParser
1116{
1217
/**
 * Single-character escape sequences of double-quoted strings, keyed by the
 * character that follows the backslash and mapped to the character it denotes.
 *
 * Keys must be exactly one character long: they are looked up with the
 * character captured after the backslash.
 */
private const REPLACEMENTS = [
    '\\' => '\\',
    'n' => "\n",
    'r' => "\r",
    't' => "\t",
    'f' => "\f",
    'v' => "\v",
    'e' => "\x1B",
];
27+
/**
 * Whether quoted string tokens have their escape sequences resolved
 * (instead of only having the surrounding quotes stripped).
 *
 * @var bool
 */
private $unescapeStrings;

/**
 * @param bool $unescapeStrings when true, string constants are unescaped while
 *                              parsing; defaults to false so existing callers
 *                              keep the quote-stripping-only behaviour
 */
public function __construct(bool $unescapeStrings = false)
{
    $this->unescapeStrings = $unescapeStrings;
}
35+
1336 public function parse (TokenIterator $ tokens , bool $ trimStrings = false ): Ast \ConstExpr \ConstExprNode
1437 {
1538 if ($ tokens ->isCurrentTokenType (Lexer::TOKEN_FLOAT )) {
@@ -24,18 +47,14 @@ public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\Con
2447 return new Ast \ConstExpr \ConstExprIntegerNode ($ value );
2548 }
2649
27- if ($ tokens ->isCurrentTokenType (Lexer::TOKEN_SINGLE_QUOTED_STRING )) {
28- $ value = $ tokens ->currentTokenValue ();
29- if ($ trimStrings ) {
30- $ value = trim ($ tokens ->currentTokenValue (), "' " );
31- }
32- $ tokens ->next ();
33- return new Ast \ConstExpr \ConstExprStringNode ($ value );
34-
35- } elseif ($ tokens ->isCurrentTokenType (Lexer::TOKEN_DOUBLE_QUOTED_STRING )) {
50+ if ($ tokens ->isCurrentTokenType (Lexer::TOKEN_SINGLE_QUOTED_STRING , Lexer::TOKEN_DOUBLE_QUOTED_STRING )) {
3651 $ value = $ tokens ->currentTokenValue ();
3752 if ($ trimStrings ) {
38- $ value = trim ($ tokens ->currentTokenValue (), '" ' );
53+ if ($ this ->unescapeStrings ) {
54+ $ value = self ::unescapeString ($ value );
55+ } else {
56+ $ value = substr ($ value , 1 , -1 );
57+ }
3958 }
4059 $ tokens ->next ();
4160 return new Ast \ConstExpr \ConstExprStringNode ($ value );
@@ -137,4 +156,75 @@ private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprA
137156 return new Ast \ConstExpr \ConstExprArrayItemNode ($ key , $ value );
138157 }
139158
/**
 * Removes the surrounding quotes from a quoted string token and resolves the
 * escape sequences that are valid for its quoting style.
 *
 * @param string $string the raw token, including its opening and closing quote
 *
 * @return string the string value the token denotes
 */
private static function unescapeString(string $string): string
{
    $quote = $string[0];
    $contents = substr($string, 1, -1);

    if ($quote === '\'') {
        // Single-quoted strings only recognise \\ and \' as escapes.
        return str_replace(
            ['\\\\', '\\\''],
            ['\\', '\''],
            $contents
        );
    }

    // The quote argument must be the bare quote character: it is concatenated
    // to a backslash to find escaped quotes.
    return self::parseEscapeSequences($contents, '"');
}
173+
/**
 * Replaces the escape sequences of a double-quoted string with the characters
 * they denote: escaped quotes, the single-character escapes listed in
 * self::REPLACEMENTS, hexadecimal (\xHH), octal (\NNN) and Unicode code point
 * (\u{...}) escapes.
 *
 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
 *
 * @param string $str   string contents without the surrounding quotes
 * @param string $quote quote character whose backslash-escaped form is unescaped
 *
 * @return string the unescaped contents
 */
private static function parseEscapeSequences(string $str, string $quote): string
{
    $str = str_replace('\\' . $quote, $quote, $str);

    $unescaped = preg_replace_callback(
        '~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
        static function (array $matches): string {
            $sequence = $matches[1];

            if (isset(self::REPLACEMENTS[$sequence])) {
                return self::REPLACEMENTS[$sequence];
            }
            if ($sequence[0] === 'x' || $sequence[0] === 'X') {
                return chr(hexdec(substr($sequence, 1)));
            }
            if ($sequence[0] === 'u') {
                $codePoint = hexdec($matches[2]);
                // hexdec() returns a float once the value no longer fits an int.
                // Such a code point is far out of range, so answer with U+FFFD
                // here instead of passing a float to the int-typed helper.
                if ($codePoint > 0x1FFFFF) {
                    return "\xef\xbf\xbd";
                }

                return self::codePointToUtf8((int) $codePoint);
            }

            // Up to three octal digits can encode values above 255; keep only
            // the low byte so chr() always receives a value in 0..255.
            return chr(octdec($sequence) & 0xFF);
        },
        $str
    );

    // preg_replace_callback() yields null on a PCRE failure; fall back to the
    // input (escaped quotes already resolved) rather than violating the
    // declared string return type.
    return $unescaped ?? $str;
}
201+
/**
 * Encodes a Unicode code point as a UTF-8 byte sequence.
 *
 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
 *
 * @param int $num the code point to encode
 *
 * @return string one to four bytes of UTF-8, or the encoding of U+FFFD
 *                (replacement character) when $num is above U+10FFFF
 */
private static function codePointToUtf8(int $num): string
{
    if ($num <= 0x7F) {
        return chr($num);
    }
    if ($num <= 0x7FF) {
        return chr(($num >> 6) + 0xC0)
            . chr(($num & 0x3F) + 0x80);
    }
    if ($num <= 0xFFFF) {
        return chr(($num >> 12) + 0xE0)
            . chr((($num >> 6) & 0x3F) + 0x80)
            . chr(($num & 0x3F) + 0x80);
    }
    // U+10FFFF is the last code point UTF-8 may encode; anything larger would
    // produce a four-byte sequence that is not valid UTF-8.
    if ($num <= 0x10FFFF) {
        return chr(($num >> 18) + 0xF0)
            . chr((($num >> 12) & 0x3F) + 0x80)
            . chr((($num >> 6) & 0x3F) + 0x80)
            . chr(($num & 0x3F) + 0x80);
    }

    // Invalid UTF-8 codepoint escape sequence: Codepoint too large
    return "\xef\xbf\xbd";
}
229+
140230}
0 commit comments