4
4
5
5
use PHPStan \PhpDocParser \Ast ;
6
6
use PHPStan \PhpDocParser \Lexer \Lexer ;
7
+ use function chr ;
8
+ use function hexdec ;
9
+ use function octdec ;
10
+ use function preg_replace_callback ;
11
+ use function str_replace ;
7
12
use function strtolower ;
8
- use function trim ;
13
+ use function substr ;
9
14
10
15
class ConstExprParser
11
16
{
12
17
18
/**
 * Single-character escape sequences of double-quoted strings.
 *
 * Keys are the character that follows the backslash (exactly one character,
 * no padding); values are the byte each escape stands for.
 * Looked up by parseEscapeSequences() with the captured escape body.
 */
private const REPLACEMENTS = [
	'\\' => '\\',
	'n' => "\n",
	'r' => "\r",
	't' => "\t",
	'f' => "\f",
	'v' => "\v",
	'e' => "\x1B",
];
27
+
28
+ /** @var bool */
29
+ private $ unescapeStrings ;
30
+
31
/**
 * @param bool $unescapeStrings When true, parse() resolves escape sequences of
 *                              trimmed string tokens via unescapeString(); when
 *                              false it only strips the surrounding quotes.
 */
public function __construct(bool $unescapeStrings = false)
{
	$this->unescapeStrings = $unescapeStrings;
}
35
+
13
36
public function parse (TokenIterator $ tokens , bool $ trimStrings = false ): Ast \ConstExpr \ConstExprNode
14
37
{
15
38
if ($ tokens ->isCurrentTokenType (Lexer::TOKEN_FLOAT )) {
@@ -24,18 +47,14 @@ public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\Con
24
47
return new Ast \ConstExpr \ConstExprIntegerNode ($ value );
25
48
}
26
49
27
- if ($ tokens ->isCurrentTokenType (Lexer::TOKEN_SINGLE_QUOTED_STRING )) {
28
- $ value = $ tokens ->currentTokenValue ();
29
- if ($ trimStrings ) {
30
- $ value = trim ($ tokens ->currentTokenValue (), "' " );
31
- }
32
- $ tokens ->next ();
33
- return new Ast \ConstExpr \ConstExprStringNode ($ value );
34
-
35
- } elseif ($ tokens ->isCurrentTokenType (Lexer::TOKEN_DOUBLE_QUOTED_STRING )) {
50
+ if ($ tokens ->isCurrentTokenType (Lexer::TOKEN_SINGLE_QUOTED_STRING , Lexer::TOKEN_DOUBLE_QUOTED_STRING )) {
36
51
$ value = $ tokens ->currentTokenValue ();
37
52
if ($ trimStrings ) {
38
- $ value = trim ($ tokens ->currentTokenValue (), '" ' );
53
+ if ($ this ->unescapeStrings ) {
54
+ $ value = self ::unescapeString ($ value );
55
+ } else {
56
+ $ value = substr ($ value , 1 , -1 );
57
+ }
39
58
}
40
59
$ tokens ->next ();
41
60
return new Ast \ConstExpr \ConstExprStringNode ($ value );
@@ -137,4 +156,75 @@ private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprA
137
156
return new Ast \ConstExpr \ConstExprArrayItemNode ($ key , $ value );
138
157
}
139
158
159
/**
 * Removes the surrounding quotes from a quoted string token and resolves
 * the escape sequences inside it.
 *
 * A token starting with a single quote only has \\ and \' collapsed; every
 * other escape is kept verbatim. Any other token is treated as double-quoted
 * and handed to parseEscapeSequences().
 *
 * @param string $string Token text including its opening and closing quote.
 * @return string The unquoted value with escapes resolved.
 */
private static function unescapeString(string $string): string
{
	$quote = $string[0];
	$inner = substr($string, 1, -1);

	if ($quote === '\'') {
		return str_replace(
			['\\\\', '\\\''],
			['\\', '\''],
			$inner
		);
	}

	// The quote must be the bare one-character '"': it is concatenated to a
	// backslash to build the escaped-quote sequence that gets collapsed.
	return self::parseEscapeSequences($inner, '"');
}
173
+
174
/**
 * Resolves the escape sequences of a double-quoted string body.
 *
 * Collapses the backslash-escaped quote first, then replaces:
 *  - single-character escapes found in self::REPLACEMENTS,
 *  - \xH / \xHH hexadecimal bytes,
 *  - \N / \NN / \NNN octal bytes,
 *  - \u{H...} code points (encoded as UTF-8).
 *
 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
 *
 * @param string $str   String contents without the surrounding quotes.
 * @param string $quote Quote character whose escaped form (\ + quote) is collapsed.
 * @return string The string with escape sequences replaced.
 */
private static function parseEscapeSequences(string $str, string $quote): string
{
	$str = str_replace('\\' . $quote, $quote, $str);

	$unescaped = preg_replace_callback(
		'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
		static function (array $matches): string {
			$escape = $matches[1];

			if (isset(self::REPLACEMENTS[$escape])) {
				return self::REPLACEMENTS[$escape];
			}

			$kind = $escape[0];

			if ($kind === 'x' || $kind === 'X') {
				return chr(hexdec(substr($escape, 1)));
			}

			if ($kind === 'u') {
				$codePoint = hexdec($matches[2]);

				// hexdec() returns a float once the value no longer fits an int;
				// anything above the Unicode maximum is rejected here so that a
				// float is never passed to the int-typed codePointToUtf8().
				if ($codePoint > 0x10FFFF) {
					return "\xEF\xBF\xBD";
				}

				return self::codePointToUtf8((int) $codePoint);
			}

			if ($kind >= '0' && $kind <= '7') {
				// Octal escapes above \377 wrap around to a single byte.
				return chr(octdec($escape) & 0xFF);
			}

			// Defensive: an escape with no REPLACEMENTS entry is left as written
			// rather than being misread as an octal number.
			return $matches[0];
		},
		$str
	);

	// preg_replace_callback() yields null on a PCRE error; fall back to the
	// quote-collapsed input instead of violating the string return type.
	return $unescaped ?? $str;
}
201
+
202
/**
 * Encodes a Unicode code point as a UTF-8 byte sequence.
 *
 * Code points outside 0..0x10FFFF cannot be represented and yield the UTF-8
 * encoding of U+FFFD (replacement character). No surrogate check is performed.
 *
 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
 *
 * @param int $num Code point to encode.
 * @return string One to four bytes of UTF-8, or "\xEF\xBF\xBD" for an out-of-range value.
 */
private static function codePointToUtf8(int $num): string
{
	if ($num < 0 || $num > 0x10FFFF) {
		// Invalid UTF-8 codepoint escape sequence: Codepoint too large
		return "\xef\xbf\xbd";
	}

	if ($num <= 0x7F) {
		return chr($num);
	}

	if ($num <= 0x7FF) {
		return chr(($num >> 6) + 0xC0)
			. chr(($num & 0x3F) + 0x80);
	}

	if ($num <= 0xFFFF) {
		return chr(($num >> 12) + 0xE0)
			. chr((($num >> 6) & 0x3F) + 0x80)
			. chr(($num & 0x3F) + 0x80);
	}

	// Remaining range 0x10000..0x10FFFF: four-byte sequence.
	return chr(($num >> 18) + 0xF0)
		. chr((($num >> 12) & 0x3F) + 0x80)
		. chr((($num >> 6) & 0x3F) + 0x80)
		. chr(($num & 0x3F) + 0x80);
}
229
+
140
230
}
0 commit comments