99use IntlCodePointBreakIterator ;
1010use Transliterator ;
1111
12+ use function array_filter ;
1213use function array_map ;
1314use function array_shift ;
15+ use function array_slice ;
1416use function assert ;
15- use function count ;
1617use function explode ;
1718use function implode ;
1819use function in_array ;
2728use function strtolower ;
2829use function substr ;
2930use function trim ;
30- use function ucfirst ;
3131
3232/**
3333 * Utility for generating valid PHP labels from UTF-8 strings
@@ -132,67 +132,66 @@ abstract class AbstractNormalizer implements NormalizerInterface
132132 ];
133133
/**
 * Spelt-out names for ASCII control characters and punctuation, keyed by
 * byte value. spellOutAscii() looks each byte's ord() up in this table;
 * bytes with no entry here are left as they are.
 *
 * In this diff the CamelCase names ('-' lines) are replaced by names made
 * of space-separated words ('+' lines), and the entry for 32 (Space) is
 * removed without a replacement.
 */
134134 private const ASCII_SPELLOUT = [
135- 1 => 'StartOfHeader ' ,
136- 2 => 'StartOfText ' ,
137- 3 => 'EndOfText ' ,
138- 4 => 'EndOfTransmission ' ,
135+ 1 => 'Start Of Header ' ,
136+ 2 => 'Start Of Text ' ,
137+ 3 => 'End Of Text ' ,
138+ 4 => 'End Of Transmission ' ,
139139 5 => 'Enquiry ' ,
140140 6 => 'Acknowledgement ' ,
141141 7 => 'Bell ' ,
142142 8 => 'Backspace ' ,
143- 9 => 'HorizontalTab ' ,
144- 10 => 'LineFeed ' ,
145- 11 => 'VerticalTab ' ,
146- 12 => 'FormFeed ' ,
147- 13 => 'CarriageReturn ' ,
148- 14 => 'ShiftOut ' ,
149- 15 => 'ShiftIn ' ,
150- 16 => 'DataLinkEscape ' ,
151- 17 => 'DeviceControlOne ' ,
152- 18 => 'DeviceControlTwo ' ,
153- 19 => 'DeviceControlThree ' ,
154- 20 => 'DeviceControlFour ' ,
155- 21 => 'NegativeAcknowledgement ' ,
156- 22 => 'SynchronousIdle ' ,
157- 23 => 'EndOfTransmissionBlock ' ,
143+ 9 => 'Horizontal Tab ' ,
144+ 10 => 'Line Feed ' ,
145+ 11 => 'Vertical Tab ' ,
146+ 12 => 'Form Feed ' ,
147+ 13 => 'Carriage Return ' ,
148+ 14 => 'Shift Out ' ,
149+ 15 => 'Shift In ' ,
150+ 16 => 'Data Link Escape ' ,
151+ 17 => 'Device Control One ' ,
152+ 18 => 'Device Control Two ' ,
153+ 19 => 'Device Control Three ' ,
154+ 20 => 'Device Control Four ' ,
155+ 21 => 'Negative Acknowledgement ' ,
156+ 22 => 'Synchronous Idle ' ,
157+ 23 => 'End Of Transmission Block ' ,
158158 24 => 'Cancel ' ,
159- 25 => 'EndOfMedium ' ,
159+ 25 => 'End Of Medium ' ,
160160 26 => 'Substitute ' ,
161161 27 => 'Escape ' ,
162- 28 => 'FileSeparator ' ,
163- 29 => 'GroupSeparator ' ,
164- 30 => 'RecordSeparator ' ,
165- 31 => 'UnitSeparator ' ,
166- 32 => 'Space ' ,
162+ 28 => 'File Separator ' ,
163+ 29 => 'Group Separator ' ,
164+ 30 => 'Record Separator ' ,
165+ 31 => 'Unit Separator ' ,
167166 33 => 'Exclamation ' ,
168- 34 => 'DoubleQuote ' ,
167+ 34 => 'Double Quote ' ,
169168 35 => 'Number ' ,
170169 36 => 'Dollar ' ,
171170 37 => 'Percent ' ,
172171 38 => 'Ampersand ' ,
173172 39 => 'Quote ' ,
174- 40 => 'OpenBracket ' ,
175- 41 => 'CloseBracket ' ,
173+ 40 => 'Open Bracket ' ,
174+ 41 => 'Close Bracket ' ,
176175 42 => 'Asterisk ' ,
177176 43 => 'Plus ' ,
178177 44 => 'Comma ' ,
179- 46 => 'FullStop ' ,
178+ 46 => 'Full Stop ' ,
180179 47 => 'Slash ' ,
181180 58 => 'Colon ' ,
182181 59 => 'Semicolon ' ,
183- 60 => 'LessThan ' ,
182+ 60 => 'Less Than ' ,
184183 61 => 'Equals ' ,
185- 62 => 'GreaterThan ' ,
186- 63 => 'QuestionMark ' ,
184+ 62 => 'Greater Than ' ,
185+ 63 => 'Question Mark ' ,
187186 64 => 'At ' ,
188- 91 => 'OpenSquare ' ,
187+ 91 => 'Open Square ' ,
189188 92 => 'Backslash ' ,
190- 93 => 'CloseSquare ' ,
189+ 93 => 'Close Square ' ,
191190 94 => 'Caret ' ,
192191 96 => 'Backtick ' ,
193- 123 => 'OpenCurly ' ,
192+ 123 => 'Open Curly ' ,
194193 124 => 'Pipe ' ,
195- 125 => 'CloseCurly ' ,
194+ 125 => 'Close Curly ' ,
196195 126 => 'Tilde ' ,
197196 127 => 'Delete ' ,
198197 ];
@@ -252,30 +251,36 @@ protected function toAscii(string $string): string
252251 return $ this ->spellOutNonAscii (implode (' ' , $ words ));
253252 }
254253
/**
 * Collapses each run of separator characters and whitespace in the trimmed
 * input into a single replacement string.
 *
 * This diff renames separatorsToUnderscore() to separatorsToSpace(): the
 * replacement switches from the underscore literal to the space literal,
 * and the underscore is added to the character class so that it is treated
 * as a separator too.
 *
 * NOTE(review): $this->separators is concatenated unescaped into a regex
 * character class - presumably it is already regex-safe; verify where it is
 * assigned.
 *
 * @param string $string Text to process; it is trim()med before matching.
 * @return string The result of preg_replace().
 */
255- protected function separatorsToUnderscore (string $ string ): string
254+ protected function separatorsToSpace (string $ string ): string
256255 {
257- return preg_replace ('/[ ' . $ this ->separators . '\s ]+/ ' , '_ ' , trim ($ string ));
256+ return preg_replace ('/[ ' . $ this ->separators . '\s_ ]+/ ' , ' ' , trim ($ string ));
258257 }
259258
/**
 * Replaces every byte that has an ASCII_SPELLOUT entry with its spelt-out
 * name, then hands the joined text to spellOutLeadingDigits().
 *
 * New version ('+' lines): bytes without an entry accumulate in $current.
 * When a mapped byte is met, the accumulated run and the byte's name are
 * pushed to $speltOut as two separate elements and $current is reset. The
 * elements are finally joined with the ' ' literal. A run may be empty
 * (for instance when two mapped bytes are adjacent), so the joined string
 * can contain consecutive separators.
 *
 * Old version ('-' lines): rewrote a per-byte $chunks array in place and
 * appended '_ ' after every spelt-out name except at the last index.
 *
 * @param string $string Input, walked byte by byte via str_split().
 * @return string Whatever spellOutLeadingDigits() returns for the joined text.
 */
260261 protected function spellOutAscii (string $ string ): string
261262 {
262- $ chunks = str_split ($ string );
263- $ last = count ($ chunks ) - 1 ;
264- foreach (str_split ($ string ) as $ i => $ char ) {
265- if (isset (self ::ASCII_SPELLOUT [ord ($ char )])) {
266- $ char = self ::ASCII_SPELLOUT [ord ($ char )] . ($ i < $ last ? '_ ' : '' );
261+ $ speltOut = [];
262+ $ current = '' ;
263+
264+ foreach (str_split ($ string ) as $ char ) {
265+ $ ord = ord ($ char );
// Bytes without a spelt-out name simply extend the current literal run.
266+ if (! isset (self ::ASCII_SPELLOUT [$ ord ])) {
267+ $ current .= $ char ;
268+ continue ;
267269 }
268- $ chunks [$ i ] = $ char ;
270+
// Mapped byte: flush the run collected so far, then emit the name.
271+ $ speltOut [] = $ current ;
272+ $ speltOut [] = self ::ASCII_SPELLOUT [$ ord ];
273+ $ current = '' ;
269274 }
// Flush whatever followed the last mapped byte (possibly '').
275+ $ speltOut [] = $ current ;
270276
271- return $ this ->spellOutLeadingDigits (implode ('' , $ chunks ));
277+ return $ this ->spellOutLeadingDigits (implode (' ' , $ speltOut ));
272278 }
273279
274280 protected function toCase (string $ string ): string
275281 {
276- assert (in_array ($ this ->case , self ::VALID_CASES ));
277-
278- $ parts = explode ('_ ' , $ string );
282+ /** @var list<string> $parts */
283+ $ parts = array_filter (explode (' ' , $ string ));
279284 return match ($ this ->case ) {
280285 self ::CAMEL_CASE => $ this ->toCamelCase ($ parts ),
281286 self ::PASCAL_CASE => $ this ->toPascalCase ($ parts ),
@@ -284,11 +289,11 @@ protected function toCase(string $string): string
284289 };
285290 }
286291
287- protected function sanitizeReserved (string $ string, array $ reserved ): string
292+ protected function sanitizeReserved (string $ string ): string
288293 {
289294 assert ($ this ->suffix !== null );
290295
291- if (in_array (strtolower ($ string ), $ reserved , true )) {
296+ if (in_array (strtolower ($ string ), self :: RESERVED , true )) {
292297 return $ string . $ this ->suffix ;
293298 }
294299 return $ string ;
@@ -297,10 +302,10 @@ protected function sanitizeReserved(string $string, array $reserved): string
297302 private function prepareSuffix (string |null $ suffix , string $ case ): string |null
298303 {
299304 if ($ suffix === null ) {
300- return $ suffix ;
305+ return null ;
301306 }
302307
303- if ($ suffix === '' || ! preg_match ('/^[a-zA-Z0-9_\x80-\xff]* $/ ' , $ suffix )) {
308+ if (! preg_match ('/^[a-zA-Z0-9_\x80-\xff]+ $/ ' , $ suffix )) {
304309 throw NormalizerException::invalidSuffix ($ suffix );
305310 }
306311
@@ -312,46 +317,53 @@ private function prepareSuffix(string|null $suffix, string $case): string|null
312317
/**
 * Spells out every code point of 256 or above via spellOutNonAsciiChar()
 * and leaves code points below 256 unchanged.
 *
 * The string is loaded into $this->codePoints with setText() and walked
 * through its parts iterator; each part is converted with IntlChar::ord().
 * NOTE(review): $this->codePoints is presumably the
 * IntlCodePointBreakIterator imported by this file - confirm where the
 * property is initialised.
 *
 * New version ('+' lines): unchanged characters accumulate in $current;
 * each spelt-out character flushes the current run and is pushed as its
 * own element, and the elements are joined with the ' ' literal. The old
 * version ('-' lines) concatenated everything with no separator.
 *
 * @param string $string Text to process.
 * @return string Text with code points >= 256 replaced by spelt-out names.
 */
313318 private function spellOutNonAscii (string $ string ): string
314319 {
315- $ speltOut = '' ;
320+ $ speltOut = [];
321+ $ current = '' ;
316322
317323 $ this ->codePoints ->setText ($ string );
318324 /** @var string $char */
319325 foreach ($ this ->codePoints ->getPartsIterator () as $ char ) {
320- $ ord = IntlChar::ord ($ char );
321- $ speltOut .= $ ord < 256 ? $ char : $ this ->spellOutNonAsciiChar ($ ord );
326+ $ ord = IntlChar::ord ($ char );
// Code points below 256 are kept verbatim in the current run.
327+ if ($ ord < 256 ) {
328+ $ current .= $ char ;
329+ continue ;
330+ }
331+
// Flush the run collected so far, then emit the spelt-out character.
332+ $ speltOut [] = $ current ;
333+ $ speltOut [] = $ this ->spellOutNonAsciiChar ($ ord );
334+ $ current = '' ;
322335 }
// Flush the trailing run (possibly '').
336+ $ speltOut [] = $ current ;
323337
324- return $ speltOut ;
338+ return implode ( ' ' , $ speltOut) ;
325339 }
326340
/**
 * Converts a code point into words derived from IntlChar::charName().
 *
 * The character name is split on spaces. A part equal to the 'SIGN '
 * literal is replaced by an empty string; every other part is lower-cased.
 * New version ('+' lines): parts are joined with the ' ' literal, so the
 * emptied part still contributes a separator (see the example comment in
 * the body). Old version ('-' lines): parts were additionally passed through
 * ucfirst() and joined with no separator.
 *
 * NOTE(review): IntlChar::charName() is documented as returning
 * string|null; a null result is not handled here - confirm whether it can
 * occur for the code points passed in.
 *
 * @param int $ord Code point to spell out.
 * @return string The processed parts of the character name, joined.
 */
327342 private function spellOutNonAsciiChar (int $ ord ): string
328343 {
329344 $ speltOut = IntlChar::charName ($ ord );
330345
331- // 'EURO SIGN' -> 'Euro '
332- return implode ('' , array_map (function (string $ part ): string {
333- return $ part === 'SIGN ' ? '' : ucfirst ( strtolower ($ part) );
334- }, explode (" " , $ speltOut )));
345+ // 'EURO SIGN' -> 'euro '
346+ return implode (' ' , array_map (function (string $ part ): string {
347+ return $ part === 'SIGN ' ? '' : strtolower ($ part );
348+ }, explode (' ' , $ speltOut )));
335349 }
336350
/**
 * Spells out the leading run of bytes that have a DIGIT_SPELLOUT entry.
 *
 * New version ('+' lines): each leading mapped byte contributes its
 * DIGIT_SPELLOUT value as one element. At the first byte without an entry
 * the remainder of the string (from that byte on) is appended unchanged as
 * a single element and the loop stops. Elements are joined with the ' '
 * literal.
 *
 * Old version ('-' lines): rewrote $chunks in place, appended '_ ' after
 * each spelt-out digit, blanked the first chunk equal to '_ ' found at an
 * index above 1, and joined with no separator.
 *
 * @param string $string Input, split into bytes via str_split().
 * @return string Input with its leading mapped bytes spelt out.
 */
337352 private function spellOutLeadingDigits (string $ string ): string
338353 {
339- $ chunks = str_split ($ string );
353+ $ speltOut = [];
354+ $ chunks = str_split ($ string );
340355 foreach ($ chunks as $ i => $ char ) {
341- if ($ i > 1 && $ char === '_ ' ) {
342- $ chunks [$ i ] = '' ;
343- break ;
344- }
345-
346356 $ ord = ord ($ char );
357+
// First byte without a DIGIT_SPELLOUT entry: keep the rest as-is and stop.
347358 if (! isset (self ::DIGIT_SPELLOUT [$ ord ])) {
359+ $ speltOut [] = implode ('' , array_slice ($ chunks , $ i ));
348360 break ;
349361 }
350362
351- $ chunks [ $ i ] = self ::DIGIT_SPELLOUT [$ ord ] . ' _ ' ;
363+ $ speltOut [ ] = self ::DIGIT_SPELLOUT [$ ord ];
352364 }
353365
354- return implode ('' , $ chunks );
366+ return implode (' ' , $ speltOut );
355367 }
356368
357369 /**
0 commit comments