2727import java .util .List ;
2828import java .util .stream .IntStream ;
2929import java .util .stream .LongStream ;
30+ import org .jetbrains .annotations .Nullable ;
3031
3132final class TagStringReader {
// Upper bound on nesting depth; the enforcing code is not visible in this chunk -- TODO confirm where it is checked.
3233 private static final int MAX_DEPTH = 512 ;
// Radix used for scalars carrying a "0x"/"0X" prefix (see extractRadix).
34+ private static final int HEX_RADIX = 16 ;
// Radix used for scalars carrying a "0b"/"0B" prefix (see extractRadix).
35+ private static final int BINARY_RADIX = 2 ;
// Radix used when a scalar has no recognised prefix (see extractRadix).
36+ private static final int DECIMAL_RADIX = 10 ;
// Zero-length arrays; presumably shared instances returned for empty array tags -- usage is not visible here, verify.
3337 private static final byte [] EMPTY_BYTE_ARRAY = new byte [0 ];
3438 private static final int [] EMPTY_INT_ARRAY = new int [0 ];
3539 private static final long [] EMPTY_LONG_ARRAY = new long [0 ];
@@ -238,9 +242,8 @@ public BinaryTag tag() throws StringTagParseException {
238242 *
239243 * @return a parsed tag
240244 */
241- private BinaryTag scalar () {
245+ private BinaryTag scalar () throws StringTagParseException {
242246 final StringBuilder builder = new StringBuilder ();
243- int noLongerNumericAt = -1 ;
244247 while (this .buffer .hasMore ()) {
245248 char current = this .buffer .peek ();
246249 if (current == '\\' ) { // escape -- we are significantly more lenient than original format at the moment
@@ -252,62 +255,148 @@ private BinaryTag scalar() {
252255 break ;
253256 }
254257 builder .append (current );
255- if (noLongerNumericAt == -1 && !Tokens .numeric (current )) {
256- noLongerNumericAt = builder .length ();
258+ }
259+ if (builder .length () == 0 ) {
260+ throw this .buffer .makeError ("Expected a value but got nothing" );
261+ }
262+ final String original = builder .toString (); // use unmodified string when number parsing fails
263+
264+ // Start stripping down the string so we can use Java's number parsing instead of having to write our own.
265+ // Determine the radix and strip its prefix if present
266+ final int radix = this .extractRadix (builder , original );
267+
268+ // Check for the sign before removing the type token because of hex number always needing a sign thanks to byte types
269+ final char last = builder .charAt (builder .length () - 1 );
270+ boolean hasSignToken = false ;
271+ boolean signed = radix != HEX_RADIX ; // hex defaults to unsigned
272+ if (builder .length () > 2 ) {
273+ final char signChar = builder .charAt (builder .length () - 2 );
274+ if (signChar == Tokens .TYPE_SIGNED || signChar == Tokens .TYPE_UNSIGNED ) {
275+ hasSignToken = true ;
276+ signed = signChar == Tokens .TYPE_SIGNED ;
277+ builder .deleteCharAt (builder .length () - 2 );
257278 }
258279 }
259280
260- final int length = builder .length ();
261- final String built = builder .toString ();
262- if (noLongerNumericAt == length && length > 1 ) {
263- final char last = built .charAt (length - 1 );
281+ // Check for the type token and make sure we didn't fall into the hex trap (e.g. 0xAB)
282+ boolean hasTypeToken = false ;
283+ char typeToken = Tokens .TYPE_INT ;
284+ if (Tokens .numericType (last ) && (hasSignToken || radix != HEX_RADIX )) {
285+ hasTypeToken = true ;
286+ typeToken = Character .toLowerCase (last );
287+ builder .deleteCharAt (builder .length () - 1 );
288+ }
289+
290+ if (!signed && (typeToken == Tokens .TYPE_FLOAT || typeToken == Tokens .TYPE_DOUBLE )) {
291+ throw this .buffer .makeError ("Cannot create unsigned floating point numbers" );
292+ }
293+
294+ final String strippedString = builder .toString ().replace ("_" , "" );
295+ if (hasTypeToken ) {
264296 try {
265- switch (Character .toLowerCase (last )) { // try to read and return as a number
266- case Tokens .TYPE_BYTE :
267- return ByteBinaryTag .byteBinaryTag (Byte .parseByte (built .substring (0 , length - 1 )));
268- case Tokens .TYPE_SHORT :
269- return ShortBinaryTag .shortBinaryTag (Short .parseShort (built .substring (0 , length - 1 )));
270- case Tokens .TYPE_INT :
271- return IntBinaryTag .intBinaryTag (Integer .parseInt (built .substring (0 , length - 1 )));
272- case Tokens .TYPE_LONG :
273- return LongBinaryTag .longBinaryTag (Long .parseLong (built .substring (0 , length - 1 )));
274- case Tokens .TYPE_FLOAT :
275- final float floatValue = Float .parseFloat (built .substring (0 , length - 1 ));
276- if (Float .isFinite (floatValue )) { // don't accept NaN and Infinity
277- return FloatBinaryTag .floatBinaryTag (floatValue );
278- }
279- break ;
280- case Tokens .TYPE_DOUBLE :
281- final double doubleValue = Double .parseDouble (built .substring (0 , length - 1 ));
282- if (Double .isFinite (doubleValue )) { // don't accept NaN and Infinity
283- return DoubleBinaryTag .doubleBinaryTag (doubleValue );
284- }
285- break ;
297+ final NumberBinaryTag tag = this .parseNumberTag (strippedString , typeToken , radix , signed );
298+ if (tag != null ) {
299+ return tag ;
286300 }
287301 } catch (final NumberFormatException ignored ) {
288302 // not a numeric tag of the appropriate type
289303 }
290- } else if ( noLongerNumericAt == - 1 ) { // if we run out of content without an explicit value separator, then we're either an integer or string tag -- all others have a character at the end
304+ } else { // default to int or double parsing before falling back to string
291305 try {
292- return IntBinaryTag .intBinaryTag (Integer .parseInt (built ));
306+ return IntBinaryTag .intBinaryTag (this .parseInt (strippedString , radix , signed ));
293307 } catch (final NumberFormatException ex ) {
294- if (built .indexOf ('.' ) != -1 ) { // see if we have an unsuffixed double; always needs a dot
308+ if (strippedString .indexOf ('.' ) != -1 ) { // see if we have an unsuffixed double; always needs a dot
295309 try {
296- return DoubleBinaryTag .doubleBinaryTag (Double .parseDouble (built ));
310+ return DoubleBinaryTag .doubleBinaryTag (Double .parseDouble (strippedString ));
297311 } catch (final NumberFormatException ex2 ) {
298312 // ignore
299313 }
300314 }
301315 }
302316 }
303317
304- if (built .equalsIgnoreCase (Tokens .LITERAL_TRUE )) {
318+ if (original .equalsIgnoreCase (Tokens .LITERAL_TRUE )) {
305319 return ByteBinaryTag .ONE ;
306- } else if (built .equalsIgnoreCase (Tokens .LITERAL_FALSE )) {
320+ } else if (original .equalsIgnoreCase (Tokens .LITERAL_FALSE )) {
307321 return ByteBinaryTag .ZERO ;
308322 }
309- return StringBinaryTag .stringBinaryTag (built );
323+ return StringBinaryTag .stringBinaryTag (original );
324+ }
325+
326+ private int extractRadix (final StringBuilder builder , final String original ) {
327+ int radixPrefixOffset = 0 ;
328+ final int radix ;
329+ final char first = builder .charAt (0 );
330+ if (first == '+' || first == '-' ) {
331+ radixPrefixOffset = 1 ;
332+ }
333+ if (original .startsWith ("0b" , radixPrefixOffset ) || original .startsWith ("0B" , radixPrefixOffset )) {
334+ radix = BINARY_RADIX ;
335+ } else if (original .startsWith ("0x" , radixPrefixOffset ) || original .startsWith ("0X" , radixPrefixOffset )) {
336+ radix = HEX_RADIX ;
337+ } else {
338+ radix = DECIMAL_RADIX ;
339+ }
340+ if (radix != DECIMAL_RADIX ) {
341+ builder .delete (radixPrefixOffset , 2 + radixPrefixOffset );
342+ }
343+ return radix ;
344+ }
345+
346+ private @ Nullable NumberBinaryTag parseNumberTag (final String s , final char typeToken , final int radix , final boolean signed ) {
347+ switch (typeToken ) {
348+ case Tokens .TYPE_BYTE :
349+ return ByteBinaryTag .byteBinaryTag (this .parseByte (s , radix , signed ));
350+ case Tokens .TYPE_SHORT :
351+ return ShortBinaryTag .shortBinaryTag (this .parseShort (s , radix , signed ));
352+ case Tokens .TYPE_INT :
353+ return IntBinaryTag .intBinaryTag (this .parseInt (s , radix , signed ));
354+ case Tokens .TYPE_LONG :
355+ return LongBinaryTag .longBinaryTag (this .parseLong (s , radix , signed ));
356+ case Tokens .TYPE_FLOAT :
357+ final float floatValue = Float .parseFloat (s );
358+ if (Float .isFinite (floatValue )) { // don't accept NaN and Infinity
359+ return FloatBinaryTag .floatBinaryTag (floatValue );
360+ }
361+ break ;
362+ case Tokens .TYPE_DOUBLE :
363+ final double doubleValue = Double .parseDouble (s );
364+ if (Double .isFinite (doubleValue )) { // don't accept NaN and Infinity
365+ return DoubleBinaryTag .doubleBinaryTag (doubleValue );
366+ }
367+ break ;
368+ }
369+ return null ;
370+ }
371+
372+ private byte parseByte (final String s , final int radix , final boolean signed ) {
373+ if (signed ) {
374+ return Byte .parseByte (s , radix );
375+ }
376+ final int parsedInt = Integer .parseInt (s , radix );
377+ if (parsedInt >> Byte .SIZE == 0 ) {
378+ return (byte ) parsedInt ;
379+ }
380+ throw new NumberFormatException ();
381+ }
382+
383+ private short parseShort (final String s , final int radix , final boolean signed ) {
384+ if (signed ) {
385+ return Short .parseShort (s , radix );
386+ }
387+ final int parsedInt = Integer .parseInt (s , radix );
388+ if (parsedInt >> Short .SIZE == 0 ) {
389+ return (short ) parsedInt ;
390+ }
391+ throw new NumberFormatException ();
392+ }
393+
394+ private int parseInt (final String s , final int radix , final boolean signed ) {
395+ return signed ? Integer .parseInt (s , radix ) : Integer .parseUnsignedInt (s , radix );
396+ }
310397
398+ private long parseLong (final String s , final int radix , final boolean signed ) {
399+ return signed ? Long .parseLong (s , radix ) : Long .parseUnsignedLong (s , radix );
311400 }
312401
313402 private boolean separatorOrCompleteWith (final char endCharacter ) throws StringTagParseException {
0 commit comments