Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,18 @@ class FastDoubleSwar {
/**
 * Counts how many leading bytes of {@code chunk} are ASCII decimal digits
 * ({@code '0'..'9'}), scanning from the most significant byte towards the
 * least significant byte.
 * <p>
 * The classification is done on all eight byte lanes at once. Every lane is
 * first reduced to its low seven bits, so that none of the additions below
 * can carry into a neighbouring lane; a non-digit byte therefore never
 * changes the verdict for the bytes in front of it.
 *
 * @param chunk eight bytes packed into a long; the most significant byte
 *              is treated as the first character
 * @return the number of consecutive digit bytes at the start of the chunk,
 *         in the range {@code 0..8}
 */
public static int countUpToEightDigitsUtf8(long chunk) {
    // Low seven bits of every byte: each lane is now in 0x00..0x7f.
    long low = chunk & 0x7f7f7f7f7f7f7f7fL;
    // Lane high bit set  <=>  low >= 0x3a  (greater than '9'); max lane value 0xc5, no carry.
    long aboveNine = low + 0x4646464646464646L;
    // Lane high bit set  <=>  low >= 0x30  (at least '0'); max lane value 0xcf, no carry.
    long atLeastZero = low + 0x5050505050505050L;
    // A byte is not a digit if it is above '9', below '0', or has its own high bit set.
    long predicate = (aboveNine | ~atLeastZero | chunk) & 0x8080808080808080L;
    return predicate == 0L ? 8 : Long.numberOfLeadingZeros(predicate) >> 3;
}

/**
 * Counts how many leading 16-bit characters of {@code chunk} are ASCII
 * decimal digits ({@code '0'..'9'}), scanning from the most significant
 * character towards the least significant character.
 * <p>
 * All four 16-bit lanes are classified at once. The range check is done on
 * the low seven bits of each lane so that the additions cannot carry into a
 * neighbouring lane; a non-digit character therefore never changes the
 * verdict for the characters in front of it.
 *
 * @param chunk four UTF-16 code units packed into a long; the most
 *              significant 16 bits are treated as the first character
 * @return the number of consecutive digit characters at the start of the
 *         chunk, in the range {@code 0..4}
 */
public static int countUpToFourDigitsUtf16(long chunk) {
    // Low seven bits of every lane: each lane is now in 0x0000..0x007f.
    long low = chunk & 0x007f_007f_007f_007fL;
    // Bit 7 of a lane set  <=>  low >= 0x3a  (greater than '9'); no carry out of the lane.
    long aboveNine = low + 0x0046_0046_0046_0046L;
    // Bit 7 of a lane set  <=>  low >= 0x30  (at least '0'); no carry out of the lane.
    long atLeastZero = low + 0x0050_0050_0050_0050L;
    // Out of range within 7 bits, or any of the upper nine bits set => not a digit.
    long predicate = ((aboveNine | ~atLeastZero) & 0x0080_0080_0080_0080L)
            | (chunk & 0xff80_ff80_ff80_ff80L);
    return predicate == 0L ? 4 : Long.numberOfLeadingZeros(predicate) >> 4;
}

/**
 * Counts the leading ASCII digit characters across two consecutive chunks
 * of four UTF-16 characters each.
 * <p>
 * The second chunk is only inspected when all four characters of the first
 * chunk are digits.
 *
 * @param first  the first four characters
 * @param second the next four characters
 * @return the number of consecutive digits, in the range {@code 0..8}
 */
public static int countUpToEightDigitsUtf16(long first, long second) {
    final int leading = countUpToFourDigitsUtf16(first);
    if (leading < 4) {
        // The digit run already ended inside the first chunk.
        return leading;
    }
    return 4 + countUpToFourDigitsUtf16(second);
}

/**
Expand Down Expand Up @@ -163,6 +174,23 @@ public static boolean isEightZeroesUtf8(long chunk) {
return chunk == 0x3030303030303030L;
}

/**
 * Converts eight ASCII digit bytes packed into a long to their decimal
 * value.
 * <p>
 * No validation is performed here: the character {@code '0'} (0x30) is
 * subtracted from every byte and the result is handed to
 * {@link #parseBcd(long)}.
 *
 * @param chunk eight bytes, each expected to be in {@code '0'..'9'}
 * @return the decimal value computed by {@code parseBcd}
 */
public static int parseEightDigitsUtf8(long chunk) {
    // Turn each ASCII digit into its numeric value 0..9.
    final long digitValues = chunk - 0x3030303030303030L;
    return parseBcd(digitValues);
}

/**
 * Combines eight per-byte decimal digits into a single integer.
 * <p>
 * Each byte of {@code val} is expected to hold a value in {@code 0..9}.
 * The least significant byte contributes the most significant decimal
 * digit of the result, the most significant byte the least significant
 * decimal digit.
 *
 * @param val eight digit values, one per byte
 * @return the eight-digit decimal number
 */
private static int parseBcd(long val) {
    // Selects byte 0 and byte 4 of a long.
    final long pairMask = 0x000000ff_000000ffL;
    // Weights for the digit pairs; the factors shifted by 32 land in the upper half.
    final long weightsFirst = 100L + (1_000_000L << 32);
    final long weightsSecond = 1L + (10_000L << 32);

    // Merge neighbouring digits: byte i becomes 10 * digit[i] + digit[i + 1].
    final long pairs = val * 10 + (val >>> 8);

    // The two multiplications do not depend on each other.
    final long firstAndThird = (pairs & pairMask) * weightsFirst;
    final long secondAndFourth = ((pairs >>> 16) & pairMask) * weightsSecond;

    // The finished number sits in the upper 32 bits of the sum.
    return (int) ((firstAndThird + secondAndFourth) >>> 32);
}

public static int parseEightDigitsUtf16(long first, long second) {
long fval = first - 0x0030_0030_0030_0030L;
long sval = second - 0x0030_0030_0030_0030L;
Expand Down Expand Up @@ -299,13 +327,7 @@ public static int tryToParseEightDigitsUtf8(long chunk) {
return -1;
}

// The last 2 multiplications are independent of each other.
long mask = 0xff_000000ffL;
long mul1 = 100 + (100_0000L << 32);
long mul2 = 1 + (1_0000L << 32);
val = val * 10 + (val >>> 8);// same as: val = val * (1 + (10 << 8)) >>> 8;
val = (val & mask) * mul1 + (val >>> 16 & mask) * mul2 >>> 32;
return (int) val;
return parseBcd(val);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* @(#)FastDoubleSwarTest.java
* Copyright © 2023 Werner Randelshofer, Switzerland. MIT License.
*/
package ch.randelshofer.fastdoubleparser;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.util.stream.IntStream;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests the digit-counting methods of {@link FastDoubleSwar}.
 * <p>
 * Character positions are 1-based; position 1 is the most significant
 * byte (UTF-8) or the most significant 16 bits (UTF-16) of a chunk.
 */
class FastDoubleSwarTest {
    /** Eight ASCII {@code '0'} bytes. */
    private static final long EIGHT_ZERO_DIGITS_UTF8 = 0x30_30_30_30_30_30_30_30L;
    /** Four UTF-16 {@code '0'} characters. */
    private static final long FOUR_ZERO_DIGITS_UTF16 = 0x0030_0030_0030_0030L;
    /**
     * Bit pattern 0x40 (ASCII {@code '@'}). It is OR-ed onto a {@code '0'}
     * (0x30), which yields 0x70 ({@code 'p'}), a character that is not a digit.
     */
    private static final long ONE_INVALID_DIGIT = 0x40;
    /**
     * Chunk containing non-digit characters. It is passed as the second chunk
     * when the first chunk already ends the digit run, where it must not
     * influence the result.
     */
    private static final long INTERFERENCE_DIGITS = 0x4030300040303080L;

    /** Supplies the 1-based byte positions 1..8 of a UTF-8 chunk. */
    private static IntStream utf8charPositionInLong() {
        return IntStream.range(1, 9);
    }

    /** Returns eight {@code '0'} bytes with a non-digit at the given 1-based position. */
    private static long invalidUtf8char(int position) {
        return EIGHT_ZERO_DIGITS_UTF8 | (ONE_INVALID_DIGIT << (8 * (8 - position)));
    }

    @ParameterizedTest(name = "invalid UTF-8 character at position {0}")
    @MethodSource("utf8charPositionInLong")
    public void countUpToEightDigitsUtf8_invalid(int invalidCharacterPosition) {
        long chunk = invalidUtf8char(invalidCharacterPosition);

        int expected = invalidCharacterPosition - 1;
        int actual = FastDoubleSwar.countUpToEightDigitsUtf8(chunk);

        // Report the input chunk on failure, like the UTF-16 tests do.
        assertEquals(expected, actual, Long.toString(chunk, 16));
    }

    @Test
    public void countUpToEightDigitsUtf8_valid() {
        assertEquals(8, FastDoubleSwar.countUpToEightDigitsUtf8(EIGHT_ZERO_DIGITS_UTF8));
    }

    /** Supplies the 1-based character positions 1..4 of a UTF-16 chunk. */
    private static IntStream utf16charPositionInLong() {
        return IntStream.range(1, 5);
    }

    /** Returns four {@code '0'} characters with a non-digit at the given 1-based position. */
    private static long invalidUtf16char(int position) {
        return FOUR_ZERO_DIGITS_UTF16 | (ONE_INVALID_DIGIT << (16 * (4 - position)));
    }

    @ParameterizedTest(name = "invalid UTF-16 character in first chunk at position {0}")
    @MethodSource("utf16charPositionInLong")
    public void countUpToEightDigitsUtf16_firstInvalid(int invalidCharacterPosition) {
        long first = invalidUtf16char(invalidCharacterPosition);
        long second = INTERFERENCE_DIGITS;

        int expected = invalidCharacterPosition - 1;
        int actual = FastDoubleSwar.countUpToEightDigitsUtf16(first, second);

        assertEquals(expected, actual, Long.toString(first, 16));
    }

    @ParameterizedTest(name = "invalid UTF-16 character in second chunk at position {0}")
    @MethodSource("utf16charPositionInLong")
    public void countUpToEightDigitsUtf16_secondInvalid(int invalidCharacterPosition) {
        long first = FOUR_ZERO_DIGITS_UTF16;
        long second = invalidUtf16char(invalidCharacterPosition);

        int expected = 4 + invalidCharacterPosition - 1;
        int actual = FastDoubleSwar.countUpToEightDigitsUtf16(first, second);

        assertEquals(expected, actual, Long.toString(second, 16));
    }

    @Test
    public void countUpToEightDigitsUtf16_bothValid() {
        assertEquals(8, FastDoubleSwar.countUpToEightDigitsUtf16(FOUR_ZERO_DIGITS_UTF16, FOUR_ZERO_DIGITS_UTF16));
    }
}