Skip to content

Commit c5ba52d

Browse files
committed
HADOOP-19012. Use CRC tables to speed up galoisFieldMultiply in CrcUtil.
1 parent 50d256e commit c5ba52d

File tree

6 files changed

+246
-87
lines changed

6 files changed

+246
-87
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.ByteArrayOutputStream;
2727
import java.io.DataInputStream;
2828
import java.io.IOException;
29+
import java.util.function.ToIntFunction;
2930

3031
/**
3132
* Encapsulates logic for composing multiple CRCs into one or more combined CRCs
@@ -39,7 +40,7 @@ public class CrcComposer {
3940
private static final int CRC_SIZE_BYTES = 4;
4041
private static final Logger LOG = LoggerFactory.getLogger(CrcComposer.class);
4142

42-
private final int crcPolynomial;
43+
private final ToIntFunction<Long> mod;
4344
private final int precomputedMonomialForHint;
4445
private final long bytesPerCrcHint;
4546
private final long stripeLength;
@@ -79,28 +80,14 @@ public static CrcComposer newCrcComposer(
7980
*/
8081
public static CrcComposer newStripedCrcComposer(
8182
DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) {
82-
int polynomial = DataChecksum.getCrcPolynomialForType(type);
83-
return new CrcComposer(
84-
polynomial,
85-
CrcUtil.getMonomial(bytesPerCrcHint, polynomial),
86-
bytesPerCrcHint,
87-
stripeLength);
83+
return new CrcComposer(type, bytesPerCrcHint, stripeLength);
8884
}
8985

90-
CrcComposer(
91-
int crcPolynomial,
92-
int precomputedMonomialForHint,
93-
long bytesPerCrcHint,
94-
long stripeLength) {
95-
LOG.debug(
96-
"crcPolynomial=0x{}, precomputedMonomialForHint=0x{}, "
97-
+ "bytesPerCrcHint={}, stripeLength={}",
98-
Integer.toString(crcPolynomial, 16),
99-
Integer.toString(precomputedMonomialForHint, 16),
100-
bytesPerCrcHint,
101-
stripeLength);
102-
this.crcPolynomial = crcPolynomial;
103-
this.precomputedMonomialForHint = precomputedMonomialForHint;
86+
private CrcComposer(DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) {
87+
LOG.debug("type={}, bytesPerCrcHint={}, stripeLength={}",
88+
type, bytesPerCrcHint, stripeLength);
89+
this.mod = DataChecksum.getModFunction(type);
90+
this.precomputedMonomialForHint = CrcUtil.getMonomial(bytesPerCrcHint, mod);
10491
this.bytesPerCrcHint = bytesPerCrcHint;
10592
this.stripeLength = stripeLength;
10693
}
@@ -161,10 +148,10 @@ public void update(int crcB, long bytesPerCrc) {
161148
curCompositeCrc = crcB;
162149
} else if (bytesPerCrc == bytesPerCrcHint) {
163150
curCompositeCrc = CrcUtil.composeWithMonomial(
164-
curCompositeCrc, crcB, precomputedMonomialForHint, crcPolynomial);
151+
curCompositeCrc, crcB, precomputedMonomialForHint, mod);
165152
} else {
166153
curCompositeCrc = CrcUtil.compose(
167-
curCompositeCrc, crcB, bytesPerCrc, crcPolynomial);
154+
curCompositeCrc, crcB, bytesPerCrc, mod);
168155
}
169156

170157
curPositionInStripe += bytesPerCrc;

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.classification.InterfaceStability;
2323

2424
import java.util.Arrays;
25+
import java.util.function.ToIntFunction;
2526

2627
/**
2728
* This class provides utilities for working with CRCs.
@@ -32,6 +33,56 @@ public final class CrcUtil {
3233
public static final int MULTIPLICATIVE_IDENTITY = 0x80000000;
3334
public static final int GZIP_POLYNOMIAL = 0xEDB88320;
3435
public static final int CASTAGNOLI_POLYNOMIAL = 0x82F63B78;
36+
private static final long UNIT = 0x8000_0000_0000_0000L;
37+
38+
/**
39+
* @return a * b (mod p),
40+
* where mod p is computed by the given mod function.
41+
*/
42+
static int multiplyMod(int a, int b, ToIntFunction<Long> mod) {
43+
final long left = ((long)a) << 32;
44+
final long right = ((long)b) << 32;
45+
46+
final long product
47+
= ((((((left & (UNIT )) == 0L? 0L : right)
48+
^ ((left & (UNIT >>> 1)) == 0L? 0L : right >>> 1))
49+
^ (((left & (UNIT >>> 2)) == 0L? 0L : right >>> 2)
50+
^ ((left & (UNIT >>> 3)) == 0L? 0L : right >>> 3)))
51+
^ ((((left & (UNIT >>> 4)) == 0L? 0L : right >>> 4)
52+
^ ((left & (UNIT >>> 5)) == 0L? 0L : right >>> 5))
53+
^ (((left & (UNIT >>> 6)) == 0L? 0L : right >>> 6)
54+
^ ((left & (UNIT >>> 7)) == 0L? 0L : right >>> 7))))
55+
56+
^ (((((left & (UNIT >>> 8)) == 0L? 0L : right >>> 8)
57+
^ ((left & (UNIT >>> 9)) == 0L? 0L : right >>> 9))
58+
^ (((left & (UNIT >>> 10)) == 0L? 0L : right >>> 10)
59+
^ ((left & (UNIT >>> 11)) == 0L? 0L : right >>> 11)))
60+
^ ((((left & (UNIT >>> 12)) == 0L? 0L : right >>> 12)
61+
^ ((left & (UNIT >>> 13)) == 0L? 0L : right >>> 13))
62+
^ (((left & (UNIT >>> 14)) == 0L? 0L : right >>> 14)
63+
^ ((left & (UNIT >>> 15)) == 0L? 0L : right >>> 15)))))
64+
65+
^ ((((((left & (UNIT >>> 16)) == 0L? 0L : right >>> 16)
66+
^ ((left & (UNIT >>> 17)) == 0L? 0L : right >>> 17))
67+
^ (((left & (UNIT >>> 18)) == 0L? 0L : right >>> 18)
68+
^ ((left & (UNIT >>> 19)) == 0L? 0L : right >>> 19)))
69+
^ ((((left & (UNIT >>> 20)) == 0L? 0L : right >>> 20)
70+
^ ((left & (UNIT >>> 21)) == 0L? 0L : right >>> 21))
71+
^ (((left & (UNIT >>> 22)) == 0L? 0L : right >>> 22)
72+
^ ((left & (UNIT >>> 23)) == 0L? 0L : right >>> 23))))
73+
74+
^ (((((left & (UNIT >>> 24)) == 0L? 0L : right >>> 24)
75+
^ ((left & (UNIT >>> 25)) == 0L? 0L : right >>> 25))
76+
^ (((left & (UNIT >>> 26)) == 0L? 0L : right >>> 26)
77+
^ ((left & (UNIT >>> 27)) == 0L? 0L : right >>> 27)))
78+
^ ((((left & (UNIT >>> 28)) == 0L? 0L : right >>> 28)
79+
^ ((left & (UNIT >>> 29)) == 0L? 0L : right >>> 29))
80+
^ (((left & (UNIT >>> 30)) == 0L? 0L : right >>> 30)
81+
^ ((left & (UNIT >>> 31)) == 0L? 0L : right >>> 31)))))
82+
;
83+
84+
return mod.applyAsInt(product);
85+
}
3586

3687
/**
3788
* Hide default constructor for a static utils class.
@@ -48,7 +99,7 @@ private CrcUtil() {
4899
* @param mod function that reduces a 64-bit polynomial product modulo the CRC polynomial.
49100
* @return monomial.
50101
*/
51-
public static int getMonomial(long lengthBytes, int mod) {
102+
public static int getMonomial(long lengthBytes, ToIntFunction<Long> mod) {
52103
if (lengthBytes == 0) {
53104
return MULTIPLICATIVE_IDENTITY;
54105
} else if (lengthBytes < 0) {
@@ -67,9 +118,9 @@ public static int getMonomial(long lengthBytes, int mod) {
67118
while (degree > 0) {
68119
if ((degree & 1) != 0) {
69120
product = (product == MULTIPLICATIVE_IDENTITY) ? multiplier :
70-
galoisFieldMultiply(product, multiplier, mod);
121+
multiplyMod(product, multiplier, mod);
71122
}
72-
multiplier = galoisFieldMultiply(multiplier, multiplier, mod);
123+
multiplier = multiplyMod(multiplier, multiplier, mod);
73124
degree >>= 1;
74125
}
75126
return product;
@@ -85,8 +136,8 @@ public static int getMonomial(long lengthBytes, int mod) {
85136
* @return the product of crcA and monomial, reduced by mod, XORed with crcB.
86137
*/
87138
public static int composeWithMonomial(
88-
int crcA, int crcB, int monomial, int mod) {
89-
return galoisFieldMultiply(crcA, monomial, mod) ^ crcB;
139+
int crcA, int crcB, int monomial, ToIntFunction<Long> mod) {
140+
return multiplyMod(crcA, monomial, mod) ^ crcB;
90141
}
91142

92143
/**
@@ -98,7 +149,7 @@ public static int composeWithMonomial(
98149
* @param mod function that reduces a 64-bit polynomial product modulo the CRC polynomial.
99150
* @return crcA composed with crcB using the monomial computed for lengthB.
100151
*/
101-
public static int compose(int crcA, int crcB, long lengthB, int mod) {
152+
public static int compose(int crcA, int crcB, long lengthB, ToIntFunction<Long> mod) {
102153
int monomial = getMonomial(lengthB, mod);
103154
return composeWithMonomial(crcA, crcB, monomial, mod);
104155
}
@@ -199,40 +250,5 @@ public static String toMultiCrcString(final byte[] bytes) {
199250
return sb.toString();
200251
}
201252

202-
/**
203-
* Galois field multiplication of {@code p} and {@code q} with the
204-
* generator polynomial {@code m} as the modulus.
205-
*
206-
* @param m The little-endian polynomial to use as the modulus when
207-
* multiplying p and q, with implicit "1" bit beyond the bottom bit.
208-
*/
209-
private static int galoisFieldMultiply(int p, int q, int m) {
210-
int summation = 0;
211-
212-
// Top bit is the x^0 place; each right-shift increments the degree of the
213-
// current term.
214-
int curTerm = MULTIPLICATIVE_IDENTITY;
215-
216-
// Iteratively multiply p by x mod m as we go to represent the q[i] term
217-
// (of degree x^i) times p.
218-
int px = p;
219-
220-
while (curTerm != 0) {
221-
if ((q & curTerm) != 0) {
222-
summation ^= px;
223-
}
224253

225-
// Bottom bit represents highest degree since we're little-endian; before
226-
// we multiply by "x" for the next term, check bottom bit to know whether
227-
// the resulting px will thus have a term matching the implicit "1" term
228-
// of "m" and thus will need to subtract "m" after multiplying by "x".
229-
boolean hasMaxDegree = ((px & 1) != 0);
230-
px >>>= 1;
231-
if (hasMaxDegree) {
232-
px ^= m;
233-
}
234-
curTerm >>>= 1;
235-
}
236-
return summation;
237-
}
238254
}

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -22,6 +22,7 @@
2222
import java.io.DataOutputStream;
2323
import java.io.IOException;
2424
import java.nio.ByteBuffer;
25+
import java.util.function.ToIntFunction;
2526
import java.util.zip.CRC32;
2627
import java.util.zip.Checksum;
2728

@@ -118,15 +119,14 @@ static Checksum newCrc32C() {
118119
* @return the int representation of the polynomial associated with the
119120
* CRC {@code type}, suitable for use with further CRC arithmetic.
120121
*/
121-
public static int getCrcPolynomialForType(Type type) {
122+
static ToIntFunction<Long> getModFunction(Type type) {
122123
switch (type) {
123124
case CRC32:
124-
return CrcUtil.GZIP_POLYNOMIAL;
125+
return PureJavaCrc32::mod;
125126
case CRC32C:
126-
return CrcUtil.CASTAGNOLI_POLYNOMIAL;
127+
return PureJavaCrc32C::mod;
127128
default:
128-
throw new IllegalArgumentException(
129-
"No CRC polynomial could be associated with type: " + type);
129+
throw new IllegalArgumentException("Unexpected type: " + type);
130130
}
131131
}
132132

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -90,6 +90,14 @@ public void update(final byte[] b, final int offset, final int len) {
9090
crc = localCrc;
9191
}
9292

93+
/** @return x mod p, where p is the CRC32 polynomial. */
94+
public static int mod(long x) {
95+
final int y = (int)(x);
96+
return (int)(x >> 32)
97+
^ ((T[((y << 24) >>> 24) + 0x300] ^ T[((y << 16) >>> 24) + 0x200])
98+
^ (T[((y << 8) >>> 24) + 0x100] ^ T[((y ) >>> 24) ]));
99+
}
100+
93101
@Override
94102
final public void update(int b) {
95103
crc = (crc >>> 8) ^ T[(((crc ^ b) << 24) >>> 24)];

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -94,6 +94,14 @@ public void update(byte[] b, int off, int len) {
9494
crc = localCrc;
9595
}
9696

97+
/** @return x mod p, where p is the CRC32C polynomial. */
98+
public static int mod(long x) {
99+
final int y = (int)(x);
100+
return (int)(x >> 32)
101+
^ ((T[((y << 24) >>> 24) + 0x300] ^ T[((y << 16) >>> 24) + 0x200])
102+
^ (T[((y << 8) >>> 24) + 0x100] ^ T[((y ) >>> 24) ]));
103+
}
104+
97105
@Override
98106
final public void update(int b) {
99107
crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)];

0 commit comments

Comments
 (0)