Skip to content

Commit 0bcb75d

Browse files
committed
HADOOP-19012. Use CRC tables to speed up galoisFieldMultiply in CrcUtil.
1 parent 5b15b8d commit 0bcb75d

File tree

6 files changed

+253
-87
lines changed

6 files changed

+253
-87
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.ByteArrayOutputStream;
2727
import java.io.DataInputStream;
2828
import java.io.IOException;
29+
import java.util.function.ToIntFunction;
2930

3031
/**
3132
* Encapsulates logic for composing multiple CRCs into one or more combined CRCs
@@ -35,11 +36,11 @@
3536
*/
3637
@InterfaceAudience.LimitedPrivate({"Common", "HDFS", "MapReduce", "Yarn"})
3738
@InterfaceStability.Unstable
38-
public class CrcComposer {
39+
public final class CrcComposer {
3940
private static final int CRC_SIZE_BYTES = 4;
4041
private static final Logger LOG = LoggerFactory.getLogger(CrcComposer.class);
4142

42-
private final int crcPolynomial;
43+
private final ToIntFunction<Long> mod;
4344
private final int precomputedMonomialForHint;
4445
private final long bytesPerCrcHint;
4546
private final long stripeLength;
@@ -79,28 +80,14 @@ public static CrcComposer newCrcComposer(
7980
*/
8081
public static CrcComposer newStripedCrcComposer(
8182
DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) {
82-
int polynomial = DataChecksum.getCrcPolynomialForType(type);
83-
return new CrcComposer(
84-
polynomial,
85-
CrcUtil.getMonomial(bytesPerCrcHint, polynomial),
86-
bytesPerCrcHint,
87-
stripeLength);
83+
return new CrcComposer(type, bytesPerCrcHint, stripeLength);
8884
}
8985

90-
CrcComposer(
91-
int crcPolynomial,
92-
int precomputedMonomialForHint,
93-
long bytesPerCrcHint,
94-
long stripeLength) {
95-
LOG.debug(
96-
"crcPolynomial=0x{}, precomputedMonomialForHint=0x{}, "
97-
+ "bytesPerCrcHint={}, stripeLength={}",
98-
Integer.toString(crcPolynomial, 16),
99-
Integer.toString(precomputedMonomialForHint, 16),
100-
bytesPerCrcHint,
101-
stripeLength);
102-
this.crcPolynomial = crcPolynomial;
103-
this.precomputedMonomialForHint = precomputedMonomialForHint;
86+
private CrcComposer(DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) {
87+
LOG.debug("type={}, bytesPerCrcHint={}, stripeLength={}",
88+
type, bytesPerCrcHint, stripeLength);
89+
this.mod = DataChecksum.getModFunction(type);
90+
this.precomputedMonomialForHint = CrcUtil.getMonomial(bytesPerCrcHint, mod);
10491
this.bytesPerCrcHint = bytesPerCrcHint;
10592
this.stripeLength = stripeLength;
10693
}
@@ -161,10 +148,10 @@ public void update(int crcB, long bytesPerCrc) {
161148
curCompositeCrc = crcB;
162149
} else if (bytesPerCrc == bytesPerCrcHint) {
163150
curCompositeCrc = CrcUtil.composeWithMonomial(
164-
curCompositeCrc, crcB, precomputedMonomialForHint, crcPolynomial);
151+
curCompositeCrc, crcB, precomputedMonomialForHint, mod);
165152
} else {
166153
curCompositeCrc = CrcUtil.compose(
167-
curCompositeCrc, crcB, bytesPerCrc, crcPolynomial);
154+
curCompositeCrc, crcB, bytesPerCrc, mod);
168155
}
169156

170157
curPositionInStripe += bytesPerCrc;

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.classification.InterfaceStability;
2323

2424
import java.util.Arrays;
25+
import java.util.function.ToIntFunction;
2526

2627
/**
2728
* This class provides utilities for working with CRCs.
@@ -32,6 +33,55 @@ public final class CrcUtil {
3233
public static final int MULTIPLICATIVE_IDENTITY = 0x80000000;
3334
public static final int GZIP_POLYNOMIAL = 0xEDB88320;
3435
public static final int CASTAGNOLI_POLYNOMIAL = 0x82F63B78;
36+
private static final long UNIT = 0x8000_0000_0000_0000L;
37+
38+
/**
39+
* @return a * b (mod p),
40+
* where mod p is computed by the given mod function.
41+
*/
42+
static int multiplyMod(int a, int b, ToIntFunction<Long> mod) {
43+
final long left = ((long)a) << 32;
44+
final long right = ((long)b) << 32;
45+
46+
final long product
47+
= ((((((left & (UNIT /* */)) == 0L? 0L : right)
48+
^ ((left & (UNIT >>> 1)) == 0L? 0L : right >>> 1))
49+
^ (((left & (UNIT >>> 2)) == 0L? 0L : right >>> 2)
50+
^ ((left & (UNIT >>> 3)) == 0L? 0L : right >>> 3)))
51+
^ ((((left & (UNIT >>> 4)) == 0L? 0L : right >>> 4)
52+
^ ((left & (UNIT >>> 5)) == 0L? 0L : right >>> 5))
53+
^ (((left & (UNIT >>> 6)) == 0L? 0L : right >>> 6)
54+
^ ((left & (UNIT >>> 7)) == 0L? 0L : right >>> 7))))
55+
56+
^ (((((left & (UNIT >>> 8)) == 0L? 0L : right >>> 8)
57+
^ ((left & (UNIT >>> 9)) == 0L? 0L : right >>> 9))
58+
^ (((left & (UNIT >>> 10)) == 0L? 0L : right >>> 10)
59+
^ ((left & (UNIT >>> 11)) == 0L? 0L : right >>> 11)))
60+
^ ((((left & (UNIT >>> 12)) == 0L? 0L : right >>> 12)
61+
^ ((left & (UNIT >>> 13)) == 0L? 0L : right >>> 13))
62+
^ (((left & (UNIT >>> 14)) == 0L? 0L : right >>> 14)
63+
^ ((left & (UNIT >>> 15)) == 0L? 0L : right >>> 15)))))
64+
65+
^ ((((((left & (UNIT >>> 16)) == 0L? 0L : right >>> 16)
66+
^ ((left & (UNIT >>> 17)) == 0L? 0L : right >>> 17))
67+
^ (((left & (UNIT >>> 18)) == 0L? 0L : right >>> 18)
68+
^ ((left & (UNIT >>> 19)) == 0L? 0L : right >>> 19)))
69+
^ ((((left & (UNIT >>> 20)) == 0L? 0L : right >>> 20)
70+
^ ((left & (UNIT >>> 21)) == 0L? 0L : right >>> 21))
71+
^ (((left & (UNIT >>> 22)) == 0L? 0L : right >>> 22)
72+
^ ((left & (UNIT >>> 23)) == 0L? 0L : right >>> 23))))
73+
74+
^ (((((left & (UNIT >>> 24)) == 0L? 0L : right >>> 24)
75+
^ ((left & (UNIT >>> 25)) == 0L? 0L : right >>> 25))
76+
^ (((left & (UNIT >>> 26)) == 0L? 0L : right >>> 26)
77+
^ ((left & (UNIT >>> 27)) == 0L? 0L : right >>> 27)))
78+
^ ((((left & (UNIT >>> 28)) == 0L? 0L : right >>> 28)
79+
^ ((left & (UNIT >>> 29)) == 0L? 0L : right >>> 29))
80+
^ (((left & (UNIT >>> 30)) == 0L? 0L : right >>> 30)
81+
^ ((left & (UNIT >>> 31)) == 0L? 0L : right >>> 31)))));
82+
83+
return mod.applyAsInt(product);
84+
}
3585

3686
/**
3787
* Hide default constructor for a static utils class.
@@ -48,7 +98,7 @@ private CrcUtil() {
4898
* @param mod the function that computes a value modulo the CRC polynomial.
4999
* @return monomial.
50100
*/
51-
public static int getMonomial(long lengthBytes, int mod) {
101+
public static int getMonomial(long lengthBytes, ToIntFunction<Long> mod) {
52102
if (lengthBytes == 0) {
53103
return MULTIPLICATIVE_IDENTITY;
54104
} else if (lengthBytes < 0) {
@@ -67,9 +117,9 @@ public static int getMonomial(long lengthBytes, int mod) {
67117
while (degree > 0) {
68118
if ((degree & 1) != 0) {
69119
product = (product == MULTIPLICATIVE_IDENTITY) ? multiplier :
70-
galoisFieldMultiply(product, multiplier, mod);
120+
multiplyMod(product, multiplier, mod);
71121
}
72-
multiplier = galoisFieldMultiply(multiplier, multiplier, mod);
122+
multiplier = multiplyMod(multiplier, multiplier, mod);
73123
degree >>= 1;
74124
}
75125
return product;
@@ -85,8 +135,8 @@ public static int getMonomial(long lengthBytes, int mod) {
85135
* @return compose with monomial.
86136
*/
87137
public static int composeWithMonomial(
88-
int crcA, int crcB, int monomial, int mod) {
89-
return galoisFieldMultiply(crcA, monomial, mod) ^ crcB;
138+
int crcA, int crcB, int monomial, ToIntFunction<Long> mod) {
139+
return multiplyMod(crcA, monomial, mod) ^ crcB;
90140
}
91141

92142
/**
@@ -98,7 +148,7 @@ public static int composeWithMonomial(
98148
* @param mod the function that computes a value modulo the CRC polynomial.
99149
* @return compose result.
100150
*/
101-
public static int compose(int crcA, int crcB, long lengthB, int mod) {
151+
public static int compose(int crcA, int crcB, long lengthB, ToIntFunction<Long> mod) {
102152
int monomial = getMonomial(lengthB, mod);
103153
return composeWithMonomial(crcA, crcB, monomial, mod);
104154
}
@@ -199,40 +249,5 @@ public static String toMultiCrcString(final byte[] bytes) {
199249
return sb.toString();
200250
}
201251

202-
/**
203-
* Galois field multiplication of {@code p} and {@code q} with the
204-
* generator polynomial {@code m} as the modulus.
205-
*
206-
* @param m The little-endian polynomial to use as the modulus when
207-
* multiplying p and q, with implicit "1" bit beyond the bottom bit.
208-
*/
209-
private static int galoisFieldMultiply(int p, int q, int m) {
210-
int summation = 0;
211-
212-
// Top bit is the x^0 place; each right-shift increments the degree of the
213-
// current term.
214-
int curTerm = MULTIPLICATIVE_IDENTITY;
215-
216-
// Iteratively multiply p by x mod m as we go to represent the q[i] term
217-
// (of degree x^i) times p.
218-
int px = p;
219-
220-
while (curTerm != 0) {
221-
if ((q & curTerm) != 0) {
222-
summation ^= px;
223-
}
224252

225-
// Bottom bit represents highest degree since we're little-endian; before
226-
// we multiply by "x" for the next term, check bottom bit to know whether
227-
// the resulting px will thus have a term matching the implicit "1" term
228-
// of "m" and thus will need to subtract "m" after multiplying by "x".
229-
boolean hasMaxDegree = ((px & 1) != 0);
230-
px >>>= 1;
231-
if (hasMaxDegree) {
232-
px ^= m;
233-
}
234-
curTerm >>>= 1;
235-
}
236-
return summation;
237-
}
238253
}

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -22,6 +22,7 @@
2222
import java.io.DataOutputStream;
2323
import java.io.IOException;
2424
import java.nio.ByteBuffer;
25+
import java.util.function.ToIntFunction;
2526
import java.util.zip.CRC32;
2627
import java.util.zip.Checksum;
2728

@@ -118,15 +119,14 @@ static Checksum newCrc32C() {
118119
* @return the int representation of the polynomial associated with the
119120
* CRC {@code type}, suitable for use with further CRC arithmetic.
120121
*/
121-
public static int getCrcPolynomialForType(Type type) {
122+
static ToIntFunction<Long> getModFunction(Type type) {
122123
switch (type) {
123124
case CRC32:
124-
return CrcUtil.GZIP_POLYNOMIAL;
125+
return PureJavaCrc32::mod;
125126
case CRC32C:
126-
return CrcUtil.CASTAGNOLI_POLYNOMIAL;
127+
return PureJavaCrc32C::mod;
127128
default:
128-
throw new IllegalArgumentException(
129-
"No CRC polynomial could be associated with type: " + type);
129+
throw new IllegalArgumentException("Unexpected type: " + type);
130130
}
131131
}
132132

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -90,6 +90,18 @@ public void update(final byte[] b, final int offset, final int len) {
9090
crc = localCrc;
9191
}
9292

93+
/**
94+
* Compute x mod p, where p is the CRC32 polynomial.
95+
* @param x the input value
96+
* @return x mod p
97+
*/
98+
public static int mod(long x) {
99+
final int y = (int)(x);
100+
return (int)(x >> 32)
101+
^ ((T[((y << 24) >>> 24) + 0x300] ^ T[((y << 16) >>> 24) + 0x200])
102+
^ (T[((y << 8) >>> 24) + 0x100] ^ T[((y /* */) >>> 24) /* */]));
103+
}
104+
93105
@Override
94106
final public void update(int b) {
95107
crc = (crc >>> 8) ^ T[(((crc ^ b) << 24) >>> 24)];

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/**
1+
/*
22
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
44
* distributed with this work for additional information
@@ -100,6 +100,18 @@ public void update(byte[] b, int off, int len) {
100100
crc = localCrc;
101101
}
102102

103+
/**
104+
* Compute x mod p, where p is the CRC32C polynomial.
105+
* @param x the input value
106+
* @return x mod p
107+
*/
108+
public static int mod(long x) {
109+
final int y = (int)(x);
110+
return (int)(x >> 32)
111+
^ ((T[((y << 24) >>> 24) + 0x300] ^ T[((y << 16) >>> 24) + 0x200])
112+
^ (T[((y << 8) >>> 24) + 0x100] ^ T[((y /* */) >>> 24) /* */]));
113+
}
114+
103115
@Override
104116
final public void update(int b) {
105117
crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)];

0 commit comments

Comments
 (0)