Skip to content

Commit 0e62b04

Browse files
HADOOP-19724. [RISC-V] Add rv64 Zbc (CLMUL) bulk CRC32 with runtime detection
Co-authored-by: gong-flying <gongxiaofei24@iscas.ac.cn>
1 parent 429f2de commit 0e62b04

File tree

1 file changed

+190
-14
lines changed
  • hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util

1 file changed

+190
-14
lines changed

hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32_riscv.c

Lines changed: 190 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,200 @@
1616
* limitations under the License.
1717
*/
1818

19+
#include <assert.h>
20+
#include <stddef.h> // for size_t
21+
#include <stdio.h>
22+
#include <string.h>
23+
24+
#include "bulk_crc32.h"
25+
#include "gcc_optimizations.h"
26+
27+
/**
28+
* Hardware-accelerated CRC32 calculation using RISC-V Zbc extension.
29+
* Uses carry-less multiply instructions (clmul/clmulh) for CRC32 (zlib
30+
* polynomial).
31+
*/
32+
33+
/*
 * Signature shared by the pipelined CRC routines: up to three running
 * checksums are updated in place over consecutive blocks of one buffer.
 */
typedef void (*crc_pipelined_func_t)(uint32_t *crc1, uint32_t *crc2,
                                     uint32_t *crc3, const uint8_t *p_buf,
                                     size_t block_size, int num_blocks);

/* Dispatch pointer for the zlib-polynomial routine; defined in another
 * translation unit and re-pointed by the constructor in this file. */
extern crc_pipelined_func_t pipelined_crc32_zlib_func;
36+
37+
#if defined(__riscv) && (__riscv_xlen == 64)
38+
39+
/*
 * Constants consumed by rv_crc32_zlib_clmul().
 * NOTE(review): these look like the usual reflected CRC-32 folding and
 * Barrett-reduction constants for the zlib polynomial -- confirm the
 * derivation against the reference before changing any of them.
 */
/* Multiplier for the low 64-bit lane in the 16-byte folding loop. */
#define RV_CRC32_CONST_R3 0x1751997D0ULL
/* Multiplier for the high 64-bit lane; also used for the 128->64 bit fold. */
#define RV_CRC32_CONST_R4 0x0CCAA009EULL
/* Multiplier applied to the low 32 bits in the next reduction step. */
#define RV_CRC32_CONST_R5 0x163CD6124ULL
/* Keeps only the low 32 bits of a 64-bit value. */
#define RV_CRC32_MASK32 0xFFFFFFFFULL
/* Second multiplier of the final reduction. */
#define RV_CRC32_POLY_TRUE_LE_FULL 0x1DB710641ULL
/* First multiplier of the final reduction. */
#define RV_CRC32_CONST_RU 0x1F7011641ULL
45+
46+
static inline uint64_t rv_clmul(uint64_t a, uint64_t b) {
47+
uint64_t r;
48+
__asm__ volatile(
49+
".option push\n\t"
50+
".option arch, +zbc\n\t"
51+
"clmul %0, %1, %2\n\t"
52+
".option pop\n\t"
53+
: "=r"(r)
54+
: "r"(a), "r"(b));
55+
return r;
56+
}
57+
58+
static inline uint64_t rv_clmulh(uint64_t a, uint64_t b) {
59+
uint64_t r;
60+
__asm__ volatile(
61+
".option push\n\t"
62+
".option arch, +zbc\n\t"
63+
"clmulh %0, %1, %2\n\t"
64+
".option pop\n\t"
65+
: "=r"(r)
66+
: "r"(a), "r"(b));
67+
return r;
68+
}
69+
70+
/*
 * Bit-at-a-time CRC32 update over the reflected zlib polynomial.
 *
 * crc : running CRC state to continue from (no pre/post inversion is
 *       applied here; the caller owns that).
 * buf : bytes to absorb.
 * len : number of bytes in buf; 0 returns crc unchanged.
 *
 * Returns the updated CRC state.
 */
static inline uint32_t rv_crc32_zlib_bitwise(uint32_t crc, const uint8_t *buf,
                                             size_t len) {
  uint32_t state = crc;
  const uint8_t *cur = buf;
  size_t remaining = len;

  while (remaining > 0) {
    state ^= *cur;
    ++cur;
    --remaining;

    // Shift out one bit at a time, folding in the reflected polynomial
    // whenever the bit that falls off is set.
    for (int bit = 8; bit > 0; --bit) {
      const uint32_t dropped = state & 1U;
      state >>= 1;
      if (dropped) {
        state ^= 0xEDB88320U;
      }
    }
  }
  return state;
}
82+
83+
static uint32_t rv_crc32_zlib_clmul(uint32_t crc, const uint8_t *buf,
84+
size_t len) {
85+
const uint8_t *p = buf;
86+
size_t n = len;
87+
88+
if (n < 32) {
89+
return rv_crc32_zlib_bitwise(crc, p, n);
90+
}
91+
92+
uintptr_t mis = (uintptr_t)p & 0xF;
93+
if (unlikely(mis)) {
94+
size_t pre = 16 - mis;
95+
if (pre > n) pre = n;
96+
crc = rv_crc32_zlib_bitwise(crc, p, pre);
97+
p += pre;
98+
n -= pre;
99+
}
100+
101+
uint64_t x0 = *(const uint64_t *)(const void *)(p + 0);
102+
uint64_t x1 = *(const uint64_t *)(const void *)(p + 8);
103+
x0 ^= (uint64_t)crc;
104+
p += 16;
105+
n -= 16;
106+
107+
const uint64_t C1 = RV_CRC32_CONST_R3;
108+
const uint64_t C2 = RV_CRC32_CONST_R4;
109+
110+
while (likely(n >= 16)) {
111+
uint64_t tL = rv_clmul(C2, x1);
112+
uint64_t tH = rv_clmulh(C2, x1);
113+
uint64_t yL = rv_clmul(C1, x0);
114+
uint64_t yH = rv_clmulh(C1, x0);
115+
x0 = yL ^ tL;
116+
x1 = yH ^ tH;
117+
118+
uint64_t d0 = *(const uint64_t *)(const void *)(p + 0);
119+
uint64_t d1 = *(const uint64_t *)(const void *)(p + 8);
120+
x0 ^= d0;
121+
x1 ^= d1;
122+
p += 16;
123+
n -= 16;
124+
}
125+
126+
{
127+
uint64_t tH = rv_clmulh(x0, C2);
128+
uint64_t tL = rv_clmul(x0, C2);
129+
x0 = x1 ^ tL;
130+
x1 = tH;
131+
}
132+
133+
uint64_t hi = x1;
134+
uint64_t lo = x0;
135+
uint64_t t2 = (lo >> 32) | (hi << 32);
136+
lo &= RV_CRC32_MASK32;
137+
138+
lo = rv_clmul(RV_CRC32_CONST_R5, lo) ^ t2;
139+
uint64_t tmp = lo;
140+
lo &= RV_CRC32_MASK32;
141+
lo = rv_clmul(lo, RV_CRC32_CONST_RU);
142+
lo &= RV_CRC32_MASK32;
143+
lo = rv_clmul(lo, RV_CRC32_POLY_TRUE_LE_FULL) ^ tmp;
144+
145+
uint32_t c = (uint32_t)(lo >> 32);
146+
147+
if (n) {
148+
c = rv_crc32_zlib_bitwise(c, p, n);
149+
}
150+
return c;
151+
}
152+
19153
/**
20-
* RISC-V CRC32 hardware acceleration (placeholder)
154+
* Pipelined version of hardware-accelerated CRC32 calculation using
155+
* RISC-V Zbc carry-less multiply instructions.
21156
*
22-
* Phase 1: provide a RISC-V-specific compilation unit that currently makes
23-
* no runtime changes and falls back to the generic software path in
24-
* bulk_crc32.c. Future work will add Zbc-based acceleration and runtime
25-
* dispatch.
157+
* crc1, crc2, crc3 : Store initial checksum for each block before
158+
* calling. When it returns, updated checksums are stored.
159+
* p_buf : The base address of the data buffer. The buffer should be
160+
* at least as big as block_size * num_blocks.
161+
* block_size : The size of each block in bytes.
162+
* num_blocks : The number of blocks to work on. Min = 0 (no-op), Max = 3
26163
*/
164+
static void pipelined_crc32_zlib(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3,
                                 const uint8_t *p_buf, size_t block_size,
                                 int num_blocks) {
  // Updates up to three checksums in place, one per consecutive block of
  // block_size bytes starting at p_buf.
  //
  // Each block's start address is formed only in the case that consumes it:
  // with fewer than three blocks, p_buf + 2 * block_size may lie beyond the
  // caller's buffer, and merely computing such a pointer is undefined.
  switch (num_blocks) {
    case 3:
      *crc3 = rv_crc32_zlib_clmul(*crc3, p_buf + 2 * block_size, block_size);
      // fall through
    case 2:
      *crc2 = rv_crc32_zlib_clmul(*crc2, p_buf + block_size, block_size);
      // fall through
    case 1:
      *crc1 = rv_crc32_zlib_clmul(*crc1, p_buf, block_size);
      break;
    case 0:
      // Nothing to do; all three checksums are left untouched.
      return;
    default:
      assert(0 && "BUG: Invalid number of checksum blocks");
      break;
  }
}
30187

31-
#include "bulk_crc32.h"
32-
#include "gcc_optimizations.h"
188+
#endif // __riscv && __riscv_xlen==64
33189

34-
/* Constructor hook reserved for future HW capability detection and
35-
* function-pointer dispatch. Intentionally a no-op for the initial phase. */
36-
void __attribute__((constructor)) init_riscv_crc_support(void)
37-
{
38-
/* No-op: keep using the default software implementations. */
190+
/**
191+
* On library load, determine what sort of crc we are going to do
192+
* and set crc function pointers appropriately.
193+
*/
194+
void __attribute__((constructor)) init_cpu_support_flag(void) {
#if defined(__riscv) && (__riscv_xlen == 64)
  // Check if the CPU supports Zbc by looking for "zbc" on an "isa" or
  // "extensions" line of /proc/cpuinfo. If the file cannot be opened, the
  // function pointer is left untouched.
  FILE *f = fopen("/proc/cpuinfo", "r");
  if (!f) {
    return;
  }

  // A single logical line may be longer than the buffer, in which case
  // fgets() hands it back in several chunks. Track the logical line across
  // chunks so that "zbc" is still recognised when it sits in a later chunk
  // than the "isa"/"extensions" key, and carry the last two bytes of each
  // unfinished chunk forward so a "zbc" split across a chunk boundary is
  // still seen.
  char buf[1024];
  size_t carry = 0;  // bytes kept at the front of buf from the previous chunk
  int key_line = 0;  // current logical line carries an isa/extensions key
  int has_zbc = 0;

  while (!has_zbc && fgets(buf + carry, (int)(sizeof(buf) - carry), f)) {
    const size_t len = strlen(buf);

    if (!key_line) {
      key_line = strstr(buf, "isa") != NULL || strstr(buf, "extensions") != NULL;
    }
    if (key_line && strstr(buf, "zbc") != NULL) {
      has_zbc = 1;
    }

    if (len > 0 && buf[len - 1] == '\n') {
      // Logical line complete: start the next one from scratch.
      carry = 0;
      key_line = 0;
    } else {
      carry = len < 2 ? len : 2;
      memmove(buf, buf + len - carry, carry);
    }
  }
  fclose(f);

  if (has_zbc) {
    pipelined_crc32_zlib_func = pipelined_crc32_zlib;
  }
#endif
}

0 commit comments

Comments
 (0)