Skip to content

Commit a24e8b7

Browse files
authored
md5: Add inline-asm backend for LoongArch64 targets (#505)
1 parent 7aba4b5 commit a24e8b7

File tree

7 files changed

+369
-169
lines changed

7 files changed

+369
-169
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

md5/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ name = "md5"
16 16

17 17
[dependencies]
18 18
digest = "0.10.7"
19+
cfg-if = "1.0"
19 20

20 21
[target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies]
21 22
md5-asm = { version = "0.5", optional = true }
@@ -28,4 +29,8 @@ hex-literal = "0.2.2"
28 29
default = ["std"]
29 30
std = ["digest/std"]
30 31
asm = ["md5-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates
32+
# Use assembly backend for LoongArch64 targets
33+
# WARNING: Bumps MSRV to 1.72. This feature SHOULD NOT be enabled by library crates
34+
loongarch64_asm = []
31 35
oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57
36+
force-soft = [] # Force software implementation

md5/src/compress.rs

Lines changed: 12 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -1,165 +1,14 @@
1-
#![allow(clippy::many_single_char_names, clippy::unreadable_literal)]
2-
use core::convert::TryInto;
3-
4-
const RC: [u32; 64] = [
5-
// round 1
6-
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
7-
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
8-
// round 2
9-
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
10-
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
11-
// round 3
12-
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
13-
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
14-
// round 4
15-
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
16-
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
17-
];
18-
19-
#[inline(always)]
20-
fn op_f(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
21-
((x & y) | (!x & z))
22-
.wrapping_add(w)
23-
.wrapping_add(m)
24-
.wrapping_add(c)
25-
.rotate_left(s)
26-
.wrapping_add(x)
27-
}
28-
#[inline(always)]
29-
fn op_g(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
30-
((x & z) | (y & !z))
31-
.wrapping_add(w)
32-
.wrapping_add(m)
33-
.wrapping_add(c)
34-
.rotate_left(s)
35-
.wrapping_add(x)
36-
}
37-
38-
#[inline(always)]
39-
fn op_h(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
40-
(x ^ y ^ z)
41-
.wrapping_add(w)
42-
.wrapping_add(m)
43-
.wrapping_add(c)
44-
.rotate_left(s)
45-
.wrapping_add(x)
46-
}
47-
48-
#[inline(always)]
49-
fn op_i(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
50-
(y ^ (x | !z))
51-
.wrapping_add(w)
52-
.wrapping_add(m)
53-
.wrapping_add(c)
54-
.rotate_left(s)
55-
.wrapping_add(x)
56-
}
57-
58-
#[inline]
59-
pub fn compress_block(state: &mut [u32; 4], input: &[u8; 64]) {
60-
let mut a = state[0];
61-
let mut b = state[1];
62-
let mut c = state[2];
63-
let mut d = state[3];
64-
65-
let mut data = [0u32; 16];
66-
for (o, chunk) in data.iter_mut().zip(input.chunks_exact(4)) {
67-
*o = u32::from_le_bytes(chunk.try_into().unwrap());
68-
}
69-
70-
// round 1
71-
a = op_f(a, b, c, d, data[0], RC[0], 7);
72-
d = op_f(d, a, b, c, data[1], RC[1], 12);
73-
c = op_f(c, d, a, b, data[2], RC[2], 17);
74-
b = op_f(b, c, d, a, data[3], RC[3], 22);
75-
76-
a = op_f(a, b, c, d, data[4], RC[4], 7);
77-
d = op_f(d, a, b, c, data[5], RC[5], 12);
78-
c = op_f(c, d, a, b, data[6], RC[6], 17);
79-
b = op_f(b, c, d, a, data[7], RC[7], 22);
80-
81-
a = op_f(a, b, c, d, data[8], RC[8], 7);
82-
d = op_f(d, a, b, c, data[9], RC[9], 12);
83-
c = op_f(c, d, a, b, data[10], RC[10], 17);
84-
b = op_f(b, c, d, a, data[11], RC[11], 22);
85-
86-
a = op_f(a, b, c, d, data[12], RC[12], 7);
87-
d = op_f(d, a, b, c, data[13], RC[13], 12);
88-
c = op_f(c, d, a, b, data[14], RC[14], 17);
89-
b = op_f(b, c, d, a, data[15], RC[15], 22);
90-
91-
// round 2
92-
a = op_g(a, b, c, d, data[1], RC[16], 5);
93-
d = op_g(d, a, b, c, data[6], RC[17], 9);
94-
c = op_g(c, d, a, b, data[11], RC[18], 14);
95-
b = op_g(b, c, d, a, data[0], RC[19], 20);
96-
97-
a = op_g(a, b, c, d, data[5], RC[20], 5);
98-
d = op_g(d, a, b, c, data[10], RC[21], 9);
99-
c = op_g(c, d, a, b, data[15], RC[22], 14);
100-
b = op_g(b, c, d, a, data[4], RC[23], 20);
101-
102-
a = op_g(a, b, c, d, data[9], RC[24], 5);
103-
d = op_g(d, a, b, c, data[14], RC[25], 9);
104-
c = op_g(c, d, a, b, data[3], RC[26], 14);
105-
b = op_g(b, c, d, a, data[8], RC[27], 20);
106-
107-
a = op_g(a, b, c, d, data[13], RC[28], 5);
108-
d = op_g(d, a, b, c, data[2], RC[29], 9);
109-
c = op_g(c, d, a, b, data[7], RC[30], 14);
110-
b = op_g(b, c, d, a, data[12], RC[31], 20);
111-
112-
// round 3
113-
a = op_h(a, b, c, d, data[5], RC[32], 4);
114-
d = op_h(d, a, b, c, data[8], RC[33], 11);
115-
c = op_h(c, d, a, b, data[11], RC[34], 16);
116-
b = op_h(b, c, d, a, data[14], RC[35], 23);
117-
118-
a = op_h(a, b, c, d, data[1], RC[36], 4);
119-
d = op_h(d, a, b, c, data[4], RC[37], 11);
120-
c = op_h(c, d, a, b, data[7], RC[38], 16);
121-
b = op_h(b, c, d, a, data[10], RC[39], 23);
122-
123-
a = op_h(a, b, c, d, data[13], RC[40], 4);
124-
d = op_h(d, a, b, c, data[0], RC[41], 11);
125-
c = op_h(c, d, a, b, data[3], RC[42], 16);
126-
b = op_h(b, c, d, a, data[6], RC[43], 23);
127-
128-
a = op_h(a, b, c, d, data[9], RC[44], 4);
129-
d = op_h(d, a, b, c, data[12], RC[45], 11);
130-
c = op_h(c, d, a, b, data[15], RC[46], 16);
131-
b = op_h(b, c, d, a, data[2], RC[47], 23);
132-
133-
// round 4
134-
a = op_i(a, b, c, d, data[0], RC[48], 6);
135-
d = op_i(d, a, b, c, data[7], RC[49], 10);
136-
c = op_i(c, d, a, b, data[14], RC[50], 15);
137-
b = op_i(b, c, d, a, data[5], RC[51], 21);
138-
139-
a = op_i(a, b, c, d, data[12], RC[52], 6);
140-
d = op_i(d, a, b, c, data[3], RC[53], 10);
141-
c = op_i(c, d, a, b, data[10], RC[54], 15);
142-
b = op_i(b, c, d, a, data[1], RC[55], 21);
143-
144-
a = op_i(a, b, c, d, data[8], RC[56], 6);
145-
d = op_i(d, a, b, c, data[15], RC[57], 10);
146-
c = op_i(c, d, a, b, data[6], RC[58], 15);
147-
b = op_i(b, c, d, a, data[13], RC[59], 21);
148-
149-
a = op_i(a, b, c, d, data[4], RC[60], 6);
150-
d = op_i(d, a, b, c, data[11], RC[61], 10);
151-
c = op_i(c, d, a, b, data[2], RC[62], 15);
152-
b = op_i(b, c, d, a, data[9], RC[63], 21);
153-
154-
state[0] = state[0].wrapping_add(a);
155-
state[1] = state[1].wrapping_add(b);
156-
state[2] = state[2].wrapping_add(c);
157-
state[3] = state[3].wrapping_add(d);
158-
}
159-
160-
#[inline]
161-
pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) {
162-
for block in blocks {
163-
compress_block(state, block)
1+
cfg_if::cfg_if! {
2+
if #[cfg(feature = "force-soft")] {
3+
mod soft;
4+
pub use soft::compress;
5+
} else if #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] {
6+
pub use md5_asm::compress;
7+
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
8+
mod loongarch64_asm;
9+
pub use loongarch64_asm::compress;
10+
} else {
11+
mod soft;
12+
pub use soft::compress;
164 13
}
165 14
}

md5/src/compress/consts.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
pub const RC: [u32; 64] = [
2+
// round 1
3+
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
4+
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
5+
// round 2
6+
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
7+
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
8+
// round 3
9+
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
10+
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
11+
// round 4
12+
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
13+
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
14+
];

0 commit comments

Comments
 (0)