Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ jobs:
with:
toolchain: stable
- name: Run tests
run: cargo test --verbose
run: cargo test --verbose -- --nocapture
env:
RUSTFLAGS: "-A unused_variables -A dead_code"
136 changes: 72 additions & 64 deletions src/cipher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use zeroize::Zeroize;
/// This implementation prevents timing side-channel attacks by processing
/// the table in fixed-size chunks and using bitwise masks rather than
/// conditional branching.
#[inline]
#[inline(always)]
pub fn constant_time_lookup_256(table: &[u8; 256], index: u8) -> u8 {
let mut result = 0u8;
let idx = index as usize;
Expand Down Expand Up @@ -120,7 +120,7 @@ pub fn constant_time_position_lookup(positions: &[usize; 256], value: u8) -> usi
///
/// Processes the entire character array in chunks to ensure constant execution
/// time regardless of the target index value.
#[inline]
#[inline(always)]
pub fn constant_time_character_lookup(characters: &[u8; 256], index: usize) -> u8 {
let mut result = 0u8;

Expand Down Expand Up @@ -165,6 +165,7 @@ pub fn constant_time_character_lookup(characters: &[u8; 256], index: usize) -> u
///
/// The input table must be a valid permutation (each value 0-255 appears exactly once)
/// for the inverse to be mathematically correct.
#[inline(always)]
pub fn build_inverse_lookup(forward_row: &[u8; 256]) -> [u8; 256] {
let mut inverse = [0u8; 256];
let mut i = 0usize;
Expand Down Expand Up @@ -296,12 +297,12 @@ pub fn unshift_bits_with_rot_key_par(mut buf: Vec<u8>, rot_key: &[u8]) -> Vec<u8
/// The result is sorted and deduplicated to minimize cache size and
/// eliminate redundant table generation.
#[inline]
pub fn build_pairs(key1_chars: &[usize], key2_chars: &[usize], len: usize) -> Vec<(u16, u16)> {
pub fn build_pairs(key1_chars: &[u8], key2_chars: &[u8], len: usize) -> Vec<(u16, u16)> {
let mut v = Vec::with_capacity(len.min(65536));
let mut i = 0usize;
while i < len {
let table_2d = (key1_chars[i % key1_chars.len()] & 0xFF) as u16;
let row = (key2_chars[i % key2_chars.len()] & 0xFF) as u16;
let table_2d = key1_chars[i % key1_chars.len()] as u16;
let row = key2_chars[i % key2_chars.len()] as u16;
v.push((table_2d, row));
i += 1;
}
Expand Down Expand Up @@ -410,14 +411,14 @@ pub fn build_cipher_cache(
|| {
k1_ref
.par_iter()
.map(|&c| (c as usize) & 0xFF)
.collect::<Vec<_>>()
.map(|&c| c )
.collect::<Vec<u8>>()
},
|| {
k2_ref
.par_iter()
.map(|&c| (c as usize) & 0xFF)
.collect::<Vec<_>>()
.map(|&c| c )
.collect::<Vec<u8>>()
},
);

Expand Down Expand Up @@ -493,29 +494,33 @@ pub fn encrypt_core_optimized(
if keystream.len() != this { keystream.resize(this, 0); }
blake3_stream_for_chunk(xor_key, Some(run_salt), b"xor_stream_v1", chunk_index, &mut keystream);

{
let src = &plain_text[offset..offset + this];
let dst = &mut cipher_text[offset..offset + this];

dst.par_iter_mut()
.zip(src.par_iter())
.zip(keystream.par_iter())
.enumerate()
.for_each(|(i, ((d, &s), &k))| {
let pos = offset + i;
let table_2d = cache.key1_chars[pos % cache.key1_chars.len()] & 0xFF;
let row = cache.key2_chars[pos % cache.key2_chars.len()] & 0xFF;
let map_index = (table_2d << 8) | row;

let row_idx = cache.index_map[map_index];
if row_idx != usize::MAX {
let transformed = constant_time_lookup_256(&cache.rows[row_idx], s);
*d = transformed ^ k;
} else {
*d = s ^ k;
}
});
}
// Process the chunk in parallel with explicit chunk-local variables
let k1 = &cache.key1_chars;
let k2 = &cache.key2_chars;
let rows = &cache.rows;
let index_map = &cache.index_map;

let src = &plain_text[offset..offset + this];
let dst = &mut cipher_text[offset..offset + this];

dst.par_iter_mut()
.zip(src.par_iter())
.zip(keystream.par_iter())
.enumerate()
.for_each(|(i, ((d, &s), &k))| {
let pos = offset + i;
let table_2d = k1[pos % k1.len()] as usize;
let row = k2[pos % k2.len()] as usize;
let map_index = (table_2d << 8) | row;

let row_idx = index_map[map_index];
if row_idx != usize::MAX {
let transformed = constant_time_lookup_256(&rows[row_idx], s);
*d = transformed ^ k;
} else {
*d = s ^ k;
}
});

offset += this;
chunk_index = chunk_index.wrapping_add(1);
Expand Down Expand Up @@ -570,30 +575,33 @@ pub fn decrypt_core_optimized(
if keystream.len() != this { keystream.resize(this, 0); }
blake3_stream_for_chunk(xor_key, Some(run_salt), b"xor_stream_v1", chunk_index, &mut keystream);

{
let src = &cipher_text[offset..offset + this];
let dst = &mut plain_text[offset..offset + this];

dst.par_iter_mut()
.zip(src.par_iter())
.zip(keystream.par_iter())
.enumerate()
.for_each(|(i, ((d, &s), &k))| {
let pos = offset + i;
let table_2d = cache.key1_chars[pos % cache.key1_chars.len()] & 0xFF;
let row = cache.key2_chars[pos % cache.key2_chars.len()] & 0xFF;
let map_index = (table_2d << 8) | row;

let xor_result = s ^ k;

let row_idx = cache.index_map[map_index];
if row_idx != usize::MAX {
*d = constant_time_lookup_256(&cache.inverse_rows[row_idx], xor_result);
} else {
*d = xor_result;
}
});
}
// Process the chunk in parallel with explicit chunk-local variables
let k1 = &cache.key1_chars;
let k2 = &cache.key2_chars;
let inv_rows = &cache.inverse_rows;
let index_map = &cache.index_map;

let src = &cipher_text[offset..offset + this];
let dst = &mut plain_text[offset..offset + this];

dst.par_iter_mut()
.zip(src.par_iter())
.zip(keystream.par_iter())
.enumerate()
.for_each(|(i, ((d, &s), &k))| {
let pos = offset + i;
let table_2d = k1[pos % k1.len()] as usize;
let row = k2[pos % k2.len()] as usize;
let map_index = (table_2d << 8) | row;

let xor_result = s ^ k;
let row_idx = index_map[map_index];
if row_idx != usize::MAX {
*d = constant_time_lookup_256(&inv_rows[row_idx], xor_result);
} else {
*d = xor_result;
}
});

offset += this;
chunk_index = chunk_index.wrapping_add(1);
Expand All @@ -617,12 +625,12 @@ pub fn encrypt3_final(
let rounds = std::cmp::min(ROUND, round_keys.len());
println!("Encrypting {rounds} rounds");

let mut r = 0usize;
while r < rounds {
for r in 0..rounds {
println!(" Round {}", r + 1);
let round_seed = derive_round_seed(&run_salt, r as u32);

let (xor_key, rot_key) = derive_subkeys_with_salt_and_seed(key1, key2, &run_salt, &round_seed);

let (xor_key, rot_key) =
derive_subkeys_with_salt_and_seed(key1, key2, &run_salt, &round_seed);

let cache = build_cipher_cache(key1, key2, &run_salt, &round_seed, body.len());

Expand All @@ -633,8 +641,6 @@ pub fn encrypt3_final(
let start_shift = Instant::now();
body = shift_bits_with_rot_key_par(body, &rot_key);
println!(" Bit shift: {:?}", start_shift.elapsed());

r += 1;
}

let hmac_key = derive_hmac_key_final(key1, key2, &run_salt);
Expand All @@ -646,14 +652,16 @@ pub fn encrypt3_final(

let hmac_tag = compute_hmac(&hmac_key, &header, &body);

let mut output = Vec::with_capacity(header.len() + body.len() + hmac_tag.len());
let total_len = header.len() + body.len() + hmac_tag.len();
let mut output = Vec::with_capacity(total_len);
output.extend_from_slice(&header);
output.extend_from_slice(&body);
output.extend_from_slice(&hmac_tag);

Ok(output)
}


pub fn decrypt3_final(
encrypted_data: Vec<u8>,
key1: &Secret<Vec<u8>>,
Expand Down Expand Up @@ -710,4 +718,4 @@ pub fn decrypt3_final(
}

Ok(plaintext)
}
}
83 changes: 46 additions & 37 deletions src/crypto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@ use zeroize::Zeroize;

type HmacSha256 = Hmac<Sha256>;

/// Multiplies two elements of GF(2^8) using the AES field's reduction
/// polynomial x^8 + x^4 + x^3 + x + 1 (0x11B), via shift-and-add
/// ("Russian peasant") multiplication.
///
/// Returns the product `a * b` in the finite field GF(2^8).
fn gf256_mul(a: u8, b: u8) -> u8 {
    // AES reduction polynomial, with the implicit x^8 term dropped.
    const AES_POLY: u8 = 0x1B;

    let mut acc = 0u8;
    let mut lhs = a;
    let mut rhs = b;

    while rhs != 0 {
        // If the current low bit of the multiplier is set, add (XOR)
        // the shifted multiplicand into the accumulator.
        if rhs & 1 == 1 {
            acc ^= lhs;
        }
        // Multiply lhs by x; if the high bit overflows out of the byte,
        // reduce modulo the AES polynomial.
        let overflow = lhs & 0x80 != 0;
        lhs = lhs.wrapping_shl(1);
        if overflow {
            lhs ^= AES_POLY;
        }
        rhs >>= 1;
    }

    acc
}

/// Generates a custom cryptographically secure S-Box from a 32-byte key
///
/// This function creates a non-linear, key-dependent substitution box
Expand All @@ -27,7 +43,6 @@ type HmacSha256 = Hmac<Sha256>;
/// Generates a custom cryptographically secure S-Box from a key
/// Uses multiple rounds of non-linear mixing for strong diffusion and confusion
pub fn generate_custom_sbox(key: &[u8]) -> [u8; 256] {
// AES S-box table (standard)
const AES_SBOX: [u8; 256] = [
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
Expand All @@ -47,55 +62,49 @@ pub fn generate_custom_sbox(key: &[u8]) -> [u8; 256] {
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16,
];

// derive an odd multiplier in 1..255 from key so multiplication mod 256 is invertible
let mul: u8 = if key.is_empty() {
5u8 // arbitrary odd fallback
} else {
// combine some key bytes to produce a value, force odd and non-zero
let mut v: u16 = 0x0101;
for (i, &b) in key.iter().enumerate().take(8) {
v = v.wrapping_mul((b as u16).wrapping_add((i as u16) + 1));
let mut attempt = 0u32;
loop {
let mut hasher = blake3::Hasher::new();
hasher.update(b"sbox-params");
hasher.update(key);
hasher.update(&attempt.to_le_bytes());
let mut reader = hasher.finalize_xof();

let mut mul_bytes = [0u8; 1];
reader.fill(&mut mul_bytes);
let mut mul = mul_bytes[0];
if mul == 0 { mul = 1; }

let mut mask_bytes = [0u8; 1];
reader.fill(&mut mask_bytes);
let mask = mask_bytes[0];

let mut sbox = [0u8; 256];
for i in 0..256 {
let base = AES_SBOX[i];
let t = gf256_mul(base, mul) ^ mask;
sbox[i] = t;
}
let mut m = (v as u8) | 1; // ensure odd
if m == 0 { m = 1; }
m
};

// derive XOR mask bytes from key (cycled)
let xor_mask = if key.is_empty() { vec![0x63u8] } else { key.to_vec() };

let mut sbox = [0u8; 256];
for i in 0..256 {
let base = AES_SBOX[i];
// bijective transform: multiply by odd (invertible mod 256) then xor by key-derived mask
let m = base.wrapping_mul(mul);
let k = xor_mask[i % xor_mask.len()];
sbox[i] = m ^ k;
}

// final check: ensure bijectivity (should hold). If collision found, fallback to AES S-box.
{
let mut seen = [false; 256];
let mut collision = false;
let mut ok = true;
for &v in sbox.iter() {
if seen[v as usize] {
collision = true;
ok = false;
break;
}
seen[v as usize] = true;
}
if collision {
// fallback to AES SBOX (bijective)
sbox.copy_from_slice(&AES_SBOX);

if ok {
return sbox;
} else {
attempt = attempt.wrapping_add(1);
}
}

sbox
}




/// Generates the inverse of a custom S-Box
///
/// Creates the mathematical inverse of a 256-byte substitution box,
Expand Down Expand Up @@ -478,4 +487,4 @@ pub fn build_characters(run_salt: &[u8], round_seed: &[u8; 8]) -> [u8; 256] {
/// Uses non-linear polynomial transformations and key-dependent mixing for cryptographic strength.
pub fn perm256_from_key(key: &[u8; 2048]) -> [u8; 256] {
generate_custom_sbox(key)
}
}
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ use crate::SALT_LEN;
}
}
}
assert!(max_bias < 0.2, "S-Box has detectable linear approximations (max bias: {})", max_bias);
assert!(max_bias < 0.10, "S-Box has detectable linear approximations (max bias: {})", max_bias);
}

#[test]
Expand Down Expand Up @@ -790,4 +790,4 @@ use crate::SALT_LEN;
}


}
}
6 changes: 3 additions & 3 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub const VERSION: u8 = 9;
pub const ALG_ID: u8 = 173;

/// Chunk size for Blake3 keystream generation in bytes
pub const BLAKE3_KEYSTREAM_CHUNK: usize = 16384;
pub const BLAKE3_KEYSTREAM_CHUNK: usize = 65536;

/// Optimal chunk size for parallel processing in bytes
pub const OPTIMAL_CHUNK_SIZE: usize = 65536;
Expand All @@ -41,9 +41,9 @@ pub struct CipherCache {
/// Character transformation table
pub characters: [u8; 256],
/// Key1-derived character indices
pub key1_chars: Vec<usize>,
pub key1_chars: Vec<u8>,
/// Key2-derived character indices
pub key2_chars: Vec<usize>,
pub key2_chars: Vec<u8>,
}

/// Cache key for identifying unique cipher cache entries
Expand Down