|
| 1 | +//! Base85 (Ascii85) encoding and decoding |
| 2 | +//! |
| 3 | +//! Ascii85 is a form of binary-to-text encoding developed by Adobe Systems. |
| 4 | +//! It encodes 4 bytes into 5 ASCII characters from the range 33-117 ('!' to 'u'). |
| 5 | +//! |
| 6 | +//! # References |
| 7 | +//! - [Wikipedia: Ascii85](https://en.wikipedia.org/wiki/Ascii85) |
| 8 | +
|
/// Converts a 32-bit value into its five base-85 digits, as Ascii85 characters.
///
/// The digits are produced least-significant first, so the caller has to
/// reverse the returned string to obtain the conventional Ascii85 order.
///
/// Exactly five characters are always returned: `u32::MAX` is smaller than
/// `85^5`, so five digits are always enough, and small values (including
/// zero) are filled up with `'!'`, the character for digit zero. A fixed
/// width matters because a decoder splits its input into five-character
/// groups; dropping leading zero digits would shift every following group.
fn base10_to_85(mut d: u32) -> String {
    const GROUP_DIGITS: usize = 5;

    let mut result = String::with_capacity(GROUP_DIGITS);
    for _ in 0..GROUP_DIGITS {
        // `d % 85` is in 0..=84, so adding 33 stays within '!'..='u'.
        result.push(char::from((d % 85) as u8 + 33));
        d /= 85;
    }
    result
}
| 22 | + |
/// Evaluates a sequence of base-85 digits as a single number.
///
/// `digits` holds digit values (not ASCII characters) with the most
/// significant digit first; an empty slice evaluates to zero.
fn base85_to_10(digits: &[u8]) -> u32 {
    let mut total = 0_u32;
    // Walk from the least significant digit so the position index is the exponent.
    for (exponent, &digit) in digits.iter().rev().enumerate() {
        total += u32::from(digit) * 85_u32.pow(exponent as u32);
    }
    total
}
| 32 | + |
| 33 | +/// Encodes binary data using Base85 encoding |
| 34 | +/// |
| 35 | +/// # Arguments |
| 36 | +/// * `data` - The binary data to encode |
| 37 | +/// |
| 38 | +/// # Returns |
| 39 | +/// * `Vec<u8>` - The Base85 encoded data |
| 40 | +/// |
| 41 | +/// # Examples |
| 42 | +/// ``` |
| 43 | +/// use the_algorithms_rust::ciphers::base85_encode; |
| 44 | +/// |
| 45 | +/// assert_eq!(base85_encode(b""), b""); |
| 46 | +/// assert_eq!(base85_encode(b"12345"), b"0etOA2#"); |
| 47 | +/// assert_eq!(base85_encode(b"base 85"), b"@UX=h+?24"); |
| 48 | +/// ``` |
| 49 | +pub fn base85_encode(data: &[u8]) -> Vec<u8> { |
| 50 | + if data.is_empty() { |
| 51 | + return Vec::new(); |
| 52 | + } |
| 53 | + |
| 54 | + // Convert input bytes to binary string |
| 55 | + let mut binary_data = String::new(); |
| 56 | + for &byte in data { |
| 57 | + use std::fmt::Write; |
| 58 | + write!(&mut binary_data, "{byte:08b}").unwrap(); |
| 59 | + } |
| 60 | + |
| 61 | + // Calculate padding needed to make length a multiple of 32 |
| 62 | + let remainder = binary_data.len() % 32; |
| 63 | + let null_values = if remainder == 0 { |
| 64 | + 0 |
| 65 | + } else { |
| 66 | + (32 - remainder) / 8 |
| 67 | + }; |
| 68 | + |
| 69 | + // Pad binary data to multiple of 32 bits |
| 70 | + while !binary_data.len().is_multiple_of(32) { |
| 71 | + binary_data.push('0'); |
| 72 | + } |
| 73 | + |
| 74 | + // Split into 32-bit chunks and convert to base-85 |
| 75 | + let mut result = String::new(); |
| 76 | + for chunk in binary_data.as_bytes().chunks(32) { |
| 77 | + let chunk_str = std::str::from_utf8(chunk).unwrap(); |
| 78 | + let value = u32::from_str_radix(chunk_str, 2).unwrap(); |
| 79 | + let mut encoded = base10_to_85(value); |
| 80 | + |
| 81 | + // Reverse the string (as per original Python logic) |
| 82 | + encoded = encoded.chars().rev().collect(); |
| 83 | + result.push_str(&encoded); |
| 84 | + } |
| 85 | + |
| 86 | + // Remove padding characters if necessary |
| 87 | + if null_values % 4 != 0 { |
| 88 | + let trim_len = result.len() - null_values; |
| 89 | + result.truncate(trim_len); |
| 90 | + } |
| 91 | + |
| 92 | + result.into_bytes() |
| 93 | +} |
| 94 | + |
| 95 | +/// Decodes Base85 encoded data back to binary |
| 96 | +/// |
| 97 | +/// # Arguments |
| 98 | +/// * `data` - The Base85 encoded data to decode |
| 99 | +/// |
| 100 | +/// # Returns |
| 101 | +/// * `Vec<u8>` - The decoded binary data |
| 102 | +/// |
| 103 | +/// # Examples |
| 104 | +/// ``` |
| 105 | +/// use the_algorithms_rust::ciphers::base85_decode; |
| 106 | +/// |
| 107 | +/// assert_eq!(base85_decode(b""), b""); |
| 108 | +/// assert_eq!(base85_decode(b"0etOA2#"), b"12345"); |
| 109 | +/// assert_eq!(base85_decode(b"@UX=h+?24"), b"base 85"); |
| 110 | +/// ``` |
| 111 | +pub fn base85_decode(data: &[u8]) -> Vec<u8> { |
| 112 | + if data.is_empty() { |
| 113 | + return Vec::new(); |
| 114 | + } |
| 115 | + |
| 116 | + // Calculate padding needed |
| 117 | + let remainder = data.len() % 5; |
| 118 | + let null_values = if remainder == 0 { 0 } else { 5 - remainder }; |
| 119 | + |
| 120 | + // Create padded data |
| 121 | + let mut padded_data = data.to_vec(); |
| 122 | + padded_data.extend(std::iter::repeat_n(b'u', null_values)); |
| 123 | + |
| 124 | + // Process in 5-byte chunks |
| 125 | + let mut results = Vec::new(); |
| 126 | + for chunk in padded_data.chunks(5) { |
| 127 | + // Convert ASCII characters to base-85 digits |
| 128 | + let b85_segment: Vec<u8> = chunk.iter().map(|&b| b - 33).collect(); |
| 129 | + |
| 130 | + // Convert base-85 to base-10 |
| 131 | + let value = base85_to_10(&b85_segment); |
| 132 | + |
| 133 | + // Convert to binary string (32 bits) |
| 134 | + let binary = format!("{value:032b}"); |
| 135 | + results.push(binary); |
| 136 | + } |
| 137 | + |
| 138 | + // Convert binary strings to characters |
| 139 | + let mut char_chunks = Vec::new(); |
| 140 | + for binary_str in results { |
| 141 | + for byte_str in binary_str.as_bytes().chunks(8) { |
| 142 | + let byte_string = std::str::from_utf8(byte_str).unwrap(); |
| 143 | + let byte_value = u8::from_str_radix(byte_string, 2).unwrap(); |
| 144 | + char_chunks.push(byte_value); |
| 145 | + } |
| 146 | + } |
| 147 | + |
| 148 | + // Calculate offset for trimming |
| 149 | + let offset = if null_values % 5 == 0 { |
| 150 | + 0 |
| 151 | + } else { |
| 152 | + -(null_values as isize) |
| 153 | + }; |
| 154 | + let result_len = if offset < 0 { |
| 155 | + (char_chunks.len() as isize + offset) as usize |
| 156 | + } else { |
| 157 | + char_chunks.len() |
| 158 | + }; |
| 159 | + |
| 160 | + char_chunks.truncate(result_len); |
| 161 | + char_chunks |
| 162 | +} |
| 163 | + |
#[cfg(test)]
mod tests {
    use super::*;

    // Encoding: fixed reference vectors.

    #[test]
    fn test_encode_empty() {
        let encoded = base85_encode(b"");
        assert_eq!(encoded, b"");
    }

    #[test]
    fn test_encode_12345() {
        let encoded = base85_encode(b"12345");
        assert_eq!(encoded, b"0etOA2#");
    }

    #[test]
    fn test_encode_base85() {
        let encoded = base85_encode(b"base 85");
        assert_eq!(encoded, b"@UX=h+?24");
    }

    // Decoding: the same vectors in the opposite direction.

    #[test]
    fn test_decode_empty() {
        let decoded = base85_decode(b"");
        assert_eq!(decoded, b"");
    }

    #[test]
    fn test_decode_12345() {
        let decoded = base85_decode(b"0etOA2#");
        assert_eq!(decoded, b"12345");
    }

    #[test]
    fn test_decode_base85() {
        let decoded = base85_decode(b"@UX=h+?24");
        assert_eq!(decoded, b"base 85");
    }

    // Decoding the encoder's output must give back the original bytes.
    #[test]
    fn test_encode_decode_roundtrip() {
        let samples: [&[u8]; 4] = [b"Hello, World!", b"The quick brown fox", b"Rust", b"a"];

        for sample in samples {
            let decoded = base85_decode(&base85_encode(sample));
            assert_eq!(decoded, sample);
        }
    }
}
0 commit comments