|
| 1 | +use crate::*; |
| 2 | + |
/// Returns `true` when `v` is one of the four bytes this module treats as
/// whitespace: horizontal tab, line feed, carriage return, or space.
///
/// Note that this is narrower than `u8::is_ascii_whitespace`, which also
/// accepts form feed.
pub(crate) fn xdl_isspace(v: u8) -> bool {
    matches!(v, b'\t' | b'\n' | b'\r' | b' ')
}
| 9 | + |
/// Iterator over the runs of a single line, where a run is either a maximal
/// stretch of non-whitespace bytes or a maximal stretch of whitespace bytes.
///
/// How whitespace runs are reported (verbatim, collapsed, or dropped) is
/// controlled by the `XDF_*` bits stored in `flags`; see the `Iterator`
/// implementation for the exact rules.
#[derive(Debug, Clone)]
pub struct WhitespaceIter<'a> {
    /// The bytes being scanned; yielded slices borrow from this buffer.
    line: &'a [u8],
    /// Offset into `line` of the next byte that has not been consumed yet.
    index: usize,
    /// Bit set of `XDF_*` whitespace-handling flags.
    flags: u64,
}

impl<'a> WhitespaceIter<'a> {
    /// Creates an iterator positioned at the start of `line` that applies
    /// the whitespace handling selected by `flags`.
    pub fn new(line: &'a [u8], flags: u64) -> Self {
        Self {
            line,
            index: 0,
            flags,
        }
    }
}
| 26 | + |
| 27 | +impl<'a> Iterator for WhitespaceIter<'a> { |
| 28 | + type Item = &'a [u8]; |
| 29 | + |
| 30 | + fn next(&mut self) -> Option<Self::Item> { |
| 31 | + if self.index >= self.line.len() { |
| 32 | + return None; |
| 33 | + } |
| 34 | + |
| 35 | + loop { |
| 36 | + let start = self.index; |
| 37 | + if self.index == self.line.len() { |
| 38 | + return None; |
| 39 | + } |
| 40 | + |
| 41 | + /* return contiguous run of not space bytes */ |
| 42 | + while self.index < self.line.len() { |
| 43 | + if xdl_isspace(self.line[self.index]) { |
| 44 | + break; |
| 45 | + } |
| 46 | + self.index += 1; |
| 47 | + } |
| 48 | + if self.index > start { |
| 49 | + return Some(&self.line[start..self.index]); |
| 50 | + } |
| 51 | + /* the current byte had better be a space */ |
| 52 | + if !xdl_isspace(self.line[self.index]) { |
| 53 | + panic!("xdl_line_iter_next xdl_isspace() is false") |
| 54 | + } |
| 55 | + |
| 56 | + while self.index < self.line.len() && xdl_isspace(self.line[self.index]) { |
| 57 | + self.index += 1; |
| 58 | + } |
| 59 | + |
| 60 | + |
| 61 | + if self.index <= start { |
| 62 | + panic!("xdl_isspace() cannot simultaneously be true and false"); |
| 63 | + } |
| 64 | + |
| 65 | + if (self.flags & XDF_IGNORE_WHITESPACE_AT_EOL) != 0 |
| 66 | + && self.index == self.line.len() |
| 67 | + { |
| 68 | + return None; |
| 69 | + } |
| 70 | + if (self.flags & XDF_IGNORE_WHITESPACE) != 0 { |
| 71 | + continue; |
| 72 | + } |
| 73 | + if (self.flags & XDF_IGNORE_WHITESPACE_CHANGE) != 0 { |
| 74 | + if self.index == self.line.len() { |
| 75 | + continue; |
| 76 | + } |
| 77 | + return Some(" ".as_bytes()); |
| 78 | + } |
| 79 | + if (self.flags & XDF_IGNORE_CR_AT_EOL) != 0 { |
| 80 | + if start < self.line.len() && self.index == self.line.len() { |
| 81 | + let mut end = self.line.len(); |
| 82 | + if end > 0 && self.line[end - 1] == b'\n' { |
| 83 | + if end - start == 1 { |
| 84 | + return Some(&self.line[start..end]); |
| 85 | + } else { |
| 86 | + end -= 1; |
| 87 | + } |
| 88 | + if end > 0 && self.line[end - 1] == b'\r' { |
| 89 | + self.index = end; |
| 90 | + end -= 1; |
| 91 | + if end - start == 0 { |
| 92 | + continue; |
| 93 | + } |
| 94 | + return Some(&self.line[start..end]); |
| 95 | + } |
| 96 | + } |
| 97 | + } |
| 98 | + } |
| 99 | + return Some(&self.line[start..self.index]); |
| 100 | + } |
| 101 | + } |
| 102 | +} |
| 103 | + |
/// Compares two sequences that are each delivered as a series of slices.
///
/// Returns `true` when concatenating every slice yielded by `it0` produces
/// exactly the same elements, in the same order, as concatenating every slice
/// yielded by `it1`. Where the chunk boundaries fall is irrelevant, and empty
/// chunks on either side are ignored.
///
/// The comparison stops at the first mismatching element.
pub fn chunked_iter_equal<'a, T, IT0, IT1>(it0: IT0, it1: IT1) -> bool
where
    T: Eq + 'a,
    IT0: Iterator<Item = &'a [T]>,
    IT1: Iterator<Item = &'a [T]>,
{
    // Flattening erases the chunk boundaries (and skips empty chunks), which
    // leaves a plain element-wise comparison that also checks both sides end
    // at the same point.
    it0.flatten().eq(it1.flatten())
}
| 153 | + |
#[cfg(test)]
mod tests {
    use crate::*;
    use crate::xutils::{chunked_iter_equal, WhitespaceIter};

    /// Runs `WhitespaceIter` over `line` with `flags`, concatenates every run
    /// it yields into `buffer`, and returns the result as a string.
    fn extract_string<'a>(line: &[u8], flags: u64, buffer: &'a mut Vec<u8>) -> &'a str {
        buffer.clear();
        for run in WhitespaceIter::new(line, flags) {
            buffer.extend_from_slice(run);
        }
        std::str::from_utf8(buffer.as_slice())
            .expect("test vectors are ASCII, so the collected runs are valid UTF-8")
    }

    /// Presents a list of strings as an iterator over their byte slices.
    fn get_str_it<'a>(slice: &'a [&'a str]) -> impl Iterator<Item = &'a [u8]> + 'a {
        slice.iter().map(|v| v.as_bytes())
    }

    #[test]
    fn test_ignore_space() {
        /* (expected output, input line, flags) */
        let tv_individual = vec![
            ("ab\r", "ab\r", XDF_IGNORE_CR_AT_EOL),
            ("ab \r", "ab \r", XDF_IGNORE_CR_AT_EOL),
            ("\r \t a \r", "\r \t a \r", XDF_IGNORE_CR_AT_EOL),
            ("\r a \r", "\r a \r", XDF_IGNORE_CR_AT_EOL),
            ("\r", "\r", XDF_IGNORE_CR_AT_EOL),
            ("", "", XDF_IGNORE_CR_AT_EOL),
            ("\r a \r", "\r a \r", XDF_IGNORE_CR_AT_EOL),

            ("\r \t a \n", "\r \t a \r\n", XDF_IGNORE_CR_AT_EOL),
            ("\r a \n", "\r a \r\n", XDF_IGNORE_CR_AT_EOL),
            ("\n", "\r\n", XDF_IGNORE_CR_AT_EOL),
            ("\n", "\n", XDF_IGNORE_CR_AT_EOL),
            ("\r a \n", "\r a \n", XDF_IGNORE_CR_AT_EOL),

            ("1\n", "1\r\n", XDF_IGNORE_CR_AT_EOL),
            ("1", "1\r\n", XDF_IGNORE_WHITESPACE_CHANGE),

            ("\r \t a \r\n", "\r \t a \r\n", 0),
            ("\r a \r\n", "\r a \r\n", 0),
            ("\r\n", "\r\n", 0),
            ("\n", "\n", 0),
            ("\r a \n", "\r a \n", 0),
            (" \n", " \n", 0),
            ("a \n", "a \n", 0),
            (" a \t asdf \t \r\n", " a \t asdf \t \r\n", 0),
            ("\t a b \t \n", "\t a b \t \n", 0),
            (" a b \t \r\n", " a b \t \r\n", 0),
            ("\t a \n", "\t a \n", 0),
            ("\t\t\ta\t\n", "\t\t\ta\t\n", 0),
            ("a\n", "a\n", 0),
            ("\ta\n", "\ta\n", 0),

            ("a", "\r \t a \r\n", XDF_IGNORE_WHITESPACE),
            ("a", "\r a \r\n", XDF_IGNORE_WHITESPACE),
            ("", "\r\n", XDF_IGNORE_WHITESPACE),
            ("", "\n", XDF_IGNORE_WHITESPACE),
            ("a", "\r a \n", XDF_IGNORE_WHITESPACE),
            ("", " \n", XDF_IGNORE_WHITESPACE),
            ("a", "a \n", XDF_IGNORE_WHITESPACE),
            ("aasdf", " a \t asdf \t \r\n", XDF_IGNORE_WHITESPACE),
            ("ab", "\t a b \t \n", XDF_IGNORE_WHITESPACE),
            ("ab", " a b \t \r\n", XDF_IGNORE_WHITESPACE),
            ("a", "\t a \n", XDF_IGNORE_WHITESPACE),
            ("a", "\t\t\ta\t\n", XDF_IGNORE_WHITESPACE),
            ("a", "a\n", XDF_IGNORE_WHITESPACE),
            ("a", "\ta\n", XDF_IGNORE_WHITESPACE),

            ("", " \n", XDF_IGNORE_WHITESPACE_AT_EOL),
            ("a", "a \n", XDF_IGNORE_WHITESPACE_AT_EOL),
            (" a \t asdf", " a \t asdf \t \r\n", XDF_IGNORE_WHITESPACE_AT_EOL),
            ("\t a b", "\t a b \t \n", XDF_IGNORE_WHITESPACE_AT_EOL),

            (" a b", " a b \t \r\n", XDF_IGNORE_WHITESPACE_CHANGE),
            (" a", "\t a \n", XDF_IGNORE_WHITESPACE_CHANGE),
            (" a", "\t\t\ta\t\n", XDF_IGNORE_WHITESPACE_CHANGE),
            ("a", "a\n", XDF_IGNORE_WHITESPACE_CHANGE),
            (" a", "\ta\n", XDF_IGNORE_WHITESPACE_CHANGE),

            ("ab", " a b \t \r\n", XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE),
            ("a", "\t a \n", XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE),
            ("a", "\t\t\ta\t\n", XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE),
            ("a", "a\n", XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE),
            ("a", "\ta\n", XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE),
        ];

        let mut buffer = Vec::<u8>::new();
        for (expected, input, flags) in tv_individual {
            let actual = extract_string(input.as_bytes(), flags, &mut buffer);
            assert_eq!(expected, actual, "input: {:?} flags: 0x{:x}", input, flags);
        }
    }

    #[test]
    fn test_chunked_iter_equal() {
        /*
         * The expected verdict for each pair is computed below by comparing
         * the flattened bytes, so the grouping here is only for the reader.
         */
        let tv_str: Vec<(Vec<&str>, Vec<&str>)> = vec![
            /* equal cases */
            (vec!["", "", "abc"], vec!["", "abc"]),
            (vec!["c", "", "a"], vec!["c", "a"]),
            (vec!["a", "", "b", "", "c"], vec!["a", "b", "c"]),
            (vec!["", "", "a"], vec!["a"]),
            (vec!["", "a"], vec!["a"]),
            (vec![""], vec![]),
            (vec!["", ""], vec![""]),
            (vec!["a"], vec!["", "", "a"]),
            (vec!["a"], vec!["", "a"]),
            (vec![], vec![""]),
            (vec![""], vec!["", ""]),
            (vec!["hello ", "world"], vec!["hel", "lo wo", "rld"]),
            (vec!["hel", "lo wo", "rld"], vec!["hello ", "world"]),
            (vec!["hello world"], vec!["hello world"]),
            (vec![], vec![]),

            /* different cases */
            (vec!["abc", "def"], vec!["def", "abc"]),
            (vec!["abc"], vec![]),
            (vec!["", "", ""], vec!["", "a"]),
            (vec!["", "a"], vec!["b", ""]),
            (vec!["abc"], vec!["abc", "de"]),
            (vec!["abc", "de"], vec!["abc"]),
            (vec![], vec!["a"]),
            (vec!["a"], vec![]),
            (vec!["abc", "kj"], vec!["abc", "de"]),
        ];

        for (lhs, rhs) in tv_str.iter() {
            let a: Vec<u8> = get_str_it(lhs).flatten().copied().collect();
            let b: Vec<u8> = get_str_it(rhs).flatten().copied().collect();
            let expected = a == b;

            let actual = chunked_iter_equal(get_str_it(lhs), get_str_it(rhs));
            assert_eq!(expected, actual, "lhs: {:?} rhs: {:?}", lhs, rhs);
        }
    }
}
0 commit comments