|
| 1 | +use std::borrow::Cow; |
| 2 | +use std::fmt; |
| 3 | +use std::iter::Peekable; |
| 4 | +use std::str::CharIndices; |
| 5 | + |
| 6 | +use crate::error::{Error, Result}; |
| 7 | +use crate::state::Lua; |
| 8 | +use crate::traits::IntoLua; |
| 9 | +use crate::types::Integer; |
| 10 | +use crate::value::Value; |
| 11 | + |
| 12 | +#[derive(Debug)] |
| 13 | +pub(crate) enum PathKey<'a> { |
| 14 | + Str(Cow<'a, str>), |
| 15 | + Int(Integer), |
| 16 | +} |
| 17 | + |
| 18 | +impl fmt::Display for PathKey<'_> { |
| 19 | + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 20 | + match self { |
| 21 | + PathKey::Str(s) => write!(f, "{}", s), |
| 22 | + PathKey::Int(i) => write!(f, "{}", i), |
| 23 | + } |
| 24 | + } |
| 25 | +} |
| 26 | + |
| 27 | +impl IntoLua for PathKey<'_> { |
| 28 | + fn into_lua(self, lua: &Lua) -> Result<Value> { |
| 29 | + match self { |
| 30 | + PathKey::Str(s) => Ok(Value::String(lua.create_string(s.as_ref())?)), |
| 31 | + PathKey::Int(i) => Ok(Value::Integer(i)), |
| 32 | + } |
| 33 | + } |
| 34 | +} |
| 35 | + |
| 36 | +// Parses a path like `a.b[3]?.c["d"]` into segments of `(key, safe_nil)`. |
| 37 | +pub(crate) fn parse_path<'a>(path: &'a str) -> Result<Vec<(PathKey<'a>, bool)>> { |
| 38 | + fn read_ident<'a>(path: &'a str, chars: &mut Peekable<CharIndices<'a>>) -> (Cow<'a, str>, bool) { |
| 39 | + let mut safe_nil = false; |
| 40 | + let start = chars.peek().map(|&(i, _)| i).unwrap_or(path.len()); |
| 41 | + let mut end = start; |
| 42 | + while let Some(&(pos, c)) = chars.peek() { |
| 43 | + if c == '.' || c == '?' || c.is_ascii_whitespace() || c == '[' { |
| 44 | + if c == '?' { |
| 45 | + safe_nil = true; |
| 46 | + chars.next(); // consume '?' |
| 47 | + } |
| 48 | + break; |
| 49 | + } |
| 50 | + end = pos + c.len_utf8(); |
| 51 | + chars.next(); |
| 52 | + } |
| 53 | + (Cow::Borrowed(&path[start..end]), safe_nil) |
| 54 | + } |
| 55 | + |
| 56 | + let mut segments = Vec::new(); |
| 57 | + let mut chars = path.char_indices().peekable(); |
| 58 | + while let Some(&(pos, next)) = chars.peek() { |
| 59 | + match next { |
| 60 | + '.' => { |
| 61 | + // Dot notation: identifier |
| 62 | + chars.next(); |
| 63 | + let (key, safe_nil) = read_ident(path, &mut chars); |
| 64 | + if key.is_empty() { |
| 65 | + return Err(Error::runtime(format!("empty key in path at position {pos}"))); |
| 66 | + } |
| 67 | + segments.push((PathKey::Str(key), safe_nil)); |
| 68 | + } |
| 69 | + '[' => { |
| 70 | + // Bracket notation: either integer or quoted string |
| 71 | + chars.next(); |
| 72 | + let key = match chars.peek() { |
| 73 | + Some(&(pos, c @ '0'..='9' | c @ '-')) => { |
| 74 | + // Integer key |
| 75 | + let negative = c == '-'; |
| 76 | + if negative { |
| 77 | + chars.next(); // consume '-' |
| 78 | + } |
| 79 | + let mut num: Option<Integer> = None; |
| 80 | + while let Some(&(_, c @ '0'..='9')) = chars.peek() { |
| 81 | + let new_num = num |
| 82 | + .unwrap_or(0) |
| 83 | + .checked_mul(10) |
| 84 | + .and_then(|n| n.checked_add((c as u8 - b'0') as Integer)) |
| 85 | + .ok_or_else(|| { |
| 86 | + Error::runtime(format!("integer overflow in path at position {pos}")) |
| 87 | + })?; |
| 88 | + num = Some(new_num); |
| 89 | + chars.next(); // consume digit |
| 90 | + } |
| 91 | + match num { |
| 92 | + Some(n) if negative => PathKey::Int(-n), |
| 93 | + Some(n) => PathKey::Int(n), |
| 94 | + None => { |
| 95 | + let err = format!("invalid integer in path at position {pos}"); |
| 96 | + return Err(Error::runtime(err)); |
| 97 | + } |
| 98 | + } |
| 99 | + } |
| 100 | + Some((_, '\'' | '"')) => { |
| 101 | + // Quoted string |
| 102 | + PathKey::Str(unquote_string(path, &mut chars)?) |
| 103 | + } |
| 104 | + Some((_, ']')) => { |
| 105 | + return Err(Error::runtime(format!("empty key in path at position {pos}"))); |
| 106 | + } |
| 107 | + Some((pos, c)) => { |
| 108 | + let err = format!("unexpected character '{c}' in path at position {pos}"); |
| 109 | + return Err(Error::runtime(err)); |
| 110 | + } |
| 111 | + None => { |
| 112 | + return Err(Error::runtime("unexpected end of path")); |
| 113 | + } |
| 114 | + }; |
| 115 | + // Expect closing bracket |
| 116 | + let mut safe_nil = false; |
| 117 | + match chars.next() { |
| 118 | + Some((_, ']')) => { |
| 119 | + // Check for optional safe-nil operator |
| 120 | + if let Some(&(_, '?')) = chars.peek() { |
| 121 | + safe_nil = true; |
| 122 | + chars.next(); // consume '?' |
| 123 | + } |
| 124 | + } |
| 125 | + Some((pos, c)) => { |
| 126 | + let err = format!("expected ']' in path at position {pos}, found '{c}'"); |
| 127 | + return Err(Error::runtime(err)); |
| 128 | + } |
| 129 | + None => { |
| 130 | + return Err(Error::runtime("unexpected end of path")); |
| 131 | + } |
| 132 | + } |
| 133 | + segments.push((key, safe_nil)); |
| 134 | + } |
| 135 | + c if c.is_ascii_whitespace() => { |
| 136 | + chars.next(); // Skip whitespace |
| 137 | + } |
| 138 | + _ if segments.is_empty() => { |
| 139 | + // First segment without dot/bracket notation |
| 140 | + let (key_cow, safe_nil) = read_ident(path, &mut chars); |
| 141 | + if key_cow.is_empty() { |
| 142 | + return Err(Error::runtime(format!("empty key in path at position {pos}"))); |
| 143 | + } |
| 144 | + segments.push((PathKey::Str(key_cow), safe_nil)); |
| 145 | + } |
| 146 | + c => { |
| 147 | + let err = format!("unexpected character '{c}' in path at position {pos}"); |
| 148 | + return Err(Error::runtime(err)); |
| 149 | + } |
| 150 | + } |
| 151 | + } |
| 152 | + Ok(segments) |
| 153 | +} |
| 154 | + |
| 155 | +fn unquote_string<'a>(path: &'a str, chars: &mut Peekable<CharIndices<'a>>) -> Result<Cow<'a, str>> { |
| 156 | + let (start_pos, first_quote) = chars.next().unwrap(); |
| 157 | + let mut result = String::new(); |
| 158 | + loop { |
| 159 | + match chars.next() { |
| 160 | + Some((pos, '\\')) => { |
| 161 | + if result.is_empty() { |
| 162 | + // First escape found, copy everything up to this point |
| 163 | + result.push_str(&path[start_pos + 1..pos]); |
| 164 | + } |
| 165 | + match chars.next() { |
| 166 | + Some((_, '\\')) => result.push('\\'), |
| 167 | + Some((_, '"')) => result.push('"'), |
| 168 | + Some((_, '\'')) => result.push('\''), |
| 169 | + Some((_, other)) => { |
| 170 | + result.push('\\'); |
| 171 | + result.push(other); |
| 172 | + } |
| 173 | + None => continue, // will be handled by outer loop |
| 174 | + } |
| 175 | + } |
| 176 | + Some((pos, c)) if c == first_quote => { |
| 177 | + if !result.is_empty() { |
| 178 | + return Ok(Cow::Owned(result)); |
| 179 | + } |
| 180 | + // No escapes, return borrowed slice |
| 181 | + return Ok(Cow::Borrowed(&path[start_pos + 1..pos])); |
| 182 | + } |
| 183 | + Some((_, c)) => { |
| 184 | + if !result.is_empty() { |
| 185 | + result.push(c); |
| 186 | + } |
| 187 | + // If no escapes yet, continue tracking for potential borrowed slice |
| 188 | + } |
| 189 | + None => { |
| 190 | + let err = format!("unexpected end of string at position {start_pos}"); |
| 191 | + return Err(Error::runtime(err)); |
| 192 | + } |
| 193 | + } |
| 194 | + } |
| 195 | +} |
| 196 | + |
#[cfg(test)]
mod tests {
    use super::{parse_path, PathKey};

    // Returns the error message produced for `path`; panics if parsing succeeds.
    fn parse_err(path: &str) -> String {
        parse_path(path).unwrap_err().to_string()
    }

    #[test]
    fn test_parse_path() {
        // Mixed dot, bracket, safe-nil and quoted-string segments
        let path = parse_path("a.b[3]?.c['d']").unwrap();
        assert_eq!(path.len(), 5);
        assert!(matches!(path[0], (PathKey::Str(ref s), false) if s == "a"));
        assert!(matches!(path[1], (PathKey::Str(ref s), false) if s == "b"));
        assert!(matches!(path[2], (PathKey::Int(3), true)));
        assert!(matches!(path[3], (PathKey::Str(ref s), false) if s == "c"));
        assert!(matches!(path[4], (PathKey::Str(ref s), false) if s == "d"));
    }

    #[test]
    fn test_parse_path_empty() {
        assert_eq!(parse_path("").unwrap().len(), 0);
        assert_eq!(parse_path(" ").unwrap().len(), 0);
    }

    #[test]
    fn test_parse_path_safe_nil_and_brackets() {
        // `?` directly after an identifier
        let path = parse_path("a?.b").unwrap();
        assert_eq!(path.len(), 2);
        assert!(matches!(path[0], (PathKey::Str(ref s), true) if s == "a"));
        assert!(matches!(path[1], (PathKey::Str(ref s), false) if s == "b"));

        // A path may start with a bracket segment
        let path = parse_path("[1].x").unwrap();
        assert_eq!(path.len(), 2);
        assert!(matches!(path[0], (PathKey::Int(1), false)));
        assert!(matches!(path[1], (PathKey::Str(ref s), false) if s == "x"));

        // Negative integer keys
        let path = parse_path("a[-1]").unwrap();
        assert_eq!(path.len(), 2);
        assert!(matches!(path[1], (PathKey::Int(-1), false)));
    }

    #[test]
    fn test_parse_path_quoted_escapes() {
        let path = parse_path(r#"a["x\"y"]"#).unwrap();
        assert!(matches!(path[1], (PathKey::Str(ref s), false) if s == "x\"y"));
        let path = parse_path(r#"a['it\'s']"#).unwrap();
        assert!(matches!(path[1], (PathKey::Str(ref s), false) if s == "it's"));
        // Unknown escapes keep the backslash
        let path = parse_path(r#"a["x\ny"]"#).unwrap();
        assert!(matches!(path[1], (PathKey::Str(ref s), false) if s == "x\\ny"));
    }

    #[test]
    fn test_parse_path_invalid_dot_syntax() {
        assert_eq!(
            parse_err("a..b"),
            "runtime error: empty key in path at position 1"
        );
        assert_eq!(
            parse_err("a.b."),
            "runtime error: empty key in path at position 3"
        );
        // A bare identifier is only allowed as the first segment
        assert_eq!(
            parse_err("a b"),
            "runtime error: unexpected character 'b' in path at position 2"
        );
    }

    #[test]
    fn test_parse_path_invalid_bracket_syntax() {
        assert_eq!(
            parse_err("a[unclosed"),
            "runtime error: unexpected character 'u' in path at position 2"
        );
        assert_eq!(
            parse_err("a[]"),
            "runtime error: empty key in path at position 1"
        );
        assert_eq!(
            parse_err(r#"a["unclosed"#),
            "runtime error: unexpected end of string at position 2"
        );
        assert_eq!(parse_err(r#"a["#), "runtime error: unexpected end of path");
        assert_eq!(parse_err(r#"a[123"#), "runtime error: unexpected end of path");
        assert_eq!(
            parse_err(r#"a['bla'123"#),
            "runtime error: expected ']' in path at position 7, found '1'"
        );
        assert_eq!(
            parse_err(r#"a["bla"]x"#),
            "runtime error: unexpected character 'x' in path at position 8"
        );
    }

    #[test]
    fn test_parse_path_bad_integers() {
        assert_eq!(
            parse_err("a[99999999999999999999]"),
            "runtime error: integer overflow in path at position 2"
        );
        assert_eq!(
            parse_err("a[-]"),
            "runtime error: invalid integer in path at position 2"
        );
    }
}
0 commit comments