Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion turtle/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::MAX_STACK_SIZE;
use crate::{MAX_BUFFER_SIZE, MAX_STACK_SIZE};
use oxilangtag::LanguageTagParseError;
use oxiri::IriParseError;
use rio_api::parser::{LineBytePosition, ParseError};
Expand Down Expand Up @@ -32,6 +32,7 @@ pub enum TurtleErrorKind {
error: LanguageTagParseError,
},
StackOverflow,
BufferOverflow,
}

impl fmt::Display for TurtleError {
Expand All @@ -56,6 +57,9 @@ impl fmt::Display for TurtleError {
TurtleErrorKind::StackOverflow => {
write!(f, "The parser encountered more than {} nested constructions. This number is limited in order to avoid stack overflow OS errors.", MAX_STACK_SIZE)
}
TurtleErrorKind::BufferOverflow => {
write!(f, "The parser encountered a term with more than {} bytes. The size is limited in order to avoid out of memory error on invalid files.", MAX_BUFFER_SIZE)
}
}?;
if let Some(position) = self.position {
write!(
Expand Down
1 change: 1 addition & 0 deletions turtle/src/gnquads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ pub(crate) fn parse_variable<'a>(
} else {
return Ok(Variable { name: buffer });
}
read.check_buffer_size(buffer)?;
}
}

Expand Down
3 changes: 3 additions & 0 deletions turtle/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,6 @@ pub use gtrig::GTriGParser;
/// This limit is set in order to avoid stack overflow error when parsing such structures due to too many recursive calls.
/// The actual limit value is a rule-of-thumb compromise between not failing to parse valid files and avoiding stack overflow errors.
const MAX_STACK_SIZE: usize = 128;

/// Maximal size, in bytes, of a parser buffer (useful to limit memory consumption).
///
/// Buffers whose length exceeds this value are rejected with a `BufferOverflow` error,
/// so that invalid input cannot make the parser accumulate an unbounded amount of data.
const MAX_BUFFER_SIZE: usize = 10_000_000;
4 changes: 4 additions & 0 deletions turtle/src/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ pub fn parse_iriref(
read_utf8_char(read)?
}),
}
read.check_buffer_size(buffer)?;
}
}

Expand Down Expand Up @@ -117,6 +118,7 @@ pub fn parse_blank_node_label<'a>(
}
}
}
read.check_buffer_size(buffer)?;
}
}

Expand All @@ -138,6 +140,7 @@ pub fn parse_langtag(
break;
}
}
read.check_buffer_size(buffer)?;
}
LanguageTag::parse(buffer.as_str()).map_err(|error| {
read.parse_error(TurtleErrorKind::InvalidLanguageTag {
Expand Down Expand Up @@ -177,6 +180,7 @@ pub fn parse_string_literal_quote_inner(
read_utf8_char(read)?
}),
}
read.check_buffer_size(buffer)?;
}
}

Expand Down
6 changes: 6 additions & 0 deletions turtle/src/turtle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,7 @@ pub(crate) fn parse_numeric_literal<'a>(
}
_ => break,
}
read.check_buffer_size(buffer)?;
}

// We read the digits after .
Expand Down Expand Up @@ -1044,6 +1045,7 @@ pub(crate) fn parse_numeric_literal<'a>(
}
_ => break,
}
read.check_buffer_size(buffer)?;
}
Some(count_after)
} else {
Expand Down Expand Up @@ -1231,6 +1233,7 @@ pub(crate) fn parse_prefixed_name<'a>(
}
}
}
read.check_buffer_size(buffer)?;
}
Ok(NamedNode { iri: buffer })
}
Expand Down Expand Up @@ -1320,6 +1323,7 @@ fn parse_exponent(
} else {
return Ok(());
}
read.check_buffer_size(buffer)?;
}
}

Expand Down Expand Up @@ -1368,6 +1372,7 @@ fn parse_string_literal_long_quote_inner(
read_utf8_char(read)?
}),
}
read.check_buffer_size(buffer)?;
}
}

Expand Down Expand Up @@ -1427,6 +1432,7 @@ fn parse_pn_prefix(
}
}
}
read.check_buffer_size(buffer)?;
}
}

Expand Down
13 changes: 12 additions & 1 deletion turtle/src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::error::*;
use crate::MAX_STACK_SIZE;
use crate::{MAX_BUFFER_SIZE, MAX_STACK_SIZE};
use rio_api::parser::LineBytePosition;
use std::collections::VecDeque;
use std::io::{BufRead, ErrorKind, Read};
Expand Down Expand Up @@ -175,6 +175,9 @@ impl<R: BufRead> LookAheadByteReader<R> {
Err(e) if e.kind() == ErrorKind::Interrupted => {}
Err(e) => return Err(e.into()),
}
if self.buffer.len() > MAX_BUFFER_SIZE {
return Err(self.parse_error(TurtleErrorKind::BufferOverflow));
}
}
}

Expand Down Expand Up @@ -208,6 +211,14 @@ impl<R: BufRead> LookAheadByteReader<R> {
/// Decrements the `stack_size` counter by one.
///
/// NOTE(review): presumably called when leaving a nested construction, mirroring an
/// increment performed on entry; the increment side is not visible here, so confirm.
pub fn decrement_stack_size(&mut self) {
self.stack_size -= 1;
}

/// Verifies that `buffer` does not hold more than `MAX_BUFFER_SIZE` bytes.
///
/// # Errors
///
/// Returns the error built by `self.parse_error` for
/// `TurtleErrorKind::BufferOverflow` when the byte length of `buffer`
/// is strictly greater than `MAX_BUFFER_SIZE`.
pub fn check_buffer_size(&self, buffer: &str) -> Result<(), TurtleError> {
    // Guard clause: the common case is a buffer within the limit.
    if buffer.len() <= MAX_BUFFER_SIZE {
        return Ok(());
    }
    Err(self.parse_error(TurtleErrorKind::BufferOverflow))
}
}

#[derive(Default)]
Expand Down
19 changes: 19 additions & 0 deletions turtle/tests/recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,22 @@ fn nquads_error_recovery() {
assert_eq!(count, 3);
assert_eq!(count_err, 2);
}

#[test]
fn very_big_literal() {
    // Builds a single N-Triples statement whose literal holds 11 million '0'
    // characters, then checks that the first parse step succeeds and that the
    // second one fails with a message mentioning "out of memory".
    const ZERO_COUNT: usize = 11_000_000;

    let mut data = String::with_capacity(12_000_000);
    data.push_str("<http://example.com/s> <http://example.com/p> \"");
    data.extend(std::iter::repeat('0').take(ZERO_COUNT));
    data.push_str("\" .");

    let mut parser = NTriplesParser::new(Cursor::new(&data));

    let first_step = parser.parse_step(&mut |_| Ok(()) as Result<(), TurtleError>);
    assert!(first_step.is_ok());

    let message = parser
        .parse_step(&mut |_| Ok(()) as Result<(), TurtleError>)
        .unwrap_err()
        .to_string();
    assert!(message.contains("out of memory"));
}