Skip to content

regex 1.0 #230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,6 @@ simd-accel = ["simd"]
# There are no benchmarks in the library code itself
bench = false

# Runs unit tests defined inside the regex package.
# Generally these test specific pieces of the regex implementation.
[[test]]
path = "src/lib.rs"
name = "regex-inline"

# Run the test suite on the default behavior of Regex::new.
# This includes a mish mash of NFAs and DFAs, which are chosen automatically
# based on the regex. We test both of the NFA implementations by forcing their
Expand Down
2 changes: 1 addition & 1 deletion bench/src/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ macro_rules! regex {
// Always enable the Unicode flag for byte based regexes.
// Really, this should have been enabled by default. *sigh*
use regex::bytes::RegexBuilder;
RegexBuilder::new(&$re.to_owned()).unicode(true).compile().unwrap()
RegexBuilder::new(&$re.to_owned()).unicode(true).build().unwrap()
}}
}

Expand Down
4 changes: 2 additions & 2 deletions examples/shootout-regex-dna-bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn main() {
io::stdin().read_to_end(&mut seq).unwrap();
let ilen = seq.len();

seq = regex!(">[^\n]*\n|\n").replace_all(&seq, &b""[..]);
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, &b""[..]).into_owned();
let clen = seq.len();
let seq_arc = Arc::new(seq.clone());

Expand Down Expand Up @@ -56,7 +56,7 @@ fn main() {
];
let mut seq = seq;
for (re, replacement) in substs.into_iter() {
seq = re.replace_all(&seq, replacement);
seq = re.replace_all(&seq, replacement).into_owned();
}

for (variant, count) in counts {
Expand Down
10 changes: 5 additions & 5 deletions examples/shootout-regex-dna-cheat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() {
io::stdin().read_to_string(&mut seq).unwrap();
let ilen = seq.len();

seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
let clen = seq.len();
let seq_arc = Arc::new(seq.clone());

Expand Down Expand Up @@ -78,10 +78,10 @@ fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
let re = regex!(&alternates.join("|"));
let mut new = String::with_capacity(text.len());
let mut last_match = 0;
for (s, e) in re.find_iter(text) {
new.push_str(&text[last_match..s]);
new.push_str(replacements[text.as_bytes()[s] as usize]);
last_match = e;
for m in re.find_iter(text) {
new.push_str(&text[last_match..m.start()]);
new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
last_match = m.end();
}
new.push_str(&text[last_match..]);
new
Expand Down
2 changes: 1 addition & 1 deletion examples/shootout-regex-dna-replace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ fn main() {
io::stdin().read_to_string(&mut seq).unwrap();
let ilen = seq.len();

seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
println!("original: {}, replaced: {}", ilen, seq.len());
}
10 changes: 5 additions & 5 deletions examples/shootout-regex-dna-single-cheat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fn main() {
io::stdin().read_to_string(&mut seq).unwrap();
let ilen = seq.len();

seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
let clen = seq.len();

let variants = vec![
Expand Down Expand Up @@ -63,10 +63,10 @@ fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
let re = regex!(&alternates.join("|"));
let mut new = String::with_capacity(text.len());
let mut last_match = 0;
for (s, e) in re.find_iter(text) {
new.push_str(&text[last_match..s]);
new.push_str(replacements[text.as_bytes()[s] as usize]);
last_match = e;
for m in re.find_iter(text) {
new.push_str(&text[last_match..m.start()]);
new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
last_match = m.end();
}
new.push_str(&text[last_match..]);
new
Expand Down
4 changes: 2 additions & 2 deletions examples/shootout-regex-dna-single.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fn main() {
io::stdin().read_to_string(&mut seq).unwrap();
let ilen = seq.len();

seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
let clen = seq.len();

let variants = vec![
Expand Down Expand Up @@ -49,7 +49,7 @@ fn main() {
];
let mut seq = seq;
for (re, replacement) in substs.into_iter() {
seq = re.replace_all(&seq, replacement);
seq = re.replace_all(&seq, replacement).into_owned();
}
println!("\n{}\n{}\n{}", ilen, clen, seq.len());
}
4 changes: 2 additions & 2 deletions examples/shootout-regex-dna.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn main() {
io::stdin().read_to_string(&mut seq).unwrap();
let ilen = seq.len();

seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "");
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
let clen = seq.len();
let seq_arc = Arc::new(seq.clone());

Expand Down Expand Up @@ -56,7 +56,7 @@ fn main() {
];
let mut seq = seq;
for (re, replacement) in substs.into_iter() {
seq = re.replace_all(&seq, replacement);
seq = re.replace_all(&seq, replacement).into_owned();
}

for (variant, count) in counts {
Expand Down
38 changes: 19 additions & 19 deletions regex-capi/src/rure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub struct rure_match {
pub end: size_t,
}

pub struct Captures(Vec<Option<usize>>);
pub struct Captures(bytes::Locations);

pub struct Iter {
re: *const Regex,
Expand Down Expand Up @@ -98,16 +98,16 @@ ffi_fn! {
let mut builder = bytes::RegexBuilder::new(pat);
if !options.is_null() {
let options = unsafe { &*options };
builder = builder.size_limit(options.size_limit);
builder = builder.dfa_size_limit(options.dfa_size_limit);
builder.size_limit(options.size_limit);
builder.dfa_size_limit(options.dfa_size_limit);
}
builder = builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
builder = builder.multi_line(flags & RURE_FLAG_MULTI > 0);
builder = builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
builder = builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
builder = builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
builder = builder.unicode(flags & RURE_FLAG_UNICODE > 0);
match builder.compile() {
builder.case_insensitive(flags & RURE_FLAG_CASEI > 0);
builder.multi_line(flags & RURE_FLAG_MULTI > 0);
builder.dot_matches_new_line(flags & RURE_FLAG_DOTNL > 0);
builder.swap_greed(flags & RURE_FLAG_SWAP_GREED > 0);
builder.ignore_whitespace(flags & RURE_FLAG_SPACE > 0);
builder.unicode(flags & RURE_FLAG_UNICODE > 0);
match builder.build() {
Ok(re) => {
let mut capture_names = HashMap::new();
for (i, name) in re.capture_names().enumerate() {
Expand Down Expand Up @@ -162,10 +162,10 @@ ffi_fn! {
) -> bool {
let re = unsafe { &*re };
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
re.find_at(haystack, start).map(|(s, e)| unsafe {
re.find_at(haystack, start).map(|m| unsafe {
if !match_info.is_null() {
(*match_info).start = s;
(*match_info).end = e;
(*match_info).start = m.start();
(*match_info).end = m.end();
}
}).is_some()
}
Expand Down Expand Up @@ -258,7 +258,7 @@ ffi_fn! {
}
let (s, e) = match re.find_at(text, it.last_end) {
None => return false,
Some((s, e)) => (s, e),
Some(m) => (m.start(), m.end()),
};
if s == e {
// This is an empty match. To ensure we make progress, start
Expand Down Expand Up @@ -300,7 +300,7 @@ ffi_fn! {
}
let (s, e) = match re.read_captures_at(slots, text, it.last_end) {
None => return false,
Some((s, e)) => (s, e),
Some(m) => (m.start(), m.end()),
};
if s == e {
// This is an empty match. To ensure we make progress, start
Expand All @@ -323,7 +323,7 @@ ffi_fn! {
ffi_fn! {
fn rure_captures_new(re: *const Regex) -> *mut Captures {
let re = unsafe { &*re };
let captures = Captures(vec![None; 2 * re.captures_len()]);
let captures = Captures(re.locations());
Box::into_raw(Box::new(captures))
}
}
Expand All @@ -340,9 +340,9 @@ ffi_fn! {
i: size_t,
match_info: *mut rure_match,
) -> bool {
let captures = unsafe { &(*captures).0 };
match (captures[i * 2], captures[i * 2 + 1]) {
(Some(start), Some(end)) => {
let locs = unsafe { &(*captures).0 };
match locs.pos(i) {
Some((start, end)) => {
if !match_info.is_null() {
unsafe {
(*match_info).start = start;
Expand Down
22 changes: 4 additions & 18 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@ use syntax;
#[derive(Debug)]
pub enum Error {
/// A syntax error.
Syntax(syntax::Error),
Syntax(String),
/// The compiled program exceeded the set size limit.
/// The argument is the size limit imposed.
CompiledTooBig(usize),
/// **DEPRECATED:** Will be removed on next major version bump.
///
/// This error is no longer used. (A `RegexSet` can now contain zero or
/// more regular expressions.)
InvalidSet,
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
Expand All @@ -37,20 +32,14 @@ pub enum Error {
impl ::std::error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::Syntax(ref err) => err.description(),
Error::Syntax(ref err) => err,
Error::CompiledTooBig(_) => "compiled program too big",
Error::InvalidSet => {
"sets must contain 2 or more regular expressions"
}
Error::__Nonexhaustive => unreachable!(),
}
}

fn cause(&self) -> Option<&::std::error::Error> {
match *self {
Error::Syntax(ref err) => Some(err),
_ => None,
}
None
}
}

Expand All @@ -62,16 +51,13 @@ impl fmt::Display for Error {
write!(f, "Compiled regex exceeds size limit of {} bytes.",
limit)
}
Error::InvalidSet => {
write!(f, "Sets must contain 2 or more regular expressions.")
}
Error::__Nonexhaustive => unreachable!(),
}
}
}

impl From<syntax::Error> for Error {
fn from(err: syntax::Error) -> Error {
Error::Syntax(err)
Error::Syntax(err.to_string())
}
}
9 changes: 5 additions & 4 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use prog::Program;
use re_builder::RegexOptions;
use re_bytes;
use re_set;
use re_trait::{RegularExpression, Slot};
use re_trait::{RegularExpression, Slot, Locations, as_slots};
use re_unicode;
use utf8::next_utf8;

Expand Down Expand Up @@ -332,11 +332,11 @@ impl<'c> RegularExpression for ExecNoSyncStr<'c> {
#[inline(always)] // reduces constant overhead
fn read_captures_at(
&self,
slots: &mut [Slot],
locs: &mut Locations,
text: &str,
start: usize,
) -> Option<(usize, usize)> {
self.0.read_captures_at(slots, text.as_bytes(), start)
self.0.read_captures_at(locs, text.as_bytes(), start)
}
}

Expand Down Expand Up @@ -501,10 +501,11 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
/// locations of the overall match.
fn read_captures_at(
&self,
slots: &mut [Slot],
locs: &mut Locations,
text: &[u8],
start: usize,
) -> Option<(usize, usize)> {
let slots = as_slots(locs);
for slot in slots.iter_mut() {
*slot = None;
}
Expand Down
Loading