Skip to content

Commit 1c3e5af

Browse files
committed
Setup sentence skeleton
1 parent fa10dd3 commit 1c3e5af

File tree

3 files changed

+110
-0
lines changed

3 files changed

+110
-0
lines changed

src/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,12 @@ pub use grapheme::{Graphemes, GraphemeIndices};
6767
pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
6868
pub use tables::UNICODE_VERSION;
6969
pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords};
70+
pub use sentence::{USentenceBounds};
7071

7172
mod grapheme;
7273
mod tables;
7374
mod word;
75+
mod sentence;
7476

7577
#[cfg(test)]
7678
mod test;
@@ -174,6 +176,9 @@ pub trait UnicodeSegmentation {
174176
/// assert_eq!(&swi1[..], b);
175177
/// ```
176178
fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>;
179+
180+
/// Returns a `USentenceBounds` iterator over `self`; the iterator's items
/// are `&str` slices borrowed from `self`.
///
/// NOTE(review): presumably each item is one sentence of `self` — confirm
/// once segmentation lands. Only the skeleton exists so far: the iterator's
/// `next` is still a stub that panics.
181+
fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>;
177182
}
178183

179184
impl UnicodeSegmentation for str {
@@ -201,4 +206,9 @@ impl UnicodeSegmentation for str {
201206
fn split_word_bound_indices(&self) -> UWordBoundIndices {
202207
word::new_word_bound_indices(self)
203208
}
209+
210+
// Builds the sentence iterator for this string slice; construction is
// delegated entirely to the `sentence` module.
#[inline]
211+
fn split_sentence_bounds(&self) -> USentenceBounds {
212+
sentence::new_sentence_bounds(self)
213+
}
204214
}

src/sentence.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use core::cmp;
12+
use core::iter::Filter;
13+
14+
use tables::sentence::SentenceCat;
15+
16+
/// External iterator over the sentences of a string slice.
///
/// Values are created by `new_sentence_bounds`. Items are `&str` slices
/// borrowed from the original string. Sentence segmentation itself is not
/// implemented yet: only the input string is stored so far.
#[derive(Clone, Debug)]
pub struct USentenceBounds<'a> {
    /// The string being segmented.
    string: &'a str,
    // TODO: add whatever state the sentence-break algorithm needs.
}
22+
23+
impl<'a> Iterator for USentenceBounds<'a> {
24+
type Item = &'a str;
25+
26+
#[inline]
27+
fn size_hint(&self) -> (usize, Option<usize>) {
28+
let slen = self.string.len();
29+
(cmp::min(slen, 1), Some(slen))
30+
}
31+
32+
#[inline]
33+
fn next(&mut self) -> Option<&'a str> {
34+
panic!("todo")
35+
}
36+
}
37+
38+
/// Creates a `USentenceBounds` iterator over `s`.
///
/// No work is done up front: the input slice is simply stored in the
/// returned value, which borrows `s` for the lifetime `'b`.
#[inline]
39+
pub fn new_sentence_bounds<'b>(s: &'b str) -> USentenceBounds<'b> {
40+
USentenceBounds { string: s }
41+
}

src/test.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,52 @@ fn test_words() {
141141
}
142142
}
143143

144+
145+
/// Checks forward sentence splitting against every `TEST_SENTENCE` case.
#[test]
fn test_sentences() {
    use testdata::TEST_SENTENCE;

    for &(s, w) in TEST_SENTENCE.iter() {
        // Declared inside the loop so its body can refer to the current
        // `s` and `w` when building the failure message.
        macro_rules! assert_ {
            ($test:expr, $exp:expr, $name:expr) => {
                // materialise both sides so a failure prints the full sequences
                let actual = $test.collect::<Vec<_>>();
                let wanted = $exp.collect::<Vec<_>>();
                assert_eq!(actual, wanted, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w)
            }
        }

        // forward iteration must reproduce the expected pieces in order
        assert_!(s.split_sentence_bounds(),
                w.iter().cloned(),
                "Forward sentence boundaries");

        // The checks below are disabled: they call `.rev()` and
        // `split_sentence_bound_indices`, which the sentence skeleton does
        // not appear to provide yet.
        /*
        // test reverse iterator
        assert_!(s.split_sentence_bounds().rev(),
                w.iter().rev().cloned(),
                "Reverse sentence boundaries");

        // generate offsets from sentence string lengths
        let mut indices = vec![0];
        for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| { *t += n; Some(*t) }) {
            indices.push(i);
        }
        indices.pop();
        let indices = indices;

        // test forward indices iterator
        assert_!(s.split_sentence_bound_indices().map(|(l,_)| l),
                indices.iter().cloned(),
                "Forward sentence indices");

        // test backward indices iterator
        assert_!(s.split_sentence_bound_indices().rev().map(|(l,_)| l),
                indices.iter().rev().cloned(),
                "Reverse sentence indices");
        */
    }
}
189+
144190
quickcheck! {
145191
fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool {
146192
let a = s.graphemes(true).collect::<Vec<_>>();
@@ -173,4 +219,17 @@ quickcheck! {
173219
let a = s.split_word_bounds().collect::<String>();
174220
a == s
175221
}
222+
/*
223+
fn quickcheck_forward_reverse_sentences(s: String) -> bool {
224+
let a = s.split_sentence_bounds().collect::<Vec<_>>();
225+
let mut b = s.split_sentence_bounds().rev().collect::<Vec<_>>();
226+
b.reverse();
227+
a == b
228+
}
229+
230+
fn quickcheck_join_sentences(s: String) -> bool {
231+
let a = s.split_sentence_bounds().collect::<String>();
232+
a == s
233+
}
234+
*/
176235
}

0 commit comments

Comments
 (0)