Skip to content

Commit 15971a2

Browse files
committed
update for rust nightly
1 parent 3246703 commit 15971a2

File tree

5 files changed

+10
-10
lines changed

5 files changed

+10
-10
lines changed

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@
133133
//! }
134134
//! ```
135135
136-
#![feature(plugin, str_char)]
136+
#![feature(plugin)]
137137
#![cfg_attr(test, feature(test))]
138138
#![plugin(phf_macros)]
139139
#![warn(missing_docs)]

src/token.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ impl Token {
4040
pub fn new(slice: &str, is_el: bool, is_pg: bool, is_nl: bool) -> Token {
4141
debug_assert!(slice.len() > 0);
4242

43-
let first = slice.char_at(0);
43+
let first = slice.chars().nth(0).unwrap();
4444
let mut has_punct = false;
4545

4646
// Add a period to any tokens without a period. This is an optimization

src/tokenizer.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ impl<'a, P> PeriodContextTokenizer<'a, P> where P: DefinesNonWordCharacters + De
4545
let mut pos = self.pos;
4646

4747
while pos < self.doc.len() {
48-
let cur = self.doc.char_at(pos);
48+
let mut iter = self.doc[pos..].chars();
49+
let cur = iter.nth(0).unwrap();
4950

5051
match cur {
5152
// A whitespace is reached before a sentence ending character
@@ -55,7 +56,7 @@ impl<'a, P> PeriodContextTokenizer<'a, P> where P: DefinesNonWordCharacters + De
5556
// of a new token (if there is a space after it, or if the next
5657
// character is puntuation).
5758
c if P::is_sentence_ending(&c) => {
58-
let nxt = self.doc.char_at(pos + cur.len_utf8());
59+
let nxt = iter.next().unwrap();
5960

6061
if nxt.is_whitespace() || P::is_nonword_char(&nxt) {
6162
break;
@@ -86,7 +87,7 @@ impl<'a, P> Iterator for PeriodContextTokenizer<'a, P>
8687
let mut state: u8 = 0;
8788

8889
while self.pos < self.doc.len() {
89-
let cur = self.doc.char_at(self.pos);
90+
let cur = self.doc[self.pos..].chars().next().unwrap();
9091

9192
macro_rules! return_token(
9293
() => (
@@ -240,7 +241,7 @@ impl<'a, P> Iterator for WordTokenizer<'a, P>
240241
);
241242

242243
while self.pos < self.doc.len() {
243-
let cur = self.doc.char_at(self.pos);
244+
let cur = self.doc[self.pos..].chars().next().unwrap();
244245

245246
// Periods or dashes are the start of multi-chars. A multi-char
246247
// is defined as an ellipsis or hyphen (multiple-dashes). If there
@@ -496,7 +497,7 @@ fn orthographic_heuristic<P>(tok: &Token, data: &TrainingData) -> Option<bool>
496497
{
497498
use prelude::{ORT_LC, MID_UC, ORT_UC, BEG_LC};
498499

499-
if P::is_punctuation(&tok.tok().char_at(0)) {
500+
if P::is_punctuation(&tok.tok().chars().nth(0).unwrap()) {
500501
Some(false)
501502
} else {
502503
let ctxt = data.get_orthographic_context(tok.typ_without_break_or_period());

src/trainer.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use std::hash::{Hash, Hasher};
1414
use std::marker::PhantomData;
1515
use std::collections::{HashSet, HashMap};
1616

17-
use num::Float;
1817
use freqdist::FrequencyDistribution;
1918
use rustc_serialize::json::Json;
2019

@@ -436,7 +435,7 @@ fn is_rare_abbrev_type<P>(data: &TrainingData,
436435
// Already an abbreviation...
437436
if data.contains_abbrev(tok0.typ()) || count >= P::abbrev_upper_bound() {
438437
false
439-
} else if P::is_internal_punctuation(&tok1.typ().char_at(0)) {
438+
} else if P::is_internal_punctuation(&tok1.typ().chars().next().unwrap()) {
440439
true
441440
} else if tok1.is_lowercase() {
442441
let ctxt = data.get_orthographic_context(tok1.typ_without_break_or_period());

src/util.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ pub fn annotate_first_pass<P: DefinesSentenceEndings>(tok: &mut Token, data: &Tr
2121
.map(|s| data.contains_abbrev(s))
2222
.unwrap_or(false);
2323

24-
if tok.tok().len() == 1 && P::is_sentence_ending(&tok.tok().char_at(0)) {
24+
if tok.tok().len() == 1 && P::is_sentence_ending(&tok.tok().chars().nth(0).unwrap()) {
2525
tok.set_is_sentence_break(true);
2626
} else if tok.has_final_period() && !tok.is_ellipsis() {
2727
if is_split_abbrev || data.contains_abbrev(tok.tok_without_period()) {

0 commit comments

Comments (0)