Update for Rust nightly: drop the `str_char` feature and replace `char_at` with `chars()` iteration

ferristseng · ferristseng · commit 15971a2c64ab · 2016-10-05T15:02:14.000-04:00
diff --git a/src/lib.rs b/src/lib.rs
@@ -133,7 +133,7 @@
 //! }
 //! ```
 
-#![feature(plugin, str_char)]
+#![feature(plugin)]
 #![cfg_attr(test, feature(test))]
 #![plugin(phf_macros)]
 #![warn(missing_docs)]
diff --git a/src/token.rs b/src/token.rs
@@ -40,7 +40,7 @@ impl Token {
   pub fn new(slice: &str, is_el: bool, is_pg: bool, is_nl: bool) -> Token {
     debug_assert!(slice.len() > 0);
 
-    let first = slice.char_at(0);
+    let first = slice.chars().nth(0).unwrap();
     let mut has_punct = false;
 
     // Add a period to any tokens without a period. This is an optimization
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
@@ -45,7 +45,8 @@ impl<'a, P> PeriodContextTokenizer<'a, P> where P: DefinesNonWordCharacters + De
     let mut pos = self.pos;
 
     while pos < self.doc.len() {
-      let cur = self.doc.char_at(pos);
+      let mut iter = self.doc[pos..].chars();
+      let cur = iter.nth(0).unwrap();
 
       match cur {
         // A whitespace is reached before a sentence ending character
@@ -55,7 +56,7 @@ impl<'a, P> PeriodContextTokenizer<'a, P> where P: DefinesNonWordCharacters + De
         // of a new token (if there is a space after it, or if the next
         // character is puntuation).
         c if P::is_sentence_ending(&c) => {
-          let nxt = self.doc.char_at(pos + cur.len_utf8());
+          let nxt = iter.next().unwrap();
 
           if nxt.is_whitespace() || P::is_nonword_char(&nxt) {
             break;
@@ -86,7 +87,7 @@ impl<'a, P> Iterator for PeriodContextTokenizer<'a, P>
     let mut state: u8 = 0;
 
     while self.pos < self.doc.len() {
-      let cur = self.doc.char_at(self.pos);
+      let cur = self.doc[self.pos..].chars().next().unwrap();
 
       macro_rules! return_token(
         () => (
@@ -240,7 +241,7 @@ impl<'a, P> Iterator for WordTokenizer<'a, P>
     );
 
     while self.pos < self.doc.len() {
-      let cur = self.doc.char_at(self.pos);
+      let cur = self.doc[self.pos..].chars().next().unwrap();
 
       // Periods or dashes are the start of multi-chars. A multi-char
       // is defined as an ellipsis or hyphen (multiple-dashes). If there
@@ -496,7 +497,7 @@ fn orthographic_heuristic<P>(tok: &Token, data: &TrainingData) -> Option<bool>
 {
   use prelude::{ORT_LC, MID_UC, ORT_UC, BEG_LC};
 
-  if P::is_punctuation(&tok.tok().char_at(0)) {
+  if P::is_punctuation(&tok.tok().chars().nth(0).unwrap()) {
     Some(false)
   } else {
     let ctxt = data.get_orthographic_context(tok.typ_without_break_or_period());
diff --git a/src/trainer.rs b/src/trainer.rs
@@ -14,7 +14,6 @@ use std::hash::{Hash, Hasher};
 use std::marker::PhantomData;
 use std::collections::{HashSet, HashMap};
 
-use num::Float;
 use freqdist::FrequencyDistribution;
 use rustc_serialize::json::Json;
 
@@ -436,7 +435,7 @@ fn is_rare_abbrev_type<P>(data: &TrainingData,
     // Already an abbreviation...
     if data.contains_abbrev(tok0.typ()) || count >= P::abbrev_upper_bound() {
       false
-    } else if P::is_internal_punctuation(&tok1.typ().char_at(0)) {
+    } else if P::is_internal_punctuation(&tok1.typ().chars().next().unwrap()) {
       true
     } else if tok1.is_lowercase() {
       let ctxt = data.get_orthographic_context(tok1.typ_without_break_or_period());
diff --git a/src/util.rs b/src/util.rs
@@ -21,7 +21,7 @@ pub fn annotate_first_pass<P: DefinesSentenceEndings>(tok: &mut Token, data: &Tr
                            .map(|s| data.contains_abbrev(s))
                            .unwrap_or(false);
 
-  if tok.tok().len() == 1 && P::is_sentence_ending(&tok.tok().char_at(0)) {
+  if tok.tok().len() == 1 && P::is_sentence_ending(&tok.tok().chars().nth(0).unwrap()) {
     tok.set_is_sentence_break(true);
   } else if tok.has_final_period() && !tok.is_ellipsis() {
     if is_split_abbrev || data.contains_abbrev(tok.tok_without_period()) {