# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
19+ import Base. norm
1920export ngrams, count, tfnorm, sparse_count, norm, znorm, ngram_iterator, ngrams!
2021
# Iterable over the n-grams of a string; instances are built by `ngram_iterator`.
#
# Fields:
#   string          -- the text to iterate over (any AbstractString)
#   order           -- n-gram order; `start` advances `order - 1` characters from the
#                      first index to locate the end of the first full n-gram
#   truncated_start -- NOTE(review): presumably selects between beginning at the first
#                      full n-gram and beginning with shorter leading n-grams; confirm
#                      against the branch condition in `start`
immutable NgramStringIterator
    string::AbstractString
    order::Int32
    truncated_start::Bool
end
26- type StringPosition
27+ type AbstractStringPosition
2728 start :: Int32
2829 fin :: Int32
2930 nth :: Int32
@@ -35,9 +36,9 @@ function start(ngi :: NgramStringIterator)
3536 for i = 1 : (ngi. order- 1 ) # necessary because strings are indexed to bytes, not characters
3637 idx = nextind (ngi. string, idx)
3738 end
38- return StringPosition (1 , idx, ngi. order)
39+ return AbstractStringPosition (1 , idx, ngi. order)
3940 else
40- return StringPosition (1 , 1 , 1 )
41+ return AbstractStringPosition (1 , 1 , 1 )
4142 end
4243end
4344
6162# -------------------------------------------------------------------------------------------------------------------------
6263# feature extractors
6364# -------------------------------------------------------------------------------------------------------------------------
# make_string(words, b, e)
#
# Produce the text spanning positions `b` through `e` (inclusive) of `words`.
#   * For a string, `b` and `e` are string indices and the result is a SubString
#     over that range (no copy of the underlying text is requested here).
#   * For an array, elements `b` through `e` are joined with a single space.
make_string(words::AbstractString, b, e) = SubString(words, b, e)
make_string(words::Array, b, e) = join(words[b:e], " ")
6667
6768function ngrams (words:: Array ; order = 2 , truncated_start = false )
68- ret = String []
69+ ret = AbstractString []
6970
7071 if ! truncated_start
7172 for o = 1 : min (order - 1 , length (words))
@@ -81,19 +82,19 @@ function ngrams(words::Array; order = 2, truncated_start = false)
8182 return ret
8283end
8384
# ngrams(words::AbstractString; order = 2, truncated_start = false)
#
# Collect the n-grams of the string `words` into a freshly allocated array and
# return it. The `order` and `truncated_start` keywords are forwarded unchanged
# to `ngrams!`, which does the actual iteration.
function ngrams(words::AbstractString; order = 2, truncated_start = false)
    # Element type is AbstractString so the array can hold whatever string
    # type the iterator yields.
    ret = AbstractString[]
    return ngrams!(ret, words, order = order, truncated_start = truncated_start)
end
8889
# ngrams!(ret, words::AbstractString; order = 2, truncated_start = false)
#
# Append every n-gram yielded by `ngram_iterator(words, ...)` to `ret`, in
# iteration order, and return `ret`. Existing contents of `ret` are kept.
# `order` and `truncated_start` are passed straight through to the iterator.
function ngrams!(ret::Array, words::AbstractString; order = 2, truncated_start = false)
    for x in ngram_iterator(words, order = order, truncated_start = truncated_start)
        push!(ret, x)
    end
    return ret
end
9596
# ngram_iterator(words::AbstractString; order = 2, truncated_start = false)
#
# Wrap `words` in an NgramStringIterator carrying the requested `order` and
# `truncated_start` settings. Nothing is computed here; the n-grams are
# produced when the returned object is iterated.
ngram_iterator(words::AbstractString; order = 2, truncated_start = false) =
    NgramStringIterator(words, order, truncated_start)
9798
# -------------------------------------------------------------------------------------------------------------------------
# feature vector operations
@@ -107,7 +108,7 @@ function sparse_count(text, bkg)
107108end
108109
109110function dict_count (tokens)
110- map = DefaultDict {String ,Int32} ()
111+ map = DefaultDict {AbstractString ,Int32} ()
111112 for w in tokens
112113 map[w] += 1
113114 end
0 commit comments