From 9b5e1084edcbc54b2f0defde612a329dbed5e82a Mon Sep 17 00:00:00 2001
From: ChrisMuir
Date: Sun, 1 Apr 2018 16:04:20 -0500
Subject: [PATCH] bug fix related to accented chars and 'iconv()', issue #9

---
 R/get_fingerprint.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_fingerprint.R b/R/get_fingerprint.R
index e68f283..0c2748a 100644
--- a/R/get_fingerprint.R
+++ b/R/get_fingerprint.R
@@ -64,8 +64,8 @@ get_fingerprint_ngram <- function(vect, numgram = 2, bus_suffix = TRUE,
   vect <- gsub(regex, "", vect, perl = TRUE)
   # Rest of the transformations. For each value in vect: get ngrams, filter by
   # unique, sort alphabetically, paste back together, and normalize encoding.
+  vect <- iconv(vect, to = "ASCII//TRANSLIT")
   vect <- strsplit(vect, "", fixed = TRUE)
   vect <- cpp_get_char_ngrams(vect, numgram = numgram)
-  vect <- iconv(vect, to = "ASCII//TRANSLIT")
   return(vect)
 }