-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.R
More file actions
92 lines (69 loc) · 2.26 KB
/
model.R
File metadata and controls
92 lines (69 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
library(tm)
# Pre-built n-gram lookup tables used by nextWordPredictor(). The paths are
# relative, so they are resolved against the current working directory.
bigram <- readRDS("data/final_bigram_sm.Rda")
trigram <- readRDS("data/final_trigram_sm.Rda")
fourgram <- readRDS("data/final_fourgram_sm.Rda")
#' Predict the next word for a phrase using n-gram back-off
#'
#' Cleans the input (lower-cases it, removes digits and punctuation, collapses
#' whitespace), then looks up its trailing words in the global `fourgram`,
#' `trigram` and `bigram` tables, falling back to the next shorter n-gram
#' whenever a lookup yields no match.
#'
#' @param inputTxt Text typed so far. Normally a single string; `NULL`, `NA`
#'   and multi-element vectors are tolerated (elements are joined by spaces).
#' @param n Maximum number of predictions to return (default 4).
#' @return A character vector with up to `n` candidate next words, in the row
#'   order of the matching table; `character(0)` when no table has a match;
#'   `""` when the input contains no words after cleaning.
nextWordPredictor <- function(inputTxt, n = 4) {
  # Join into one string first so NULL, NA and vector input cannot break the
  # scalar `if` conditions below.
  inputTxt <- paste(inputTxt[!is.na(inputTxt)], collapse = " ")
  if (!nzchar(inputTxt)) {
    return("")
  }

  # Clean input
  inputTxt <- tolower(inputTxt)
  inputTxt <- removeNumbers(inputTxt)
  inputTxt <- removePunctuation(inputTxt)
  # Trim as well: removing leading digits/punctuation can leave a leading
  # blank, which would otherwise become an empty first "word".
  inputTxt <- trimws(stripWhitespace(inputTxt))
  if (!nzchar(inputTxt)) {
    # Input held only digits, punctuation or whitespace: nothing to look up.
    return("")
  }

  # Split into words; only the last three can take part in a lookup.
  inputList <- strsplit(inputTxt, " ", fixed = TRUE)[[1]]
  context <- tail(inputList, 3)
  numWords <- length(context)

  # Lookup helpers. which() drops NA comparisons so they cannot select
  # all-NA rows.
  runBigram <- function(words) {
    hit <- bigram$terms$one == words[1]
    bigram[which(hit), ]$terms$two
  }
  runTrigram <- function(words) {
    hit <- trigram$terms$one == words[1] &
      trigram$terms$two == words[2]
    trigram[which(hit), ]$terms$three
  }
  runFourgram <- function(words) {
    hit <- fourgram$terms$one == words[1] &
      fourgram$terms$two == words[2] &
      fourgram$terms$three == words[3]
    fourgram[which(hit), ]$terms$four
  }

  # Back-off chain: start with the longest n-gram the input supports and
  # shorten the context each time a lookup comes back empty.
  predList <- character(0)
  if (numWords == 3) {
    predList <- runFourgram(context)
  }
  if (length(predList) == 0 && numWords >= 2) {
    predList <- runTrigram(tail(context, 2))
  }
  if (length(predList) == 0) {
    predList <- runBigram(context[numWords])
  }

  # Return top n predictors
  head(as.character(predList), n)
}
# ptm <- proc.time()
# nextWordPredictor("the world")
# proc.time() - ptm