-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarize.js
90 lines (67 loc) · 1.94 KB
/
summarize.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/** summarization */
const natural = require('natural');
const SentenceTokenizer = require('sentence-tokenizer');
const internals = {};
/**
 * Split text into word tokens, drop stopwords and reduce each remaining
 * token to its Porter stem.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} Stemmed, lowercased tokens in their original order
 *   (duplicates are kept so callers can count occurrences).
 */
internals.tokenizeAndStem = value => {
  const stemmer = natural.PorterStemmer;
  const wordTokenizer = new natural.WordTokenizer();
  // A Set gives constant-time membership checks instead of scanning the
  // stopword array once per token.
  const stopwords = new Set(natural.stopwords);

  return (
    wordTokenizer
      .tokenize(value)
      // Normalise case before the stopword check: comparing the raw token
      // let capitalised stopwords (e.g. a sentence-initial "The") through.
      // NOTE(review): assumes natural.stopwords holds lowercase entries.
      .map(token => token.toLowerCase())
      .filter(token => !stopwords.has(token))
      .map(token => stemmer.stem(token))
  );
};
/**
 * Score a sentence as the mean frequency of its stemmed tokens.
 *
 * @param {string} sentence - Sentence text to score.
 * @param {Object<string, number>} tokenMap - Maps a stemmed token to its
 *   occurrence count.
 * @returns {number} Sum of the counts found for the sentence's tokens divided
 *   by the number of tokens; 0 when the sentence yields no tokens.
 */
internals.calculateSentenceScore = (sentence, tokenMap) => {
  const tokens = internals.tokenizeAndStem(sentence);
  if (tokens.length === 0) {
    return 0;
  }

  const hasOwn = Object.prototype.hasOwnProperty;
  let total = 0;
  for (const token of tokens) {
    // Only own, numeric entries count. A bare `tokenMap[token]` lookup also
    // resolves inherited members such as `constructor`, and adding a function
    // to the running total silently turns it into a string (then NaN).
    const count = hasOwn.call(tokenMap, token) ? tokenMap[token] : 0;
    if (typeof count === 'number' && !Number.isNaN(count)) {
      total += count;
    }
  }

  return total / tokens.length;
};
/**
 * Compute the arithmetic mean of a list of scores.
 *
 * @param {number[]} weights - Scores to average.
 * @returns {number} The mean of `weights`, or 0 when the list is empty.
 */
internals.calculateAverageScore = weights => {
  // Guard the empty case explicitly: reduce() without an initial value throws
  // on an empty array, and with one the division would be 0 / 0 (NaN).
  if (weights.length === 0) {
    return 0;
  }
  const total = weights.reduce((sum, weight) => sum + weight, 0);
  return total / weights.length;
};
/**
 * Select the sentences whose score reaches the threshold.
 *
 * @param {string[]} sentences - Candidate sentences, in document order.
 * @param {number[]} weights - Score for each sentence, matched by index.
 * @param {number} threshold - Minimum score (inclusive) a sentence needs.
 * @returns {string[]} New array with the qualifying sentences, order preserved.
 */
internals.createArticleSummary = (sentences, weights, threshold) =>
  sentences.filter((_, index) => weights[index] >= threshold);
/**
 * Build an extractive summary of a text.
 *
 * Counts how often each stemmed token occurs in the whole text, scores every
 * sentence with `internals.calculateSentenceScore`, and keeps the sentences
 * whose score is at least `threshold` times the average sentence score.
 *
 * @param {string} value - Text to summarize.
 * @param {number} [threshold=1] - Multiplier applied to the average sentence
 *   score to obtain the cut-off. Defaults to 1 (keep sentences scoring at or
 *   above average); previously an omitted value produced a NaN cut-off and
 *   therefore always an empty summary.
 * @returns {string[]} Selected sentences in their original order; empty when
 *   the text contains no sentences.
 */
const summarize = (value, threshold = 1) => {
  // Null-prototype dictionary: on a plain `{}` a token such as "constructor"
  // resolves to an inherited member, which corrupts its count and turns every
  // dependent score into NaN.
  const tokenMap = Object.create(null);
  internals.tokenizeAndStem(value).forEach(token => {
    tokenMap[token] = (tokenMap[token] || 0) + 1;
  });

  // NOTE(review): the constructor argument is kept verbatim; presumably it is
  // just an identifier label for the tokenizer — confirm against the
  // sentence-tokenizer documentation.
  const sentenceTokenizer = new SentenceTokenizer('DENNEWITZ1');
  sentenceTokenizer.setEntry(value);
  const sentences = sentenceTokenizer.getSentences();

  // Nothing to rank; also avoids averaging an empty score list.
  if (sentences.length === 0) {
    return [];
  }

  const sentenceScores = sentences.map(sentence =>
    internals.calculateSentenceScore(sentence, tokenMap),
  );
  const avgScore = internals.calculateAverageScore(sentenceScores);

  return internals.createArticleSummary(
    sentences,
    sentenceScores,
    threshold * avgScore,
  );
};
// Module exports (CommonJS).
// `internals` exposes the helper functions alongside the main entry point —
// presumably so they can be exercised directly; confirm against callers.
exports.internals = internals;
// `summarize(value, threshold)` is the main entry point.
exports.summarize = summarize;