// main.rs

use colored::*;
use lindera::tokenizer::{Token, Tokenizer};
use lindera::LinderaResult;
use std::fs::File;
use std::io::{stdin, Read};
use structopt::StructOpt;
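
/// Command-line arguments: an optional path to a text file.
/// With no path, input is read from stdin instead.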
#[derive(StructOpt)]
struct Cli {
    #[structopt(parse(from_os_str))]
    path: Option<std::path::PathBuf>,
}
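
/// A surface string and the number of times it appears in the input.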
#[derive(Debug, Eq, Ord, PartialEq, PartialOrd)]
struct WordCount {
    text: String,
    count: u32,
}
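
/// A token's surface string and part-of-speech detail from Lindera,
/// plus the number of times the pair appears in the input.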
#[derive(Debug)]
struct TokenCount {
    text: String,
    detail: Vec<String>,
    count: u32,
}
impl TokenCount {
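    /// Creates a count-of-one entry for a token seen for the first time.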
    fn new(token: &Token) -> TokenCount {
        TokenCount {
            text: token.text.to_string(),
            detail: token.detail.clone(),
            count: 1,
        }
    }
}
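
/// Tokenizes the input, prints each token's count, then echoes the text
/// with tokens colored by part of speech and the three most frequent
/// nouns highlighted in distinct bright colors.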
fn main() -> LinderaResult<()> {
    let tokenizer = Tokenizer::new()?;
    let contents = get_raw_contents();
    let tokens = tokenizer.tokenize(contents.as_str())?;
    // Nouns too generic to be worth highlighting: 的 ("-like"), レベル
    // ("level"), こと ("thing"), 毎 ("every").
    let ignore_words: Vec<&str> = vec!["的", "レベル", "こと", "毎"];
    // let mut sorted_word_count = words_count(&tokens);
    // sorted_word_count.sort_by(|a, b| b.count.cmp(&a.count));
    // println!("{:?}", sorted_word_count);
    let mut sorted_token_count = tokens_count(&tokens);
    sorted_token_count.sort_by(|a, b| b.count.cmp(&a.count));

    // Collect the three most frequent nouns (名詞), skipping the ignore list.
    let mut frequent_noun: Vec<String> = Vec::new();
    let mut count_nouns = 3;
    for token_count in sorted_token_count.iter() {
        println!("{:?}", token_count);
        if token_count.detail[0] == "名詞"
            && count_nouns > 0
            && !ignore_words.contains(&token_count.text.as_str())
        {
            frequent_noun.push(token_count.text.clone());
            count_nouns -= 1;
        }
    }
    // Re-emit the original text, coloring each token by part of speech.
    for token in tokens {
        match token.detail[0].as_str() {
            "接頭詞" => print!("{}", token.text.blue()), // prefix
            "助詞" => print!("{}", token.text.blue()),   // particle
            // Noun: highlight the top three frequent nouns in distinct colors.
            "名詞" => {
                if frequent_noun.contains(&token.text.to_string()) {
                    if token.text == frequent_noun[0] {
                        print!("{}", token.text.bright_yellow());
                    } else if token.text == frequent_noun[1] {
                        print!("{}", token.text.bright_red());
                    } else {
                        print!("{}", token.text.bright_green());
                    }
                } else {
                    print!("{}", token.text.white());
                }
            }
            "動詞" => print!("{}", token.text.white()), // verb
            //"助動詞" => print!("{}", token.text.cyan()), // auxiliary verb
            //"連体詞" => print!("{}", token.text.green()), // adnominal
            _ => print!("{}", token.text.blue()),
        }
    }
    Ok(())
}
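
/// Reads the input text from the file named on the command line or,
/// when no path is given, from a single line on stdin.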
fn get_raw_contents() -> String {
    let args = Cli::from_args();
    let mut contents = String::new();
    match args.path {
        Some(file_path) => {
            let mut f = File::open(file_path).expect("file not found");
            f.read_to_string(&mut contents)
                .expect("something went wrong reading the file");
        }
        None => {
            // Note: only the first line of stdin is consumed.
            stdin()
                .read_line(&mut contents)
                .expect("Failed to read line.");
        }
    }
    contents
}
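
/// Counts occurrences of each distinct surface string, ignoring part of
/// speech. Only referenced by the commented-out block in main.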
fn words_count(tokens: &[Token<'_>]) -> Vec<WordCount> {
    let mut word_counts: Vec<WordCount> = Vec::new();
    for token in tokens {
        match word_counts.iter_mut().find(|e| e.text == token.text) {
            Some(x) => {
                x.count += 1;
            }
            None => {
                // A word seen for the first time starts at 1, not 0.
                word_counts.push(WordCount {
                    text: token.text.to_string(),
                    count: 1,
                });
            }
        }
    }
    word_counts
}
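
/// Counts occurrences of each distinct (surface string, detail) pair.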
fn tokens_count(tokens: &[Token]) -> Vec<TokenCount> {
    let mut token_counts: Vec<TokenCount> = Vec::new();
    for token in tokens {
        // Compare surface and detail directly instead of building two
        // throwaway TokenCount values on every probe.
        match token_counts
            .iter_mut()
            .find(|e| e.text == token.text && e.detail == token.detail)
        {
            Some(x) => {
                x.count += 1;
            }
            None => {
                token_counts.push(TokenCount::new(token));
            }
        }
    }
    token_counts
}
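
// Usage sketch (the file name is hypothetical; any UTF-8 Japanese text works):
//
//   $ cargo run -- input.txt   # colorize a whole file
//   $ cargo run                # type one line of text, then Enter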