src/main.rs: 66 additions & 0 deletions
@@ -2,6 +2,7 @@ mod testing_resources;

use clap::Parser;
use std::fs;
use std::collections::HashMap;

/// A `wc` implementation in Rust
#[derive(Parser, Debug)]
@@ -19,6 +20,10 @@ struct Args {
#[arg(short = 'w')]
should_words: bool,

/// Count how often each word appears in the input files and print the 10 most frequent words
#[arg(short = 'f')]
should_count_word_frequency: bool,

/// Paths to input files we want to `wc`. If more than one input file is
/// specified, a line of cumulative counts for all the files is displayed
/// on a separate line after the output for the last file.
@@ -30,21 +35,27 @@ fn main() {
let should_words: bool;
let should_lines: bool;
let should_characters: bool;
let should_count_word_frequency: bool;

let mut should_exit_with_err: bool = false;
if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words {
// Compat with wc behavior: no count flags passed means all of these should be on.
// Word frequency is an extension beyond wc, so it stays opt-in via -f.
should_characters = true;
should_lines = true;
should_words = true;
should_count_word_frequency = parsed_args.should_count_word_frequency;
} else {
should_characters = parsed_args.should_characters;
should_lines = parsed_args.should_lines;
should_words = parsed_args.should_words;
should_count_word_frequency = parsed_args.should_count_word_frequency;
}

let mut total_words: usize = 0;
let mut total_lines: usize = 0;
let mut total_characters: usize = 0;
let mut combined_word_frequency_maps_in_all_contents: HashMap<String, usize> = HashMap::new();

for path in parsed_args.paths.iter() {
let file_contents = match fs::read_to_string(path.clone()) {
Ok(x) => x,
@@ -69,6 +80,14 @@ fn main() {
total_characters += characters_in_this_content;
print!("{:>8}", characters_in_this_content);
}

if should_count_word_frequency {
let word_frequency_in_this_content = tokenize_words_in_content(&file_contents);
// Rank and print this file's top words, then fold its counts into the running total.
pretty_print_a_vector(arrange_hashmap(word_frequency_in_this_content.clone()));
combined_word_frequency_maps_in_all_contents = combine_hashmaps(combined_word_frequency_maps_in_all_contents, word_frequency_in_this_content);
}
println!(" {}", path)
}
// Now if more than 1 path, print total
@@ -82,6 +101,10 @@ fn main() {
if should_characters {
print!("{:>8}", total_characters);
}
if should_count_word_frequency {
let arranged_word_map_frequency_in_all_contents = arrange_hashmap(combined_word_frequency_maps_in_all_contents);
pretty_print_a_vector(arranged_word_map_frequency_in_all_contents)
}
println!(" total")
}
if should_exit_with_err {
@@ -106,12 +129,45 @@ fn count_words_in_content(content: &str) -> usize {
content.split_ascii_whitespace().count()
}

/// Count how many times each whitespace-separated word appears in `content`.
fn tokenize_words_in_content(content: &str) -> HashMap<String, usize> {
let mut word_map_frequency: HashMap<String, usize> = HashMap::new();

for word in content.split_ascii_whitespace() {
*word_map_frequency.entry(word.to_string()).or_insert(0) += 1;
}
word_map_frequency
}
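
As a review aside, this tokenization does no normalization: punctuation stays attached and case matters. A minimal standalone sketch of that behavior (the input string is made up for illustration):

use std::collections::HashMap;

// Sketch: split_ascii_whitespace keeps punctuation attached and is
// case-sensitive, so "Line", "line" and "one," are distinct keys.
fn main() {
    let content = "Line one\nline one, line";
    let mut freq: HashMap<String, usize> = HashMap::new();
    for word in content.split_ascii_whitespace() {
        *freq.entry(word.to_string()).or_insert(0) += 1;
    }
    assert_eq!(freq.get("line"), Some(&2)); // bare "line" appears twice
    assert_eq!(freq.get("Line"), Some(&1)); // capitalized variant is separate
    assert_eq!(freq.get("one,"), Some(&1)); // trailing comma is kept
}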

/// Merge two frequency maps, summing the counts of words present in both.
fn combine_hashmaps(map1: HashMap<String, usize>, map2: HashMap<String, usize>) -> HashMap<String, usize> {
let mut combined_hashmap = map1;
for (key, value) in map2 {
let counter = combined_hashmap.entry(key).or_insert(0);
*counter += value;
}
combined_hashmap
}
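
The entry-based merge is what makes counts sum across files; HashMap::extend would silently overwrite instead. A small self-contained sketch with hypothetical values:

use std::collections::HashMap;

// Sketch: the entry API sums counts for shared keys, whereas
// HashMap::extend replaces map1's count with map2's.
fn main() {
    let map1: HashMap<String, usize> = HashMap::from([("line".to_string(), 2)]);
    let map2: HashMap<String, usize> = HashMap::from([("line".to_string(), 3)]);

    let mut summed = map1.clone();
    for (key, value) in map2.clone() {
        *summed.entry(key).or_insert(0) += value;
    }
    assert_eq!(summed.get("line"), Some(&5)); // 2 + 3

    let mut overwritten = map1;
    overwritten.extend(map2);
    assert_eq!(overwritten.get("line"), Some(&3)); // replaced, not summed
}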

/// Sort a frequency map into a vector by descending count and keep the top 10.
fn arrange_hashmap(map: HashMap<String, usize>) -> Vec<(String, usize)> {
let mut sorted_vec: Vec<(String, usize)> = map.into_iter().collect();
sorted_vec.sort_by(|a, b| b.1.cmp(&a.1));
sorted_vec.truncate(10);
sorted_vec
}
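
One caveat worth noting in review: HashMap iteration order is unspecified, so words tied on count can appear in a different order from run to run. If stable output is wanted, here is a sketch of a tie-breaking variant (arrange_hashmap_deterministic is a hypothetical name, not part of this diff):

use std::collections::HashMap;

// Sketch: break count ties alphabetically so the top-10 list is stable
// across runs despite HashMap's unspecified iteration order.
fn arrange_hashmap_deterministic(map: HashMap<String, usize>) -> Vec<(String, usize)> {
    let mut sorted: Vec<(String, usize)> = map.into_iter().collect();
    sorted.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    sorted.truncate(10);
    sorted
}

fn main() {
    let mut map: HashMap<String, usize> = HashMap::new();
    for word in ["b", "a", "a", "b", "c"] {
        *map.entry(word.to_string()).or_insert(0) += 1;
    }
    // "a" and "b" tie at 2; the alphabetical tie-break puts "a" first.
    let expected = vec![("a".to_string(), 2), ("b".to_string(), 2), ("c".to_string(), 1)];
    assert_eq!(expected, arrange_hashmap_deterministic(map));
}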

/// Print a leading blank line, then one "word count" row per entry.
fn pretty_print_a_vector(vector: Vec<(String, usize)>) {
println!();
for (key, value) in vector {
println!("{} {}", key, value);
}
}

#[cfg(test)]
mod tests {
use crate::testing_resources::EXAMPLE_CONTENT_EMPTY;
use crate::testing_resources::EXAMPLE_CONTENT_FIVE_WORDS;
use crate::testing_resources::EXAMPLE_CONTENT_TEN_CHARS;
use crate::testing_resources::EXAMPLE_CONTENT_WITH_FOUR_LINES;
use crate::testing_resources::EXAMPLE_CONTENT_WITH_FOUR_DISTINCT_WORDS;

use super::*;

@@ -132,4 +188,14 @@ mod tests {
assert_eq!(10, count_characters_in_content(EXAMPLE_CONTENT_TEN_CHARS));
assert_eq!(0, count_characters_in_content(EXAMPLE_CONTENT_EMPTY));
}

#[test]
fn test_tokenize_words_in_content() {
let tokenized = tokenize_words_in_content(EXAMPLE_CONTENT_WITH_FOUR_DISTINCT_WORDS);
assert_eq!(4, tokenized.keys().len());
assert_eq!(&3, tokenized.get("line").unwrap());
assert_eq!(0, tokenize_words_in_content(EXAMPLE_CONTENT_EMPTY).len());
}

}
src/testing_resources.rs: 8 additions & 0 deletions
@@ -15,3 +15,11 @@ Hamilton";
#[cfg(test)]
pub const EXAMPLE_CONTENT_TEN_CHARS: &str = "asdf
asdf!";


#[cfg(test)]
pub const EXAMPLE_CONTENT_WITH_FOUR_DISTINCT_WORDS: &str = "
line one
line two
line three
";