Skip to content

feat: wrote first draft of statistics logging #454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b563994
feat: wrote first draft of statistics logging
elijah-potter Jan 17, 2025
42476b6
chore(ls): remove dead code
elijah-potter Jan 21, 2025
1f113f4
feat(stats): moved statistics stuff to its own crate
elijah-potter Jan 23, 2025
223d165
feat(cli): ability to summarize `stats.txt` files
elijah-potter Jan 23, 2025
f60140e
fix(stats): appease clippy
elijah-potter Jan 23, 2025
7e4a7b8
Merge branch 'master' into local-stats
elijah-potter Jan 23, 2025
d0a63e1
fix(stats): crate version
elijah-potter Jan 23, 2025
fa5d139
Merge branch 'master' into local-stats
elijah-potter Mar 21, 2025
264c2c2
feat(stats): reorganize modules
elijah-potter Mar 25, 2025
f96a06e
feat(stats): use JSON rows
elijah-potter Mar 26, 2025
88b0aea
feat(stats): store `LintGroupConfig` changes
elijah-potter Mar 27, 2025
4832a32
feat(harper.js): now logs lint application statistics
elijah-potter Mar 27, 2025
5f34e7f
feat(harper.js): expose stats logging and summarization
elijah-potter Mar 27, 2025
080c5ad
Merge branch 'master' into local-stats
elijah-potter Mar 27, 2025
300c5da
fix(stats): can now use time on WASM targets
elijah-potter Mar 27, 2025
a0f7721
fix(fmt): ran `just format`
elijah-potter Mar 31, 2025
29f03d8
feat(stats): now includes lint context + misspelled words
elijah-potter Mar 31, 2025
0bfe22f
feat(harper.js): can query summary with time range
elijah-potter Mar 31, 2025
70c0419
feat(stats): count total applied suggestions
elijah-potter Mar 31, 2025
4446e3c
test(harper.js): ensure summaries can cross worker boundary
elijah-potter Mar 31, 2025
a24bc19
feat(harper.js): pass entire summary out
elijah-potter Mar 31, 2025
3fbeb95
Merge branch 'master' into local-stats
elijah-potter Apr 1, 2025
fb5a775
feat(web): created dashboard for `stats.txt` files
elijah-potter Apr 1, 2025
4041799
refactor(web): rename `LintChart`
elijah-potter Apr 1, 2025
f17677b
docs(core): wrote page about statistics
elijah-potter Apr 2, 2025
e5a5f7e
Merge branch 'master' into local-stats
elijah-potter Apr 2, 2025
b6c4a90
docs(stats): fix minor grammatical error + bad import
elijah-potter Apr 3, 2025
1a7c860
feat(stats): use new `FatStringToken` to make logs more readable
elijah-potter Apr 3, 2025
4870b34
fix(stats): remove redundant `to_string` call
elijah-potter Apr 3, 2025
2b9578c
fix(stats): remove bad prop tests
elijah-potter Apr 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst" ]
members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst" , "harper-stats"]
resolver = "2"

[profile.release]
Expand Down
1 change: 1 addition & 0 deletions harper-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ repository = "https://github.com/automattic/harper"
anyhow = "1.0.97"
ariadne = "0.4.1"
clap = { version = "4.5.34", features = ["derive", "string"] }
harper-stats = { path = "../harper-stats", version = "0.27.0" }
dirs = "6.0.0"
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.27.0" }
harper-core = { path = "../harper-core", version = "0.27.0" }
Expand Down
18 changes: 16 additions & 2 deletions harper-cli/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#![doc = include_str!("../README.md")]

use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::BufReader;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use std::{fs, process};
Expand All @@ -17,7 +19,7 @@ use harper_core::{
MutableDictionary, TokenKind, TokenStringExt, WordId, WordMetadata,
};
use harper_literate_haskell::LiterateHaskellParser;
use hashbrown::HashMap;
use harper_stats::Stats;
use serde::Serialize;

/// A debugging tool for the Harper grammar checker.
Expand Down Expand Up @@ -65,6 +67,8 @@ enum Args {
Forms { line: String },
/// Emit a decompressed, line-separated list of the words in Harper's dictionary.
Words,
/// Summarize a lint record
SummarizeLintRecord { file: PathBuf },
/// Print the default config with descriptions.
Config,
/// Print a list of all the words in a document, sorted by frequency.
Expand Down Expand Up @@ -225,6 +229,16 @@ fn main() -> anyhow::Result<()> {

Ok(())
}
Args::SummarizeLintRecord { file } => {
let file = File::open(file)?;
let mut reader = BufReader::new(file);
let stats = Stats::read(&mut reader)?;

let summary = stats.summarize();
println!("{summary}");

Ok(())
}
Args::Forms { line } => {
let (word, annot) = line_to_parts(&line);

Expand Down
21 changes: 20 additions & 1 deletion harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ use crate::patterns::{
};
use crate::punctuation::Punctuation;
use crate::vec_ext::VecExt;
use crate::{Dictionary, FatToken, FstDictionary, Lrc, Token, TokenKind, TokenStringExt};
use crate::{
Dictionary, FatStringToken, FatToken, FstDictionary, Lrc, Token, TokenKind, TokenStringExt,
};
use crate::{NumberSuffix, Span};

/// A document containing some amount of lexed and parsed English text.
Expand All @@ -37,6 +39,18 @@ impl Document {
.collect()
}

/// Gather every token that intersects `span`, converting each into an owned [`FatToken`].
///
/// The matching token indices are looked up first, then each indexed token is made "fat"
/// against the document's source.
///
/// Desperately needs optimization.
pub fn fat_tokens_intersecting(&self, span: Span) -> Vec<FatToken> {
    self.token_indices_intersecting(span)
        .into_iter()
        .map(|idx| {
            let token = &self.tokens[idx];
            token.to_fat(&self.source)
        })
        .collect()
}

/// Lexes and parses text to produce a document using a provided language
/// parser and dictionary.
pub fn new(text: &str, parser: &impl Parser, dictionary: &impl Dictionary) -> Self {
Expand Down Expand Up @@ -245,6 +259,11 @@ impl Document {
self.tokens().map(|token| token.to_fat(&self.source))
}

/// Get an iterator over all the tokens contained in the document.
pub fn fat_string_tokens(&self) -> impl Iterator<Item = FatStringToken> + '_ {
self.fat_tokens().map(|t| t.into())
}

pub fn get_span_content(&self, span: &Span) -> &[char] {
span.get_content(&self.source)
}
Expand Down
29 changes: 27 additions & 2 deletions harper-core/src/fat_token.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
use serde::{Deserialize, Serialize};

use crate::TokenKind;
use crate::{CharStringExt, TokenKind};

/// A [`Token`](crate::Token) that holds its content as a fat [`Vec<char>`] rather than as a
/// [`Span`](crate::Span).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Hash)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Hash, Eq)]
pub struct FatToken {
pub content: Vec<char>,
pub kind: TokenKind,
}

impl From<FatStringToken> for FatToken {
fn from(value: FatStringToken) -> Self {
Self {
content: value.content.chars().collect(),
kind: value.kind,
}
}
}

/// Similar to a [`FatToken`], but uses a [`String`] as the underlying store.
///
/// Can be converted to and from a [`FatToken`] through the [`From`] implementations in
/// this module.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Hash, Eq)]
pub struct FatStringToken {
/// The token's text, held as an owned [`String`].
pub content: String,
/// The kind of the token.
pub kind: TokenKind,
}

impl From<FatToken> for FatStringToken {
fn from(value: FatToken) -> Self {
Self {
content: value.content.to_string(),
kind: value.kind,
}
}
}
2 changes: 1 addition & 1 deletion harper-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use std::collections::VecDeque;
pub use char_string::{CharString, CharStringExt};
pub use currency::Currency;
pub use document::Document;
pub use fat_token::FatToken;
pub use fat_token::{FatStringToken, FatToken};
pub use ignored_lints::IgnoredLints;
use linting::Lint;
pub use mask::{Mask, Masker};
Expand Down
2 changes: 1 addition & 1 deletion harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ use crate::linting::{closed_compounds, phrase_corrections};
use crate::{CharString, Dialect, Document, TokenStringExt};
use crate::{Dictionary, MutableDictionary};

#[derive(Debug, Serialize, Deserialize, Default, Clone)]
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq, Eq)]
#[serde(transparent)]
pub struct LintGroupConfig {
inner: HashMap<String, Option<bool>>,
Expand Down
17 changes: 16 additions & 1 deletion harper-core/src/linting/lint_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize};
/// The general category a [`Lint`](super::Lint) falls into.
/// There's no reason not to add a new item here if you are adding a new rule that doesn't fit
/// the existing categories.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Is, Default, Hash, PartialEq)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Is, Default, Hash, PartialEq, Eq)]
pub enum LintKind {
/// This should only be used by linters doing spellcheck on individual words.
Spelling,
Expand All @@ -22,6 +22,21 @@ pub enum LintKind {
}

impl LintKind {
pub fn new_from_str(s: &str) -> Option<Self> {
Some(match s {
"Spelling" => LintKind::Spelling,
"Capitalization" => LintKind::Capitalization,
"Formatting" => LintKind::Formatting,
"Repetition" => LintKind::Repetition,
"Readability" => LintKind::Readability,
"Miscellaneous" => LintKind::Miscellaneous,
"Enhancement" => LintKind::Enhancement,
"Word Choice" => LintKind::WordChoice,
"Style" => LintKind::Style,
_ => return None,
})
}

/// Produce a string representation, which can be used as keys in a map or CSS variables.
pub fn to_string_key(&self) -> String {
match self {
Expand Down
1 change: 1 addition & 0 deletions harper-ls/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ readme = "README.md"
repository = "https://github.com/automattic/harper"

[dependencies]
harper-stats = { path = "../harper-stats", version = "0.27.0" }
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.27.0" }
harper-core = { path = "../harper-core", version = "0.27.0", features = ["concurrent"] }
harper-comments = { path = "../harper-comments", version = "0.27.0" }
Expand Down
Loading
Loading