Skip to content

feat(harper-cli): make lint accept user & file-local dictionary #987

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions harper-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ repository = "https://github.com/automattic/harper"
anyhow = "1.0.97"
ariadne = "0.4.1"
clap = { version = "4.5.34", features = ["derive", "string"] }
dirs = "6.0.0"
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.27.0" }
harper-core = { path = "../harper-core", version = "0.27.0" }
harper-comments = { path = "../harper-comments", version = "0.27.0" }
Expand Down
77 changes: 66 additions & 11 deletions harper-cli/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
#![doc = include_str!("../README.md")]

use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::process;
use std::path::{Component, Path, PathBuf};
use std::sync::Arc;
use std::{fs, process};

use anyhow::format_err;
use ariadne::{Color, Label, Report, ReportKind, Source};
use clap::Parser;
use dirs::{config_dir, data_local_dir};
use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::{Markdown, MarkdownOptions};
use harper_core::{
remove_overlaps, CharStringExt, Dialect, Dictionary, Document, FstDictionary,
MutableDictionary, TokenKind, TokenStringExt, WordId,
remove_overlaps, CharStringExt, Dialect, Dictionary, Document, FstDictionary, MergedDictionary,
MutableDictionary, TokenKind, TokenStringExt, WordId, WordMetadata,
};
use harper_literate_haskell::LiterateHaskellParser;
use hashbrown::HashMap;
Expand All @@ -37,6 +39,12 @@ enum Args {
/// Specify the dialect.
#[arg(short, long, default_value = Dialect::American.to_string())]
dialect: Dialect,
/// Path to the user dictionary.
#[arg(short, long, default_value = config_dir().unwrap().join("harper-ls/dictionary.txt").into_os_string())]
user_dict_path: PathBuf,
/// Path to the directory for file-local dictionaries.
#[arg(short, long, default_value = data_local_dir().unwrap().join("harper-ls/file_dictionaries/").into_os_string())]
file_dict_path: PathBuf,
},
/// Parse a provided document and print the detected symbols.
Parse {
Expand Down Expand Up @@ -77,10 +85,26 @@ fn main() -> anyhow::Result<()> {
count,
only_lint_with,
dialect,
user_dict_path,
file_dict_path,
} => {
let (doc, source) = load_file(&file, markdown_options)?;
let mut merged_dict = MergedDictionary::new();
merged_dict.add_dictionary(dictionary);

let mut linter = LintGroup::new_curated(dictionary, dialect);
match load_dict(&user_dict_path) {
Ok(user_dict) => merged_dict.add_dictionary(Arc::new(user_dict)),
Err(err) => println!("{}: {}", user_dict_path.display(), err),
}

let file_dict_path = file_dict_path.join(file_dict_name(&file));
match load_dict(&file_dict_path) {
Ok(file_dict) => merged_dict.add_dictionary(Arc::new(file_dict)),
Err(err) => println!("{}: {}", file_dict_path.display(), err),
}

let (doc, source) = load_file(&file, markdown_options, &merged_dict)?;

let mut linter = LintGroup::new_curated(Arc::new(merged_dict), dialect);

if let Some(rules) = only_lint_with {
linter.set_all_rules_to(Some(false));
Expand Down Expand Up @@ -127,7 +151,7 @@ fn main() -> anyhow::Result<()> {
process::exit(1)
}
Args::Parse { file } => {
let (doc, _) = load_file(&file, markdown_options)?;
let (doc, _) = load_file(&file, markdown_options, &dictionary)?;

for token in doc.tokens() {
let json = serde_json::to_string(&token)?;
Expand All @@ -140,7 +164,7 @@ fn main() -> anyhow::Result<()> {
file,
include_newlines,
} => {
let (doc, source) = load_file(&file, markdown_options)?;
let (doc, source) = load_file(&file, markdown_options, &dictionary)?;

let primary_color = Color::Blue;
let secondary_color = Color::Magenta;
Expand Down Expand Up @@ -297,7 +321,7 @@ fn main() -> anyhow::Result<()> {
Ok(())
}
Args::MineWords { file } => {
let (doc, _source) = load_file(&file, MarkdownOptions::default())?;
let (doc, _source) = load_file(&file, MarkdownOptions::default(), &dictionary)?;

let mut words = HashMap::new();

Expand Down Expand Up @@ -326,7 +350,11 @@ fn main() -> anyhow::Result<()> {
}
}

fn load_file(file: &Path, markdown_options: MarkdownOptions) -> anyhow::Result<(Document, String)> {
fn load_file(
file: &Path,
markdown_options: MarkdownOptions,
dictionary: &impl Dictionary,
) -> anyhow::Result<(Document, String)> {
let source = std::fs::read_to_string(file)?;

let parser: Box<dyn harper_core::parsers::Parser> =
Expand All @@ -343,7 +371,7 @@ fn load_file(file: &Path, markdown_options: MarkdownOptions) -> anyhow::Result<(
),
};

Ok((Document::new_curated(&source, &parser), source))
Ok((Document::new(&source, &parser, dictionary), source))
}

/// Split a dictionary line into its word and annotation segments
Expand Down Expand Up @@ -371,3 +399,30 @@ fn print_word_derivations(word: &str, annot: &str, dictionary: &impl Dictionary)
println!(" - {}", child_str);
}
}

/// Sync version of harper-ls/src/dictionary_io@load_dict
///
/// Reads the file at `path` and builds a [`MutableDictionary`] holding one
/// word per line, each paired with a default [`WordMetadata`].
///
/// Surrounding whitespace is trimmed and blank lines are skipped, so a
/// hand-edited dictionary file never registers an empty word or a word that
/// can never match because of stray padding.
///
/// # Errors
///
/// Returns an error if the file at `path` cannot be read into a string
/// (for example, because it does not exist).
fn load_dict(path: &Path) -> anyhow::Result<MutableDictionary> {
    let contents = fs::read_to_string(path)?;

    let mut dict = MutableDictionary::new();
    dict.extend_words(
        contents
            .lines()
            .map(str::trim)
            // A blank line is not a word; inserting it would add an empty entry.
            .filter(|word| !word.is_empty())
            .map(|word| (word.chars().collect::<Vec<_>>(), WordMetadata::default())),
    );

    Ok(dict)
}

/// Path version of harper-ls/src/dictionary_io@file_dict_name
///
/// Flattens `path` into a single file name: every component except the root
/// directory is appended in order, and each one is followed by a `%`
/// separator (so a non-empty result always ends in `%`). Components that are
/// not valid UTF-8 are converted lossily.
fn file_dict_name(path: &Path) -> PathBuf {
    let flattened = path
        .components()
        // The root directory carries no name of its own, so it is left out.
        .filter(|part| *part != Component::RootDir)
        .fold(String::new(), |mut name, part| {
            name.push_str(&part.as_os_str().to_string_lossy());
            name.push('%');
            name
        });

    PathBuf::from(flattened)
}
Loading