Skip to content

Commit

Permalink
feat: Add support for ranges in the --accept option / config field (#…
Browse files Browse the repository at this point in the history
…1167)

Adds support for accept ranges as discussed in #1157. This allows the user to specify custom HTTP status codes that are accepted during checking and are therefore reported as valid (not broken). Previously, the accept option only supported specifying status codes as a comma-separated list. With this PR, the option also accepts a list of status code ranges formatted like this:

```toml
accept = ["100..=103", "200..=299", "403"]
```

These combinations will be supported: `..<end>`, `..=<end>`, `<start>..<end>` and `<start>..=<end>`.
The behavior is modeled on Rust's range expressions:

```
    ..<end>, includes 0 to <end> (exclusive)
    ..=<end>, includes 0 to <end> (inclusive)
    <start>..<end>, includes <start> to <end> (exclusive)
    <start>..=<end>, includes <start> to <end> (inclusive)
```


- Foundation and enhancements for accept ranges, including support for comma-separated strings and integration into the CLI.
- Implementations and updates for AcceptSelector, including Default, Display, and serde defaults.
- Address and fix various errors: clippy, cargo fmt, and tests.
- Add more tests, address edge cases, and enhance error messaging, especially for TOML config parsing.
- Update dependencies.
  • Loading branch information
Techassi authored Sep 17, 2023
1 parent f5fe25a commit 1b1fd0c
Show file tree
Hide file tree
Showing 16 changed files with 598 additions and 58 deletions.
52 changes: 52 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 37 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,30 @@ Available as a command-line utility, a library and a [GitHub Action](https://git
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
## Table of Contents

- [Table of Contents](#table-of-contents)
- [Installation](#installation)
- [Arch Linux](#arch-linux)
- [macOS](#macos)
- [Docker](#docker)
- [NixOS](#nixos)
- [FreeBSD](#freebsd)
- [Scoop](#scoop)
- [Termux](#termux)
- [Pre-built binaries](#pre-built-binaries)
- [Cargo](#cargo)
- [Build dependencies](#build-dependencies)
- [Compile and install lychee](#compile-and-install-lychee)
- [Feature flags](#feature-flags)
- [Features](#features)
- [Commandline usage](#commandline-usage)
- [Docker Usage](#docker-usage)
- [Linux/macOS shell command](#linuxmacos-shell-command)
- [Windows PowerShell command](#windows-powershell-command)
- [GitHub Token](#github-token)
- [Commandline Parameters](#commandline-parameters)
- [Exit codes](#exit-codes)
- [Ignoring links](#ignoring-links)
- [Caching](#caching)
- [Library usage](#library-usage)
- [GitHub Action Usage](#github-action-usage)
- [Contributing to lychee](#contributing-to-lychee)
Expand Down Expand Up @@ -384,7 +405,22 @@ Options:
Custom request header
-a, --accept <ACCEPT>
Comma-separated list of accepted status codes for valid links
A List of accepted status codes for valid links
The following accept range syntax is supported: [start]..[=]end|code. Some valid
examples are:
- 200..=204
- 200..204
- ..=204
- ..204
- 200
Use "lychee --accept '200..=204, 429, 500' <inputs>..." to provide a comma-
separated list of accepted status codes. This example will accept 200, 201,
202, 203, 204, 429, and 500 as valid status codes.
[default: 100..=103,200..=299,403..=403]
--include-fragments
Enable the checking of fragments in links
Expand Down
8 changes: 7 additions & 1 deletion fixtures/configs/smoketest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,13 @@ timeout = 20
retry_wait_time = 2

# Comma-separated list of accepted status codes for valid links.
accept = [200, 429]
# Supported values are:
#
# accept = ["200..=204", "429"]
# accept = "200..=204, 429"
# accept = ["200", "429"]
# accept = "200, 429"
accept = ["200", "429"]

# Proceed for server connections considered insecure (invalid TLS).
insecure = false
Expand Down
17 changes: 7 additions & 10 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,13 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
cfg.scheme.clone()
};

let accepted = match cfg.accept {
Some(ref accepted) => {
let accepted: Result<HashSet<_>, _> = accepted
.iter()
.map(|code| StatusCode::from_u16(*code))
.collect();
Some(accepted?)
}
None => None,
};
let accepted = cfg
.accept
.clone()
.into_set()
.iter()
.map(|value| StatusCode::from_u16(*value))
.collect::<Result<HashSet<_>, _>>()?;

// `exclude_mail` will be removed in 1.0. Until then, we need to support it.
// Therefore, we need to check if both `include_mail` and `exclude_mail` are set to `true`
Expand Down
8 changes: 4 additions & 4 deletions lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ where

let client = params.client;
let cache = params.cache;
let accept = params.cfg.accept;
let accept = params.cfg.accept.into_set();

let pb = if params.cfg.no_progress || params.cfg.verbose.log_level() >= log::Level::Info {
None
Expand Down Expand Up @@ -207,7 +207,7 @@ async fn request_channel_task(
max_concurrency: usize,
client: Client,
cache: Arc<Cache>,
accept: Option<HashSet<u16>>,
accept: HashSet<u16>,
) {
StreamExt::for_each_concurrent(
ReceiverStream::new(recv_req),
Expand All @@ -230,7 +230,7 @@ async fn handle(
client: &Client,
cache: Arc<Cache>,
request: Request,
accept: Option<HashSet<u16>>,
accept: HashSet<u16>,
) -> Response {
let uri = request.uri.clone();
if let Some(v) = cache.get(&uri) {
Expand All @@ -244,7 +244,7 @@ async fn handle(
// `accepted` status codes might have changed from the previous run
// and they may have an impact on the interpretation of the status
// code.
Status::from_cache_status(v.value().status, accept)
Status::from_cache_status(v.value().status, &accept)
};
return Response::new(uri.clone(), status, request.source);
}
Expand Down
37 changes: 28 additions & 9 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use crate::archive::Archive;
use crate::parse::{parse_base, parse_statuscodes};
use crate::parse::parse_base;
use crate::verbosity::Verbosity;
use anyhow::{anyhow, Context, Error, Result};
use clap::{arg, builder::TypedValueParser, Parser};
use const_format::{concatcp, formatcp};
use lychee_lib::{
Base, BasicAuthSelector, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
AcceptSelector, Base, BasicAuthSelector, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
};
use secrecy::{ExposeSecret, SecretString};
use serde::Deserialize;
use std::path::Path;
use std::{collections::HashSet, fs, path::PathBuf, str::FromStr, time::Duration};
use std::{fs, path::PathBuf, str::FromStr, time::Duration};
use strum::VariantNames;

pub(crate) const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";
Expand Down Expand Up @@ -91,6 +91,7 @@ default_function! {
retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS;
method: String = DEFAULT_METHOD.to_string();
verbosity: Verbosity = Verbosity::default();
accept_selector: AcceptSelector = AcceptSelector::default();
}

// Macro for merging configuration values
Expand Down Expand Up @@ -304,10 +305,28 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) header: Vec<String>,

/// Comma-separated list of accepted status codes for valid links
#[arg(short, long, value_parser = parse_statuscodes)]
#[serde(default)]
pub(crate) accept: Option<HashSet<u16>>,
/// A List of accepted status codes for valid links
#[arg(
short,
long,
default_value_t,
long_help = "A List of accepted status codes for valid links
The following accept range syntax is supported: [start]..[=]end|code. Some valid
examples are:
- 200..=204
- 200..204
- ..=204
- ..204
- 200
Use \"lychee --accept '200..=204, 429, 500' <inputs>...\" to provide a comma-
separated list of accepted status codes. This example will accept 200, 201,
202, 203, 204, 429, and 500 as valid status codes."
)]
#[serde(default = "accept_selector")]
pub(crate) accept: AcceptSelector,

/// Enable the checking of fragments in links.
#[arg(long)]
Expand Down Expand Up @@ -389,7 +408,8 @@ impl Config {
pub(crate) fn load_from_file(path: &Path) -> Result<Config> {
// Read configuration file
let contents = fs::read_to_string(path)?;
toml::from_str(&contents).context("Failed to parse configuration file")
toml::from_str(&contents)
.map_err(|err| anyhow::anyhow!("Failed to parse configuration file: {}", err))
}

/// Merge the configuration from TOML into the CLI configuration
Expand Down Expand Up @@ -421,7 +441,6 @@ impl Config {
exclude_mail: false;
remap: Vec::<String>::new();
header: Vec::<String>::new();
accept: None;
timeout: DEFAULT_TIMEOUT_SECS;
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
method: DEFAULT_METHOD;
Expand Down
25 changes: 1 addition & 24 deletions lychee-bin/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use anyhow::{anyhow, Context, Result};
use headers::{HeaderMap, HeaderName};
use lychee_lib::{remap::Remaps, Base};
use std::{collections::HashSet, time::Duration};
use std::time::Duration;

/// Split a single HTTP header into a (key, value) tuple
fn read_header(input: &str) -> Result<(String, String)> {
Expand Down Expand Up @@ -40,24 +40,8 @@ pub(crate) fn parse_base(src: &str) -> Result<Base, lychee_lib::ErrorKind> {
Base::try_from(src)
}

/// Parse HTTP status codes into a set of `StatusCode`
///
/// Note that this function does not convert the status codes into
/// `StatusCode` but rather into `u16` to avoid the need for
/// `http` as a dependency and to support custom status codes, which are
/// necessary for some websites, which don't adhere to the HTTP spec or IANA.
pub(crate) fn parse_statuscodes(accept: &str) -> Result<HashSet<u16>> {
let mut statuscodes = HashSet::new();
for code in accept.split(',') {
let code: u16 = code.parse::<u16>()?;
statuscodes.insert(code);
}
Ok(statuscodes)
}

#[cfg(test)]
mod tests {
use std::collections::HashSet;

use headers::HeaderMap;
use regex::Regex;
Expand All @@ -72,13 +56,6 @@ mod tests {
assert_eq!(parse_headers(&["accept=text/html"]).unwrap(), custom);
}

#[test]
fn test_parse_statuscodes() {
let actual = parse_statuscodes("200,204,301").unwrap();
let expected = IntoIterator::into_iter([200, 204, 301]).collect::<HashSet<_>>();
assert_eq!(actual, expected);
}

#[test]
fn test_parse_remap() {
let remaps =
Expand Down
2 changes: 2 additions & 0 deletions lychee-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ doc-comment = "0.3.3"
tempfile = "3.8.0"
wiremock = "0.5.19"
serde_json = "1.0.105"
rstest = "0.18.1"
toml = "0.7.6"

[features]

Expand Down
6 changes: 3 additions & 3 deletions lychee-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ pub use crate::{
collector::Collector,
filter::{Excludes, Filter, Includes},
types::{
uri::valid::Uri, Base, BasicAuthCredentials, BasicAuthSelector, CacheStatus, CookieJar,
ErrorKind, FileType, Input, InputContent, InputSource, Request, Response, ResponseBody,
Result, Status,
uri::valid::Uri, AcceptRange, AcceptRangeError, AcceptSelector, Base, BasicAuthCredentials,
BasicAuthSelector, CacheStatus, CookieJar, ErrorKind, FileType, Input, InputContent,
InputSource, Request, Response, ResponseBody, Result, Status,
},
};
5 changes: 5 additions & 0 deletions lychee-lib/src/types/accept/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mod range;
mod selector;

pub use range::*;
pub use selector::*;
Loading

0 comments on commit 1b1fd0c

Please sign in to comment.