1717//! should catch the majority of "broken link" cases.
1818
1919use std:: cell:: { Cell , RefCell } ;
20+ use std:: collections:: hash_map:: Entry ;
2021use std:: collections:: { HashMap , HashSet } ;
21- use std:: io:: ErrorKind ;
22+ use std:: fs;
23+ use std:: iter:: once;
2224use std:: path:: { Component , Path , PathBuf } ;
2325use std:: rc:: Rc ;
2426use std:: time:: Instant ;
25- use std:: { env, fs} ;
2627
2728use html5ever:: tendril:: ByteTendril ;
2829use html5ever:: tokenizer:: {
@@ -110,10 +111,25 @@ macro_rules! t {
110111 } ;
111112}
112113
114+ struct Cli {
115+ docs : PathBuf ,
116+ link_targets_dirs : Vec < PathBuf > ,
117+ }
118+
113119fn main ( ) {
114- let docs = env:: args_os ( ) . nth ( 1 ) . expect ( "doc path should be first argument" ) ;
115- let docs = env:: current_dir ( ) . unwrap ( ) . join ( docs) ;
116- let mut checker = Checker { root : docs. clone ( ) , cache : HashMap :: new ( ) } ;
120+ let cli = match parse_cli ( ) {
121+ Ok ( cli) => cli,
122+ Err ( err) => {
123+ eprintln ! ( "error: {err}" ) ;
124+ usage_and_exit ( 1 ) ;
125+ }
126+ } ;
127+
128+ let mut checker = Checker {
129+ root : cli. docs . clone ( ) ,
130+ link_targets_dirs : cli. link_targets_dirs ,
131+ cache : HashMap :: new ( ) ,
132+ } ;
117133 let mut report = Report {
118134 errors : 0 ,
119135 start : Instant :: now ( ) ,
@@ -125,16 +141,58 @@ fn main() {
125141 intra_doc_exceptions : 0 ,
126142 has_broken_urls : false ,
127143 } ;
128- checker. walk ( & docs, & mut report) ;
144+ checker. walk ( & cli . docs , & mut report) ;
129145 report. report ( ) ;
130146 if report. errors != 0 {
131147 println ! ( "found some broken links" ) ;
132148 std:: process:: exit ( 1 ) ;
133149 }
134150}
135151
152+ fn parse_cli ( ) -> Result < Cli , String > {
153+ fn to_absolute_path ( arg : & str ) -> Result < PathBuf , String > {
154+ std:: path:: absolute ( arg) . map_err ( |e| format ! ( "could not convert to absolute {arg}: {e}" ) )
155+ }
156+
157+ let mut verbatim = false ;
158+ let mut docs = None ;
159+ let mut link_targets_dirs = Vec :: new ( ) ;
160+
161+ let mut args = std:: env:: args ( ) . skip ( 1 ) ;
162+ while let Some ( arg) = args. next ( ) {
163+ if !verbatim && arg == "--" {
164+ verbatim = true ;
165+ } else if !verbatim && ( arg == "-h" || arg == "--help" ) {
166+ usage_and_exit ( 0 )
167+ } else if !verbatim && arg == "--link-targets-dir" {
168+ link_targets_dirs. push ( to_absolute_path (
169+ & args. next ( ) . ok_or ( "missing value for --link-targets-dir" ) ?,
170+ ) ?) ;
171+ } else if !verbatim && let Some ( value) = arg. strip_prefix ( "--link-targets-dir=" ) {
172+ link_targets_dirs. push ( to_absolute_path ( value) ?) ;
173+ } else if !verbatim && arg. starts_with ( '-' ) {
174+ return Err ( format ! ( "unknown flag: {arg}" ) ) ;
175+ } else if docs. is_none ( ) {
176+ docs = Some ( arg) ;
177+ } else {
178+ return Err ( "too many positional arguments" . into ( ) ) ;
179+ }
180+ }
181+
182+ Ok ( Cli {
183+ docs : to_absolute_path ( & docs. ok_or ( "missing first positional argument" ) ?) ?,
184+ link_targets_dirs,
185+ } )
186+ }
187+
188+ fn usage_and_exit ( code : i32 ) -> ! {
189+ eprintln ! ( "usage: linkchecker PATH [--link-targets-dir=PATH ...]" ) ;
190+ std:: process:: exit ( code)
191+ }
192+
136193struct Checker {
137194 root : PathBuf ,
195+ link_targets_dirs : Vec < PathBuf > ,
138196 cache : Cache ,
139197}
140198
@@ -420,37 +478,34 @@ impl Checker {
420478
421479 /// Load a file from disk, or from the cache if available.
422480 fn load_file ( & mut self , file : & Path , report : & mut Report ) -> ( String , & FileEntry ) {
423- // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
424- #[ cfg( windows) ]
425- const ERROR_INVALID_NAME : i32 = 123 ;
426-
427481 let pretty_path =
428482 file. strip_prefix ( & self . root ) . unwrap_or ( file) . to_str ( ) . unwrap ( ) . to_string ( ) ;
429483
430- let entry =
431- self . cache . entry ( pretty_path. clone ( ) ) . or_insert_with ( || match fs:: metadata ( file) {
484+ for base in once ( & self . root ) . chain ( self . link_targets_dirs . iter ( ) ) {
485+ let entry = self . cache . entry ( pretty_path. clone ( ) ) ;
486+ if let Entry :: Occupied ( e) = & entry
487+ && !matches ! ( e. get( ) , FileEntry :: Missing )
488+ {
489+ break ;
490+ }
491+
492+ let file = base. join ( & pretty_path) ;
493+ entry. insert_entry ( match fs:: metadata ( & file) {
432494 Ok ( metadata) if metadata. is_dir ( ) => FileEntry :: Dir ,
433495 Ok ( _) => {
434496 if file. extension ( ) . and_then ( |s| s. to_str ( ) ) != Some ( "html" ) {
435497 FileEntry :: OtherFile
436498 } else {
437499 report. html_files += 1 ;
438- load_html_file ( file, report)
500+ load_html_file ( & file, report)
439501 }
440502 }
441- Err ( e) if e. kind ( ) == ErrorKind :: NotFound => FileEntry :: Missing ,
442- Err ( e) => {
443- // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
444- // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
445- #[ cfg( windows) ]
446- if e. raw_os_error ( ) == Some ( ERROR_INVALID_NAME )
447- && file. as_os_str ( ) . to_str ( ) . map_or ( false , |s| s. contains ( "::" ) )
448- {
449- return FileEntry :: Missing ;
450- }
451- panic ! ( "unexpected read error for {}: {}" , file. display( ) , e) ;
452- }
503+ Err ( e) if is_not_found_error ( & file, & e) => FileEntry :: Missing ,
504+ Err ( e) => panic ! ( "unexpected read error for {}: {}" , file. display( ) , e) ,
453505 } ) ;
506+ }
507+
508+ let entry = self . cache . get ( & pretty_path) . unwrap ( ) ;
454509 ( pretty_path, entry)
455510 }
456511}
@@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
629684 ids. insert ( encoded) ;
630685 }
631686}
687+
688+ fn is_not_found_error ( path : & Path , error : & std:: io:: Error ) -> bool {
689+ // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
690+ const WINDOWS_ERROR_INVALID_NAME : i32 = 123 ;
691+
692+ error. kind ( ) == std:: io:: ErrorKind :: NotFound
693+ // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
694+ // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
695+ // in `LINKCHECK_EXCEPTIONS`.
696+ || ( cfg ! ( windows)
697+ && error. raw_os_error ( ) == Some ( WINDOWS_ERROR_INVALID_NAME )
698+ && path. as_os_str ( ) . to_str ( ) . map_or ( false , |s| s. contains ( "::" ) ) )
699+ }
0 commit comments