Skip to content
This repository was archived by the owner on Oct 16, 2019. It is now read-only.

Commit fcbe834

Browse files
committed
Add a tool for checking links
1 parent c852d4a commit fcbe834

File tree

1 file changed

+136
-0
lines changed

1 file changed

+136
-0
lines changed

tools/check-links.hs

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env stack
2+
{- stack --resolver lts-10.0 --install-ghc script
3+
--package bytestring
4+
--package cmark
5+
--package containers
6+
--package directory
7+
--package filepath
8+
--package http-client
9+
--package http-client-tls
10+
--package http-types
11+
--package network-uri
12+
--package text -}
13+
{-# OPTIONS_GHC -Werror -Weverything -Wno-implicit-prelude -Wno-unsafe #-}
14+
module Main (main) where
15+
import qualified Control.Exception as Exception
16+
import qualified CMark as Markdown
17+
import qualified Data.ByteString as Bytes
18+
import qualified Data.Maybe as Maybe
19+
import qualified Data.Set as Set
20+
import qualified Data.Text as Text
21+
import qualified Data.Text.Encoding as Text
22+
import qualified Data.Text.IO as Text
23+
import qualified Network.HTTP.Client as Client
24+
import qualified Network.HTTP.Client.TLS as Client
25+
import qualified Network.HTTP.Types as Http
26+
import qualified Network.URI as Uri
27+
import qualified System.Directory as Directory
28+
import qualified System.FilePath as Path
29+
import qualified Text.Printf as Printf
30+
31+
main :: IO ()
32+
main = do
33+
let directory = Path.combine "content" "issues"
34+
manager <- Client.newTlsManager
35+
36+
issues <- getIssues directory
37+
Printf.printf
38+
"Found %d issue%s.\n"
39+
(length issues)
40+
(if length issues == 1 then "" else "s")
41+
42+
links <- getLinks directory issues
43+
Printf.printf
44+
"Found %d link%s.\n"
45+
(Set.size links)
46+
(if Set.size links == 1 then "" else "s")
47+
48+
mapM_ (checkLink manager) links
49+
50+
getIssues :: FilePath -> IO [FilePath]
51+
getIssues directory = do
52+
files <- Directory.listDirectory directory
53+
pure (filter isMarkdown files)
54+
55+
isMarkdown :: FilePath -> Bool
56+
isMarkdown file = hasExtension "markdown" file
57+
58+
hasExtension :: String -> FilePath -> Bool
59+
hasExtension extension file =
60+
Path.takeExtension file == Path.extSeparator : extension
61+
62+
getLinks :: FilePath -> [FilePath] -> IO (Set.Set Uri.URI)
63+
getLinks directory issues = do
64+
uris <- extractIssuesUris directory issues
65+
let httpUris = Set.filter isHttp uris
66+
pure httpUris
67+
68+
extractIssuesUris :: FilePath -> [FilePath] -> IO (Set.Set Uri.URI)
69+
extractIssuesUris directory issues = do
70+
urls <- extractIssuesUrls directory issues
71+
let uris = Maybe.mapMaybe parseUri urls
72+
pure (Set.fromList uris)
73+
74+
extractIssuesUrls :: FilePath -> [FilePath] -> IO [Markdown.Url]
75+
extractIssuesUrls directory issues = do
76+
urls <- mapM (extractIssueUrls directory) issues
77+
pure (concat urls)
78+
79+
extractIssueUrls :: FilePath -> FilePath -> IO [Markdown.Url]
80+
extractIssueUrls directory issue = do
81+
let file = Path.combine directory issue
82+
text <- Text.readFile file
83+
let node = Markdown.commonmarkToNode [] text
84+
pure (extractNodeUrls node)
85+
86+
extractNodeUrls :: Markdown.Node -> [Markdown.Url]
87+
extractNodeUrls (Markdown.Node _ nodeType nodes) =
88+
let links = concatMap extractNodeUrls nodes
89+
in case extractNodeTypeUrl nodeType of
90+
Nothing -> links
91+
Just link -> link : links
92+
93+
extractNodeTypeUrl :: Markdown.NodeType -> Maybe Markdown.Url
94+
extractNodeTypeUrl nodeType = case nodeType of
95+
Markdown.LINK url _ -> Just url
96+
_ -> Nothing
97+
98+
parseUri :: Markdown.Url -> Maybe Uri.URI
99+
parseUri url = Uri.parseAbsoluteURI (Text.unpack url)
100+
101+
isHttp :: Uri.URI -> Bool
102+
isHttp uri = hasScheme "http" uri || hasScheme "https" uri
103+
104+
hasScheme :: String -> Uri.URI -> Bool
105+
hasScheme scheme uri = safeInit (Uri.uriScheme uri) == Just scheme
106+
107+
safeInit :: [a] -> Maybe [a]
108+
safeInit xs = case xs of
109+
[] -> Nothing
110+
_ -> Just (init xs)
111+
112+
checkLink :: Client.Manager -> Uri.URI -> IO ()
113+
checkLink manager uri = do
114+
let url = displayUri uri
115+
request <- Client.parseRequest url
116+
Exception.catch
117+
(do
118+
response <- Client.httpNoBody (withUserAgent request) manager
119+
Printf.printf
120+
"- %d %s\n"
121+
(Http.statusCode (Client.responseStatus response))
122+
url)
123+
(\ exception -> case exception of
124+
Client.HttpExceptionRequest _ x -> Printf.printf "- 001 %s %s\n" url (show x)
125+
Client.InvalidUrlException _ x -> Printf.printf "- 002 %s %s\n" url x)
126+
127+
displayUri :: Uri.URI -> String
128+
displayUri uri = Uri.uriToString id uri ""
129+
130+
withUserAgent :: Client.Request -> Client.Request
131+
withUserAgent request = request
132+
{ Client.requestHeaders = [(Http.hUserAgent, userAgent)]
133+
}
134+
135+
userAgent :: Bytes.ByteString
136+
userAgent = Text.encodeUtf8 (Text.pack "haskell-weekly")

0 commit comments

Comments
 (0)