-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathread-web-page
More file actions
executable file
·136 lines (114 loc) · 2.31 KB
/
read-web-page
File metadata and controls
executable file
·136 lines (114 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env bash
set -euo pipefail
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   four lines of usage text on stderr
# Returns:   status of printf (0 unless the write fails)
#######################################
usage() {
  printf '%s\n' \
    'Usage: read-web-page [--all] <url>' \
    'Convert a public web URL to Markdown.' \
    'Default order: r.jina.ai, markitdown, defuddle.md.' \
    'Use --all to print output from all three converters.' >&2
}
#######################################
# Drop a leading "https://" and then a leading "http://" from a URL.
# Both prefixes are removed in that order, so a value that carries one after
# the other loses both.
# Arguments: $1 - URL (or any string)
# Outputs:   the remainder on stdout, without a trailing newline
#######################################
strip_scheme() {
  local rest="${1#https://}"
  printf '%s' "${rest#http://}"
}
#######################################
# Convert a page by handing its URL to the `markitdown` command.
# Arguments: $1 - URL to convert
# Outputs:   whatever `markitdown` writes to stdout/stderr
# Returns:   exit status of `markitdown`
#######################################
markitdown_url() {
  local target="$1"
  markitdown "$target"
}
#######################################
# Fetch a page through r.jina.ai by appending the full URL (scheme included)
# to the service's base address.
# Arguments: $1 - URL to convert
# Outputs:   curl's response body on stdout
# Returns:   exit status of curl (run with -f, -s, -S and -L)
#######################################
jina_url() {
  local endpoint="https://r.jina.ai/$1"
  curl -fsSL "$endpoint"
}
#######################################
# Fetch a page through defuddle.md, which is addressed with the URL minus its
# http(s) scheme (as produced by strip_scheme).
# Arguments: $1 - URL to convert
# Outputs:   curl's response body on stdout
# Returns:   exit status of curl (run with -f, -s, -S and -L)
#######################################
defuddle_url() {
  local endpoint
  printf -v endpoint 'https://defuddle.md/%s' "$(strip_scheme "$1")"
  curl -fsSL "$endpoint"
}
#######################################
# Dispatch to one converter by name.
# Arguments: $1 - converter name: markitdown, jina or defuddle
#            $2 - URL to convert
# Outputs:   the chosen converter's output; for an unrecognised name, an
#            "Unknown converter" message on stderr
# Returns:   the converter's exit status, or 2 for an unrecognised name
#######################################
run_converter() {
  local converter="$1"
  local target="$2"

  if [[ "$converter" == markitdown ]]; then
    markitdown_url "$target"
  elif [[ "$converter" == jina ]]; then
    jina_url "$target"
  elif [[ "$converter" == defuddle ]]; then
    defuddle_url "$target"
  else
    printf 'Unknown converter: %s\n' "$converter" >&2
    return 2
  fi
}
#######################################
# Try the converters in the default order (jina, markitdown, defuddle) and
# print the output of the first one that succeeds.
# Arguments: $1 - URL to convert
# Outputs:   stdout - output of the first converter that exits 0
#            stderr - "<name> failed:" followed by that converter's captured
#                     stderr for every converter that failed, and
#                     "All converters failed" if none succeeded
# Returns:   0 on the first success; 1 if every converter failed or the
#            scratch directory could not be created; the status of cat if
#            writing the result to stdout fails
#######################################
run_first_success() {
  local url="$1"
  local workdir tmp err name

  # One scratch directory instead of two separate temp files: there is no
  # window in which a second mktemp can fail and strand the first file.
  workdir="$(mktemp -d)" || return 1
  tmp="$workdir/out"
  err="$workdir/err"

  # A RETURN trap set inside a function stays installed after the function
  # returns and would fire again when a calling function returns, with
  # $workdir no longer in scope (fatal under `set -u`). Clear it from within
  # the handler so cleanup runs exactly once.
  trap 'rm -rf -- "$workdir"; trap - RETURN' RETURN

  for name in jina markitdown defuddle; do
    # Output is buffered in a file so a converter that fails half-way never
    # leaks partial output to stdout.
    if run_converter "$name" "$url" >"$tmp" 2>"$err"; then
      # `|| return` hands back cat's status through the normal return path,
      # so the cleanup trap still runs even when the script uses `set -e`.
      cat "$tmp" || return
      return 0
    fi
    printf '%s failed:\n' "$name" >&2
    cat "$err" >&2
  done

  echo "All converters failed" >&2
  return 1
}
#######################################
# Run every converter (jina, markitdown, defuddle) and print each result
# under its own "## <label>" Markdown heading.
# Arguments: $1 - URL to convert
# Outputs:   stdout - one section per converter: its output followed by two
#                     newlines, or "_Failed._" if it exited non-zero
#            stderr - "<label> failed:" plus the captured stderr of each
#                     failed converter, and "All converters failed" if none
#                     succeeded
# Returns:   0 if at least one converter succeeded (partial failures are
#            still reported as success); 1 if all failed or the scratch
#            directory could not be created; the status of cat if writing a
#            result to stdout fails
#######################################
run_all() {
  local url="$1"
  local workdir tmp err name label successes=0

  # One scratch directory instead of two separate temp files: there is no
  # window in which a second mktemp can fail and strand the first file.
  workdir="$(mktemp -d)" || return 1
  tmp="$workdir/out"
  err="$workdir/err"

  # A RETURN trap set inside a function stays installed after the function
  # returns and would fire again when a calling function returns, with
  # $workdir no longer in scope (fatal under `set -u`). Clear it from within
  # the handler so cleanup runs exactly once.
  trap 'rm -rf -- "$workdir"; trap - RETURN' RETURN

  for name in jina markitdown defuddle; do
    # Map the internal converter name to the label shown in headings.
    case "$name" in
      markitdown) label="markitdown" ;;
      jina) label="r.jina.ai" ;;
      defuddle) label="defuddle.md" ;;
      *) label="$name" ;;
    esac

    printf '## %s\n\n' "$label"
    if run_converter "$name" "$url" >"$tmp" 2>"$err"; then
      # `|| return` hands back cat's status through the normal return path,
      # so the cleanup trap still runs even when the script uses `set -e`.
      cat "$tmp" || return
      printf '\n\n'
      successes=$((successes + 1))
    else
      printf '_Failed._\n\n'
      printf '%s failed:\n' "$label" >&2
      cat "$err" >&2
    fi
  done

  if ((successes == 0)); then
    echo "All converters failed" >&2
    return 1
  fi
  return 0
}
# ---------------------------------------------------------------------------
# Command-line handling: read-web-page [--all] <url>
#   --all        print the output of every converter instead of the first
#                one that succeeds
#   -h, --help   print usage and exit 0
# Exits 1 on a missing/extra argument or a URL without an http(s) scheme.
# Only the first argument is examined for options.
# ---------------------------------------------------------------------------
all=false

if [[ $# -eq 0 ]]; then
  usage
  exit 1
fi

case "${1:-}" in
  --all)
    all=true
    shift
    ;;
  -h | --help)
    # Help that was asked for is regular output, not a diagnostic: route it
    # to stdout so `read-web-page --help | less` shows the text. The usage
    # errors elsewhere in this section keep it on stderr.
    usage 2>&1
    exit 0
    ;;
esac

# Exactly one positional argument (the URL) must remain.
if [[ $# -ne 1 ]]; then
  usage
  exit 1
fi

url="$1"
case "$url" in
  http://* | https://*) ;;
  *)
    echo "URL must start with http:// or https://" >&2
    exit 1
    ;;
esac

if [[ "$all" == true ]]; then
  run_all "$url"
else
  run_first_success "$url"
fi