main.go
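
// waybackfetch queries the Wayback Machine CDX API and prints direct
// snapshot URLs for one or more target URLs.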
package main

import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
)

// ANSI color codes for styling
const (
reset = "\033[0m"
red = "\033[31m"
green = "\033[32m"
yellow = "\033[33m"
cyan = "\033[36m"
)

// Banner with colors
func printBanner() {
fmt.Println(string(cyan + `
_ _ _ _ _ _______ _
| || || | | | | | (_______) _ | |
| || || | ____ _ _| | _ ____ ____| | _ _____ ____| |_ ____| | _
| ||_|| |/ _ | | | | || \ / _ |/ ___) | / ) ___) _ ) _)/ ___) || \
| |___| ( ( | | |_| | |_) | ( | ( (___| |< (| | ( (/ /| |_( (___| | | |
\______|\_||_|\__ |____/ \_||_|\____)_| \_)_| \____)\___)____)_| |_|
(____/
` + reset))
fmt.Println(string(yellow + " v1.1 Created by KathanP19" + reset))
fmt.Println()
}

// WaybackResponse holds the raw CDX rows (timestamp, original, digest, length)
// returned by the Wayback Machine API; the first row is a header.
type WaybackResponse [][]string

type Snapshot struct {
Timestamp string `json:"timestamp"`
Original string `json:"original"`
Digest string `json:"digest"`
Length string `json:"length"`
}
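
// SnapshotURL is the template for a direct snapshot link; the "if_" modifier
// requests the raw archived content without the Wayback Machine toolbar.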
const SnapshotURL = "https://web.archive.org/web/%sif_/%s"

// FetchSnapshotUrls fetches all snapshot URLs for a given URL
func FetchSnapshotUrls(targetUrl string, silent bool, output io.Writer, uniqOnly bool) error {
baseUrl := "https://web.archive.org/cdx/search/cdx"
u, err := url.Parse(baseUrl)
if err != nil {
return fmt.Errorf(red+"error parsing base URL:"+reset+" %v", err)
}
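// Build the CDX query: exact URL match, JSON output, and only the fields
// needed to construct and deduplicate snapshot links.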
q := u.Query()
q.Set("url", targetUrl)
q.Set("matchType", "exact")
q.Set("output", "json")
q.Set("fl", "timestamp,original,digest,length")
u.RawQuery = q.Encode()
resp, err := http.Get(u.String())
if err != nil {
return fmt.Errorf(red+"error fetching data:"+reset+" %v", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf(red+"error reading response body:"+reset+" %v", err)
}
var data WaybackResponse
err = json.Unmarshal(body, &data)
if err != nil {
return fmt.Errorf(red+"error parsing JSON:"+reset+" %v", err)
}
// Check if the response contains at least one row beyond the header row
if len(data) == 0 {
return fmt.Errorf(red + "unexpected empty response from Wayback Machine API" + reset)
} else if len(data) == 1 {
if !silent {
fmt.Println(yellow + "No snapshots found for the given URL." + reset)
}
return nil
}
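// Collect snapshots, skipping malformed rows and, when uniqOnly is set,
// rows whose content digest has already been seen.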
var snapshots []Snapshot
uniqSnapshots := make(map[string]bool)
for _, row := range data[1:] {
// Ensure row contains exactly 4 fields before processing
if len(row) != 4 {
if !silent {
fmt.Printf(yellow+"Skipping malformed row: %v\n"+reset, row)
}
continue
}
digest := row[2]
// Check if only unique snapshots should be returned
if uniqOnly && uniqSnapshots[digest] {
continue
}
uniqSnapshots[digest] = true
snapshots = append(snapshots, Snapshot{
Timestamp: row[0],
Original: row[1],
Digest: digest,
Length: row[3],
})
}
// Output snapshots
for _, snapshot := range snapshots {
snapshotUrl := fmt.Sprintf(SnapshotURL, snapshot.Timestamp, targetUrl)
fmt.Fprintln(output, snapshotUrl)
}
return nil
}

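// main wires up the CLI flags, the output destination, and the three input
// modes: -u for a single URL, -l for a file of URLs, or URLs piped via stdin.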
func main() {
urlFlag := flag.String("u", "", "Single URL to fetch snapshots for")
list := flag.String("l", "", "File containing list of URLs to fetch snapshots for")
silent := flag.Bool("silent", false, "Enable silent mode, only print URLs")
outputFile := flag.String("o", "", "Output file to write results")
unique := flag.Bool("d", false, "Enable unique snapshot filtering by content digest")
// Custom help message
flag.Usage = func() {
printBanner()
fmt.Println("Usage:")
fmt.Println(" -u <url> Fetch snapshots for a single URL")
fmt.Println(" -l <file> File containing list of URLs to fetch snapshots for")
fmt.Println(" -o <file> Output file to save the results")
fmt.Println(" -d Enable unique snapshot filtering by content digest")
fmt.Println(" --silent Enable silent mode, only print URLs")
fmt.Println(" -h, --help Show this help message and exit")
}
flag.Parse()
// Print the banner unless silent mode is enabled
if !*silent {
printBanner()
}
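// Results go to stdout; when -o is set, they are mirrored to the output file as well.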
var output io.Writer = os.Stdout
if *outputFile != "" {
file, err := os.Create(*outputFile)
if err != nil {
fmt.Printf(red+"Error creating output file:"+reset+" %v\n", err)
return
}
defer file.Close()
output = io.MultiWriter(os.Stdout, file)
}
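// processUrls fetches and prints snapshot URLs for one target, honoring silent mode.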
processUrls := func(target string) {
if !*silent {
fmt.Printf(green+"\nFetching snapshots for URL:"+reset+" %s\n", target)
}
if err := FetchSnapshotUrls(target, *silent, output, *unique); err != nil && !*silent {
fmt.Println("Error:", err)
}
}
if *urlFlag != "" {
processUrls(*urlFlag)
if *outputFile != "" && !*silent {
fmt.Printf(green+"\nResults have been saved to:"+reset+" %s\n", *outputFile)
}
return
}
if *list != "" {
file, err := os.Open(*list)
if err != nil {
if !*silent {
fmt.Printf(red+"Error opening file:"+reset+" %v\n", err)
}
return
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
processUrls(scanner.Text())
}
if err := scanner.Err(); err != nil && !*silent {
fmt.Printf(red+"Error reading file:"+reset+" %v\n", err)
}
if *outputFile != "" && !*silent {
fmt.Printf(green+"\nResults have been saved to:"+reset+" %s\n", *outputFile)
}
return
}
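// With no -u or -l flag, fall back to reading URLs from stdin when input is piped in.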
stat, err := os.Stdin.Stat()
if err == nil && (stat.Mode()&os.ModeCharDevice) == 0 {
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
processUrls(scanner.Text())
}
if err := scanner.Err(); err != nil && !*silent {
fmt.Printf(red+"Error reading stdin:"+reset+" %v\n", err)
}
if *outputFile != "" && !*silent {
fmt.Printf(green+"\nResults have been saved to:"+reset+" %s\n", *outputFile)
}
return
}
if !*silent {
fmt.Println(red + "Please provide -u <URL> for a single URL, -l <file> for a list of URLs, or input via stdin" + reset)
}
}