@@ -30,7 +30,12 @@ func main() {
30
30
)
31
31
flag .Parse ()
32
32
33
- // Create all tasks and send them to the channel.
33
+ columns := []string {* name , * address , * phone , * email }
34
+ headers := []string {"name" , "address" , "phone" , "email" }
35
+ // url and id are added as the first two columns.
36
+ headers = append ([]string {"url" , "id" }, headers ... )
37
+
38
+ // create all tasks and send them to the channel.
34
39
type task struct {
35
40
url string
36
41
id int
@@ -43,7 +48,7 @@ func main() {
43
48
close (tasks )
44
49
}()
45
50
46
- // Create workers and schedule closing results when all work is done.
51
+ // create workers and schedule closing results when all work is done.
47
52
results := make (chan []string )
48
53
var wg sync.WaitGroup
49
54
wg .Add (* concurrency )
@@ -56,7 +61,7 @@ func main() {
56
61
go func () {
57
62
defer wg .Done ()
58
63
for t := range tasks {
59
- r , err := fetch (t .url , t .id , * name , * address , * phone , * email )
64
+ r , err := fetch (t .url , t .id , columns )
60
65
if err != nil {
61
66
log .Printf ("could not fetch %v: %v" , t .url , err )
62
67
continue
@@ -66,12 +71,12 @@ func main() {
66
71
}()
67
72
}
68
73
69
- if err := dumpCSV (* outfile , results ); err != nil {
74
+ if err := dumpCSV (* outfile , headers , results ); err != nil {
70
75
log .Printf ("could not write to %s: %v" , * outfile , err )
71
76
}
72
77
}
73
78
74
- func fetch (url string , id int , queries ... string ) ([]string , error ) {
79
+ func fetch (url string , id int , queries [] string ) ([]string , error ) {
75
80
res , err := http .Get (url )
76
81
if err != nil {
77
82
return nil , fmt .Errorf ("could not get %s: %v" , url , err )
@@ -86,21 +91,21 @@ func fetch(url string, id int, queries ...string) ([]string, error) {
86
91
return nil , fmt .Errorf ("bad response from server: %s" , res .Status )
87
92
}
88
93
89
- // Load response into GoQuery
94
+ // parse body with goquery.
90
95
doc , err := goquery .NewDocumentFromReader (res .Body )
91
96
if err != nil {
92
97
return nil , fmt .Errorf ("could not parse page: %v" , err )
93
98
}
94
99
95
- // Extract info we want
100
+ // extract info we want.
96
101
r := []string {url , strconv .Itoa (id )}
97
102
for _ , q := range queries {
98
103
r = append (r , strings .TrimSpace (doc .Find (q ).Text ()))
99
104
}
100
105
return r , nil
101
106
}
102
107
103
- func dumpCSV (path string , records <- chan []string ) error {
108
+ func dumpCSV (path string , headers [] string , records <- chan []string ) error {
104
109
f , err := os .Create (path )
105
110
if err != nil {
106
111
return fmt .Errorf ("unable to create file %s: %v" , path , err )
@@ -110,19 +115,19 @@ func dumpCSV(path string, records <-chan []string) error {
110
115
w := csv .NewWriter (f )
111
116
defer w .Flush ()
112
117
113
- // Write headers to file
114
- if err := w .Write ([] string { "id" , "name" , "url" , "address" , "phone" , "email" } ); err != nil {
118
+ // write headers to file.
119
+ if err := w .Write (headers ); err != nil {
115
120
log .Fatalf ("error writing record to csv: %v" , err )
116
121
}
117
122
118
- // Write all records
123
+ // write all records.
119
124
for r := range records {
120
125
if err := w .Write (r ); err != nil {
121
126
log .Fatalf ("could not write record to csv: %v" , err )
122
127
}
123
128
}
124
129
125
- // Check for extra errors
130
+ // check for extra errors.
126
131
if err := w .Error (); err != nil {
127
132
return fmt .Errorf ("writer failed: %v" , err )
128
133
}
0 commit comments