Skip to content

Commit 07ad4cb

Browse files
committed
added unique urls example
1 parent 8c39526 commit 07ad4cb

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed

example/unique_urls.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"github.com/dsparling/go-apache-log-parser"
6+
"log"
7+
"regexp"
8+
"sort"
9+
"strings"
10+
)
11+
12+
// Pair couples a string key with the integer value recorded for it.
type Pair struct {
	Key   string
	Value int
}

// PairList is a slice of Pair values that satisfies sort.Interface,
// ordering its elements by ascending Value.
type PairList []Pair

// Len reports how many pairs the list holds.
func (pl PairList) Len() int {
	return len(pl)
}

// Less reports whether the pair at index a has a strictly smaller Value
// than the pair at index b.
func (pl PairList) Less(a, b int) bool {
	left, right := pl[a], pl[b]
	return left.Value < right.Value
}

// Swap exchanges the pairs stored at indexes a and b.
func (pl PairList) Swap(a, b int) {
	held := pl[a]
	pl[a] = pl[b]
	pl[b] = held
}
24+
25+
func main() {
26+
uniqueUrls := make(map[string]int)
27+
lines, err := apachelogparser.Parse("logs/access_log")
28+
if err != nil {
29+
log.Fatal(err)
30+
}
31+
for _, line := range lines {
32+
re, _ := regexp.Compile(`([^?=&]+)(=([^&]*))?`)
33+
res := re.FindAllStringSubmatch(line.Url, -1)
34+
if len(res) > 0 {
35+
url := res[0][0]
36+
if skipUrl(url) {
37+
continue
38+
}
39+
uniqueUrls[url]++
40+
} else {
41+
continue
42+
}
43+
}
44+
fmt.Println(len(uniqueUrls))
45+
fmt.Println("\n##### Sorted by link #####")
46+
// Store the keys in slice in sorted order
47+
var keys []string
48+
for k := range uniqueUrls {
49+
keys = append(keys, k)
50+
}
51+
sort.Strings(keys)
52+
// Print in keys alpha order
53+
for _, k := range keys {
54+
fmt.Println(k, "(", uniqueUrls[k], ")")
55+
}
56+
57+
fmt.Println("\n##### Sorted by value #####")
58+
sortedUniqueUrlsByValue := sortMapByValue(uniqueUrls)
59+
for _, v := range sortedUniqueUrlsByValue {
60+
fmt.Println(v.Key, "(", v.Value, ")")
61+
}
62+
}
63+
64+
// A function to turn a map into a PairList, then sort and return it.
65+
// Andrew Gerrand: https://groups.google.com/d/msg/golang-nuts/FT7cjmcL7gw/Gj4_aEsE_IsJ
66+
func sortMapByValue(m map[string]int) PairList {
67+
p := make(PairList, len(m))
68+
i := 0
69+
for k, v := range m {
70+
p[i] = Pair{k, v}
71+
i++
72+
}
73+
sort.Sort(p)
74+
return p
75+
}
76+
77+
// skipUrl reports whether url should be left out of the tally. A URL is
// skipped when it ends with one of the file extensions listed below; the
// comparison is case-sensitive. Adjust the list to change the filtering.
func skipUrl(url string) bool {
	// Fixed-size array so the suffix table needs no per-call heap allocation.
	skipped := [...]string{
		".js",
		".css",
		".html",
		".txt",
		".php",
		".asp",
		".cgi",
		".xml",
		".com",
		".gif",
		".png",
		".jpg",
		".ico",
	}
	for _, suffix := range skipped {
		if strings.HasSuffix(url, suffix) {
			return true
		}
	}
	return false
}

0 commit comments

Comments
 (0)