Skip to content

Commit 4238b56

Browse files
author
Kyle Maxwell
committed
Enriches IP data with DNS records
1 parent 3d4707c commit 4238b56

File tree

1 file changed

+30
-18
lines changed

1 file changed

+30
-18
lines changed

winnower.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
11
#! /usr/bin/env python
2+
import ConfigParser
23
import csv
4+
import dnsdb_query
35
import json
46
import pygeoip
57
import sys
68

79
from netaddr import IPAddress, IPRange, IPSet
810

9-
org_data = []
10-
geo_data = pygeoip.GeoIP('data/GeoIP.dat', pygeoip.MEMORY_CACHE)
11+
12+
def setup_dnsdb():
    """Create a DNSDB client from the settings stored in ``combine.cfg``.

    The ``dnsdb_server`` and ``dnsdb_api`` options are read from the
    ``Winnower`` section of the config file, a progress message is written
    to stderr, and the two values are handed to
    ``dnsdb_query.DnsdbClient``.

    Returns:
        The ``dnsdb_query.DnsdbClient`` instance built from those settings.
    """
    section = 'Winnower'
    parser = ConfigParser.ConfigParser()
    parser.read('combine.cfg')
    # Both options are looked up before anything is logged, so a missing
    # option raises before the progress message is emitted.
    endpoint = parser.get(section, 'dnsdb_server')
    api_key = parser.get(section, 'dnsdb_api')
    sys.stderr.write('Setting up DNSDB client\n')
    return dnsdb_query.DnsdbClient(endpoint, api_key)
19+
1120

1221
def load_gi_org(filename):
1322
gi_org = {}
@@ -18,7 +27,7 @@ def load_gi_org(filename):
1827
return gi_org
1928

2029

21-
def org_by_addr(address):
30+
def org_by_addr(address, org_data):
2231
as_num = None
2332
as_name = None
2433
for org in org_data:
@@ -29,23 +38,26 @@ def org_by_addr(address):
2938

3039

3140
def maxhits(dns_records):
    """Return the hostname of the DNS record with the highest hit count.

    Args:
        dns_records: An iterable of mappings, each providing a numeric
            ``'count'`` entry and a string ``'rrname'`` entry.

    Returns:
        The ``'rrname'`` of the record with the largest ``'count'``, with
        any trailing dots removed, or ``None`` when the iterable is empty
        or no record has a count greater than zero.
    """
    # Named best_count (not ``max``) so the builtin max() is not shadowed.
    best_count = 0
    hostname = None
    for record in dns_records:
        count = record['count']
        # Strict comparison: on a tie the earliest record keeps the win.
        if count > best_count:
            best_count = count
            hostname = record['rrname'].rstrip('.')
    return hostname
3348

3449

35-
def dnsdb(address, record_type):
36-
pass
50+
def enrich_IPv4(address, org_data, geo_data, dnsdb):
    """Collect ASN, country and hostname information for one address.

    Args:
        address: The address to enrich; it is formatted with ``'%s'`` for
            the GeoIP and DNSDB lookups.
        org_data: Organisation data forwarded to ``org_by_addr``.
        geo_data: Object providing ``country_code_by_addr``.
        dnsdb: Object providing ``query_rdata_ip``.

    Returns:
        The tuple ``(address, as_num, as_name, country, hostname)``. The
        first element is the ``address`` argument itself, not its string
        form.

    NOTE(review): if the caller passes a netaddr ``IPAddress`` and later
    JSON-serialises this tuple, the address probably needs converting to
    ``str`` first - confirm against the caller.
    """
    as_num, as_name = org_by_addr(address, org_data)
    # Both lookups take the textual form of the address.
    addr_text = '%s' % address
    country = geo_data.country_code_by_addr(addr_text)
    dns_records = dnsdb.query_rdata_ip(addr_text)
    hostname = maxhits(dns_records)
    return (address, as_num, as_name, country, hostname)
4456

4557

4658
def reserved(address):
4759
# from http://en.wikipedia.org/wiki/Reserved_IP_addresses:
48-
ranges = IPSet(['0.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '192.88.99.0/24',
60+
ranges = IPSet(['0.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '192.88.99.0/24',
4961
'198.18.0.0/15', '198.51.100.0/24', '203.0.113.0/24', '233.252.0.0/24'])
5062
a_reserved = address.is_reserved()
5163
a_private = address.is_private()
@@ -61,7 +73,8 @@ def winnow(in_file, out_file, enr_file):
6173
crop = json.load(f)
6274

6375
org_data = load_gi_org('data/GeoIPASNum2.csv')
64-
#country_data = load_gi_country('data/')
76+
geo_data = pygeoip.GeoIP('data/GeoIP.dat')
77+
dnsdb = setup_dnsdb()
6578

6679
wheat = []
6780
enriched = []
@@ -72,16 +85,15 @@ def winnow(in_file, out_file, enr_file):
7285
ipaddr = IPAddress(addr)
7386
if not reserved(ipaddr):
7487
wheat.append(each)
75-
enriched.append(enrich_IPv4(ipaddr))
88+
enriched.append(enrich_IPv4(ipaddr, org_data, geo_data, dnsdb))
7689
else:
77-
sys.stderr.write("%s is reserved, sorry" % addr)
78-
90+
sys.stderr.write("%s is reserved, sorry\n" % addr)
7991

8092
with open(out_file, 'wb') as f:
8193
json.dump(wheat, f, indent=2)
8294

83-
#with open(enr_file, 'wb') as f:
84-
#json.dump(enriched, f, indent=2)
95+
with open(enr_file, 'wb') as f:
96+
json.dump(enriched, f, indent=2)
8597

8698

8799
if __name__ == "__main__":

0 commit comments

Comments
 (0)