forked from EFForg/privacybadger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
apply_effdntlist.py
executable file
·85 lines (64 loc) · 2.59 KB
/
apply_effdntlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
import json
import random
import re
import sys
import time
import urllib.request
from collections import OrderedDict
from tldextract import TLDExtract
DNT_LIST_URL = 'https://www.eff.org/files/effdntlist.txt'
def get_dnt_domains():
dnt_domains = []
with urllib.request.urlopen(DNT_LIST_URL) as response:
domain_regex = r'^@@\|\|(.*)\^\$third-party$'
for line in response:
match = re.match(domain_regex, line.decode('utf-8').strip())
if match:
dnt_domains.append(match.groups()[0])
return dnt_domains
def get_next_update_time():
epoch_now = int(time.time()) * 1000
one_day = 1000 * 60 * 60 * 24
return random.randint(epoch_now + one_day, epoch_now + one_day * 7)
def apply_dnt_domains(data, dnt_domains):
tld_extract = TLDExtract(cache_dir=False)
for domain in dnt_domains:
parsed_tld = tld_extract(domain)
base = parsed_tld.domain + '.' + parsed_tld.suffix
# only apply DNT scan results for domains
# that are already present in seed data
# and are blocked
if base not in data['action_map'] or data['action_map'][base]['heuristicAction'] != "block":
continue
if domain in data['action_map']:
if "dnt" not in data['action_map'][domain] or not data['action_map'][domain]['dnt']:
print("Marking %s as DNT compliant ..." % domain)
data['action_map'][domain]['dnt'] = True
data['action_map'][domain]['nextUpdateTime'] = get_next_update_time()
else:
print("Adding %s as DNT compliant ..." % domain)
data['action_map'][domain] = {
"dnt": True,
"heuristicAction": "",
"nextUpdateTime": get_next_update_time(),
}
return data
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: %s BADGER_SEED_DATA.json" % sys.argv[0])
sys.exit(1)
# get DNT scan results
dnt_domains = get_dnt_domains()
if not dnt_domains:
print("No DNT list domains loaded!")
sys.exit(1)
with open(sys.argv[1], 'r+') as seed_file:
# read in seed data, preserving ordering
seed_data = json.load(seed_file, object_pairs_hook=OrderedDict)
# apply DNT scan results to seed data
seed_data = apply_dnt_domains(seed_data, dnt_domains)
# write the data back out
seed_file.seek(0)
# this should match how data gets written out by Badger Sett
json.dump(seed_data, seed_file, indent=2, sort_keys=True, separators=(',', ': '))