|
30 | 30 | PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$") |
31 | 31 | PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$") |
32 | 32 | PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):(\d+)$") |
33 | | -PATTERN_AGENT = re.compile(r"^(/Satoshi:0.14.(0|1|2|99)/|/Satoshi:0.15.(0|1|2|99)|/Satoshi:0.16.(0|1|2|99)/)$") |
| 33 | +PATTERN_AGENT = re.compile( |
| 34 | + r"^/Satoshi:(" |
| 35 | + r"0.14.(0|1|2|3|99)|" |
| 36 | + r"0.15.(0|1|2|99)|" |
| 37 | + r"0.16.(0|1|2|3|99)|" |
| 38 | + r"0.17.(0|0.1|1|2|99)|" |
| 39 | + r"0.18.(0|1|99)|" |
| 40 | + r"0.19.99" |
| 41 | + r")") |
34 | 42 |
|
35 | 43 | def parseline(line): |
36 | 44 | sline = line.split() |
@@ -99,6 +107,13 @@ def parseline(line): |
99 | 107 | 'sortkey': sortkey, |
100 | 108 | } |
101 | 109 |
|
def dedup(ips):
    '''Deduplicate parsed seed entries by (address, port).

    ips: iterable of dicts that each carry an 'ip' key and, optionally, a
    'port' key. When several entries share the same key the last one wins,
    but it keeps the position of the first occurrence. Returns a new list.

    The port is part of the key so that one address seen on several ports
    is not collapsed into a single entry here; collapsing it would hide
    such hosts from any later per-address multi-port filtering.
    Entries without a 'port' key are keyed on the address alone.
    '''
    unique = {}
    for ip in ips:
        unique[(ip['ip'], ip.get('port'))] = ip
    return list(unique.values())
| 116 | + |
102 | 117 | def filtermultiport(ips): |
103 | 118 | '''Filter out hosts with more nodes per IP''' |
104 | 119 | hist = collections.defaultdict(list) |
@@ -146,29 +161,54 @@ def filterbyasn(ips, max_per_asn, max_total): |
146 | 161 | result.extend(ips_onion) |
147 | 162 | return result |
148 | 163 |
|
def ip_stats(ips):
    '''Summarise how many entries belong to each network type.

    Entries that are None are skipped. Returns a string of the form
    'IPv4 <n>, IPv6 <n>, Onion <n>', counted by each entry's 'net' value.
    '''
    per_net = collections.Counter(
        entry['net'] for entry in ips if entry is not None
    )
    counts = tuple(per_net[net] for net in ('ipv4', 'ipv6', 'onion'))
    return 'IPv4 %d, IPv6 %d, Onion %d' % counts
| 171 | + |
149 | 172 | def main(): |
150 | 173 | lines = sys.stdin.readlines() |
151 | 174 | ips = [parseline(line) for line in lines] |
152 | 175 |
|
153 | | - # Skip entries with valid address. |
| 176 | + print('Initial: %s' % (ip_stats(ips)), file=sys.stderr) |
| 177 | + # Skip entries with invalid address. |
154 | 178 | ips = [ip for ip in ips if ip is not None] |
| 179 | + print('Skip entries with invalid address: %s' % (ip_stats(ips)), file=sys.stderr) |
| 180 | + # Skip duplicates (in case multiple seeds files were concatenated) |
| 181 | + ips = dedup(ips) |
| 182 | + print('After removing duplicates: %s' % (ip_stats(ips)), file=sys.stderr) |
155 | 183 | # Skip entries from suspicious hosts. |
156 | 184 | ips = [ip for ip in ips if ip['ip'] not in SUSPICIOUS_HOSTS] |
| 185 | + print('Skip entries from suspicious hosts: %s' % (ip_stats(ips)), file=sys.stderr) |
157 | 186 | # Enforce minimal number of blocks. |
158 | 187 | ips = [ip for ip in ips if ip['blocks'] >= MIN_BLOCKS] |
| 188 | + print('Enforce minimal number of blocks: %s' % (ip_stats(ips)), file=sys.stderr) |
159 | 189 | # Require service bit 1. |
160 | 190 | ips = [ip for ip in ips if (ip['service'] & 1) == 1] |
161 | | - # Require at least 50% 30-day uptime. |
162 | | - ips = [ip for ip in ips if ip['uptime'] > 50] |
| 191 | + print('Require service bit 1: %s' % (ip_stats(ips)), file=sys.stderr) |
| 192 | + # Require at least 50% 30-day uptime for clearnet, 10% for onion. |
| 193 | + req_uptime = { |
| 194 | + 'ipv4': 50, |
| 195 | + 'ipv6': 50, |
| 196 | + 'onion': 10, |
| 197 | + } |
| 198 | + ips = [ip for ip in ips if ip['uptime'] > req_uptime[ip['net']]] |
| 199 | + print('Require minimum uptime: %s' % (ip_stats(ips)), file=sys.stderr) |
163 | 200 | # Require a known and recent user agent. |
164 | 201 | ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])] |
| 202 | + print('Require a known and recent user agent: %s' % (ip_stats(ips)), file=sys.stderr) |
165 | 203 | # Sort by availability (and use last success as tie breaker) |
166 | 204 | ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True) |
167 | 205 | # Filter out hosts with multiple bitcoin ports, these are likely abusive |
168 | 206 | ips = filtermultiport(ips) |
| 207 | + print('Filter out hosts with multiple bitcoin ports: %s' % (ip_stats(ips)), file=sys.stderr) |
169 | 208 | # Look up ASNs and limit results, both per ASN and globally. |
170 | 209 | ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS) |
171 | 210 | # Sort the results by IP address (for deterministic output). |
| 211 | + print('Look up ASNs and limit results, both per ASN and globally: %s' % (ip_stats(ips)), file=sys.stderr) |
172 | 212 | ips.sort(key=lambda x: (x['net'], x['sortkey'])) |
173 | 213 |
|
174 | 214 | for ip in ips: |
|
0 commit comments