-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathinformation_theory.py
58 lines (33 loc) · 1.42 KB
/
information_theory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import math
from collections import Counter, defaultdict
def p_log_p(p, base = 2):
    """
    Return p * log_base(p), the summand of Shannon entropy, using the
    standard convention that the boundary values contribute 0
    (p*log(p) -> 0 as p -> 0, and log(1) = 0).

    Parameters
    ----------
    p : float
        A probability; must satisfy 0 <= p <= 1.
    base : int or float, optional
        Logarithm base (default 2, i.e. entropy in bits).

    Raises
    ------
    ValueError
        If p lies outside [0, 1].
    """
    # Raise instead of assert: asserts are stripped under `python -O`,
    # which would silently admit invalid probabilities.
    if not 0 <= p <= 1:
        raise ValueError(f"probability must lie in [0, 1], got {p!r}")
    # Interior of (0, 1): the ordinary formula applies.
    if 0 < p < 1:
        return p * math.log(p, base)
    # Boundary (p == 0 or p == 1): contribution is 0 by convention.
    return 0
def create_prob_list(list_hash):
"""
Gives a list of probabilities [p_1, ..., p_n] with p_1 + ... + p_n = 1 for a list of hashable objects based on the relative frequencies of the objects.
"""
entities_to_counts = defaultdict(int)
for entity in list_hash:
entities_to_counts[entity] += 1
len_list_hash = sum(entities_to_counts.values())
prob_list = (count/len_list_hash for count in entities_to_counts.values())
return prob_list
def entropy_from_prob_list(prob_list, base = 2):
    """
    Compute the entropy H = -sum(p_i * log(p_i)) of a probability
    distribution given as an iterable [p_1, ..., p_n] with p_1 + ... + p_n = 1.
    """
    # Accumulate the (non-positive) p*log(p) terms, then negate.
    acc = 0
    for p in prob_list:
        acc += p_log_p(p, base)
    return -acc
def entropy(list_hash, base = 2):
    """
    Compute the entropy H of a list of hashable objects, taking each
    object's relative frequency in the list as its probability.
    """
    probabilities = create_prob_list(list_hash)
    return entropy_from_prob_list(probabilities, base)
def mi(list_hash_1, list_hash_2, base = 2):
    """
    Compute the mutual information I of two equal-length lists of hashable
    objects X and Y, via the identity I(X,Y) = H(X) + H(Y) - H(X,Y).

    Parameters
    ----------
    list_hash_1, list_hash_2 : sequence of hashable
        Paired observations of X and Y; must have the same length.
    base : int or float, optional
        Logarithm base (default 2, i.e. bits).

    Raises
    ------
    ValueError
        If the two samples differ in length: zip() would silently drop
        the tail of the longer one, yielding a wrong joint entropy H(X,Y)
        and therefore a wrong mutual information.
    """
    if len(list_hash_1) != len(list_hash_2):
        raise ValueError("samples must have equal length for mutual information")
    # Joint entropy over paired observations (x_i, y_i).
    joint = entropy(zip(list_hash_1, list_hash_2), base)
    return entropy(list_hash_1, base) + entropy(list_hash_2, base) - joint