-
Notifications
You must be signed in to change notification settings - Fork 0
/
pairing_analyzer.py
226 lines (191 loc) · 7.22 KB
/
pairing_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
"""
Analyze stories regarding their pairings.
"""
import argparse
import pprint
import itertools
from chord import Chord
from ff2zim.project import Project
def increment_dict(org, new):
    """
    Add the values of one dictionary onto another, key by key, in place.

    Every key of C{org} is looked up in C{new}, so C{new} has to provide at
    least the keys of C{org}; keys only present in C{new} are ignored.
    @param org: dict whose values get incremented (modified in place)
    @type org: L{dict}
    @param new: dict supplying the increments
    @type new: L{dict}
    """
    for key, before in org.items():
        after = before + new[key]
        org[key] = after
        # sanity check: incrementing must never make a value smaller
        assert after >= before
def hsv_to_rgb(h, s, v):
    """
    Convert a color given as hue, saturation and value into an (r, g, b) tuple.

    Adapted from https://stackoverflow.com/a/26856771
    @param h: hue; multiplied by six to select one of the six color sextants
    @param s: saturation; 0.0 yields the gray (v, v, v)
    @param v: value (brightness)
    @return: the color as (r, g, b)
    @rtype: L{tuple}
    """
    if s == 0.0:
        return (v, v, v)
    scaled = h * 6.
    sextant = int(scaled)  # XXX assume int() truncates!
    frac = scaled - sextant
    low = v * (1. - s)
    falling = v * (1. - s * frac)
    rising = v * (1. - s * (1. - frac))
    # one (r, g, b) layout per sextant of the color wheel
    by_sextant = (
        (v, rising, low),
        (falling, v, low),
        (low, v, rising),
        (low, falling, v),
        (rising, low, v),
        (v, low, falling),
    )
    return by_sextant[sextant % 6]
def colorlist(n):
    """
    Create a list containing n colors.

    The hues are spread evenly over the whole color wheel (hue i / n for the
    i-th color) at full saturation and value, so every requested color gets
    its own hue regardless of how large n is.
    @param n: number of colors
    @type n: L{int}
    @return: the colors as returned by hsv_to_rgb(); empty if n <= 0
    @rtype: L{list} of L{tuple}
    """
    # range() is empty for n <= 0, so the division is never evaluated then
    return [hsv_to_rgb(i / float(n), 1.0, 1.0) for i in range(n)]
class PairingAnalyzer(object):
    """
    Class for analyzing pairings in stories from a ff2zim project.
    @param path: path to the project
    @type path: L{str}
    @param adult_only: if true, only stories accepted by the rating check in filter_story() are evaluated
    @type adult_only: L{bool}
    """

    def __init__(self, path, adult_only=False):
        self.path = path
        self.adult_only = adult_only
        self.project = Project(self.path)

    def filter_story(self, metadata):
        """
        Decide whether a story should be included, depending on the metadata.

        Without adult_only every story is included. With adult_only a story is
        included if its rating equals "m" after lowercasing, stripping
        whitespace and removing any "+".
        @param metadata: metadata of story
        @type metadata: L{dict}
        @return: whether the story should be included
        @rtype: L{bool}
        """
        if self.adult_only:
            return metadata["rating"].lower().strip().replace("+", "") == "m"
        return True

    @staticmethod
    def _split_ships(rawpairings):
        """
        Turn the raw ships of one story into unique two-member pairings.

        Ships with more than two members are split into every two-member
        combination, ships with fewer than two members are dropped. Each
        pairing is sorted to get a unified name and is returned only once,
        in order of first appearance.
        @param rawpairings: the ships of one story
        @type rawpairings: iterable of sequences
        @return: the unique pairings
        @rtype: L{list} of L{tuple}
        """
        pairings = []
        seen = set()
        for ship in rawpairings:
            # combinations() yields the pair itself for two members, every
            # pair for larger ships and nothing for shorter ones
            for pair in itertools.combinations(ship, 2):
                unified = tuple(sorted(pair))
                if unified not in seen:
                    seen.add(unified)
                    pairings.append(unified)
        return pairings

    def get_pairing_data(self):
        """
        Get a dict containing the collected data.

        Only stories accepted by filter_story() are evaluated, and a pairing
        is counted once per story even if the story lists it several times.
        @return: a dict with two entries: "stats" maps each pairing (a sorted tuple of two names) to the summed values of its stories, "correlation" maps a sorted tuple of two "name/name" strings to a dict holding only "occurences"
        @rtype: L{dict} of L{str} -> L{dict}
        """
        pairing_stats = {}
        other_pairings = {}
        for metadata in self.project.collect_metadata():
            if not self.filter_story(metadata):
                continue
            pairings = self._split_ships(metadata["ships"])
            for pairing in pairings:
                data = {
                    "occurences": 1,
                    "reviews": metadata["reviews"],
                    "follows": metadata["follows"],
                    "favorites": metadata["favs"],
                    "words": metadata["numWords"],
                    "chapters": metadata["numChapters"],
                }
                if pairing not in pairing_stats:
                    pairing_stats[pairing] = data
                else:
                    increment_dict(pairing_stats[pairing], data)
                if len(pairings) == 1:
                    other = None
                    continue  # comment this line out to keep None
                else:
                    # pairings are unique here, so another one always exists.
                    # NOTE(review): only the first other pairing of the story
                    # is correlated, and every pairing of the story adds its
                    # own count - confirm this is the intended weighting.
                    other = next(p for p in pairings if p != pairing)
                cor_key = tuple(sorted(("/".join(pairing), ("/".join(other) if other is not None else "None"))))
                if cor_key not in other_pairings:
                    other_pairings[cor_key] = {"occurences": 1}
                else:
                    other_pairings[cor_key]["occurences"] += 1
        return {
            "stats": pairing_stats,
            "correlation": other_pairings,
        }

    def makegraph(self, outfile, masterkey="stats", key="occurences"):
        """
        Generate a chord diagram and write it as HTML.

        Entries whose value for key is <= 0 are left out of the diagram.
        @param outfile: path to file to write to
        @type outfile: L{str}
        @param masterkey: category key of get_pairing_data()'s result to use.
        @type masterkey: L{str}
        @param key: key for value to use; entries of the "correlation" category only provide "occurences", any other key raises a KeyError there
        @type key: L{str}
        """
        # prepare data
        data = self.get_pairing_data()
        pairingstats = data[masterkey]
        # purge all entries where the value of key is <= 0
        to_purge = [p for p, entry in pairingstats.items() if entry[key] <= 0]
        for p in to_purge:
            del pairingstats[p]

        names = sorted(set(name for pairing in pairingstats for name in pairing))
        n_names = len(names)
        # position of every name in the matrix, looked up once instead of per pairing
        index_of = dict((name, idx) for idx, name in enumerate(names))
        matrix = [[0] * n_names for _ in range(n_names)]
        for pairing, entry in pairingstats.items():
            v = entry[key]
            name_1, name_2 = pairing
            i, j = index_of[name_1], index_of[name_2]
            # the diagram is undirected, so the matrix is filled symmetrically
            matrix[i][j] = v
            matrix[j][i] = v

        c = Chord(
            matrix,
            names,
            width=1600,
            margin=200,
            padding=0.02,
            wrap_labels=False,
            font_size="12px",
            noun="FanFics",
            allow_download=True,
            #title="Ships (by {})".format(key),
            credit=False,
        )
        c.to_html(filename=outfile)
def main():
    """
    The main function.

    Parses the command line, optionally prints the gathered data and writes
    the chord diagram for the chosen category and key to the output file.
    """
    parser = argparse.ArgumentParser(description="Analyze stories regarding their pairings of a ff2zim project")
    parser.add_argument("project", help="path to ff2zim project")
    parser.add_argument("outfile", help="path to write HTML to")
    parser.add_argument("masterkey", action="store", choices=("stats", "correlation"), help="switch between stats for individual pairings and stats between pairings")
    parser.add_argument("key", action="store", choices=("occurences", "reviews", "follows", "favorites", "words", "chapters"), help="value to use for the graph")
    parser.add_argument("--adult-only", action="store_true", help="Only evaluate fanfics rated mature / adult_only. May not work correctly.")
    parser.add_argument("-p", action="store_true", help="print gathered data")
    ns = parser.parse_args()

    # the correlation data only counts occurences; reject other keys up front
    # instead of failing with a KeyError after the project has been read
    if ns.masterkey == "correlation" and ns.key != "occurences":
        parser.error('masterkey "correlation" only supports the key "occurences"')

    analyzer = PairingAnalyzer(ns.project, adult_only=ns.adult_only)
    if ns.p:
        # only gather the data here when it is actually going to be printed
        pprint.pprint(analyzer.get_pairing_data())
    analyzer.makegraph(ns.outfile, masterkey=ns.masterkey, key=ns.key)


if __name__ == "__main__":
    main()