-
Notifications
You must be signed in to change notification settings - Fork 7
/
parser.py
129 lines (105 loc) · 4.45 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
''' infinite music from a midi file, mostly if that midi is gnossiennes no 1 '''
from mido import MidiFile
from collections import defaultdict
import json
import sys
class NotationDistribution(object):
    ''' probabilistic music notation generation data

    Holds one first-order markov transition table per midi track. The states
    of each table are the measure "identifier" strings produced by
    tokenize_track. '''

    def __init__(self, filename):
        ''' parse the midi file at `filename` and populate the tables '''
        # keep track of the first token in each track of the original piece;
        # parse_midi grows this list when the file has more than two tracks
        self.starts = [None, None]
        # markov chain transition tables for each track
        self.track_dists = []
        # relationship between simultaneous notes between tracks
        # (initialised here; nothing in this class fills it in)
        self.joint_dist = defaultdict(lambda: defaultdict(lambda: 0))
        # length of one measure, in the same unit as the midi message times
        # NOTE(review): tokenize_track's original docstring calls this "ms";
        # mido track messages normally carry delta ticks -- confirm
        self.measure_length = 1920
        # populates the distributions
        self.parse_midi(filename)

    def parse_midi(self, filename):
        ''' create transition tables from a midi file
        tables look like
        {token: {next token: count, next token: count}, token: {..}, ...}
        the values are raw occurrence counts, not normalised probabilities.
        one table per track is appended to self.track_dists, and the first
        token identifier of each track is stored in self.starts '''
        mid = MidiFile(filename)
        tracks = mid.tracks

        # make room for a start token for every track, not just the first two
        missing = len(tracks) - len(self.starts)
        if missing > 0:
            self.starts.extend([None] * missing)

        # how often one token follows another in a track
        for (idx, track) in enumerate(tracks):
            tokens = self.tokenize_track(track)
            # a start is only recorded when the track has a transition at all
            if self.starts[idx] is None and len(tokens) > 1:
                self.starts[idx] = tokens[0]['identifier']
            self.track_dists.append(self._transition_counts(tokens))

    def _transition_counts(self, tokens):
        ''' count, for each token identifier, which identifiers follow it '''
        dist = defaultdict(lambda: defaultdict(lambda: 0))
        for (current, following) in zip(tokens, tokens[1:]):
            dist[current['identifier']][following['identifier']] += 1

        # a dead end is a token that can be reached but has no successors of
        # its own, i.e. it never shows up as a key of the table
        dead = set()
        for entry in dist.values():
            for successor in entry:
                if successor not in dist:
                    dead.add(successor)

        # keep dead ends in the table but give them a weight of zero
        for (identifier, entry) in list(dist.items()):
            dist[identifier] = {k: 0 if k in dead else v
                                for (k, v) in entry.items()}
        return dist

    def tokenize_track(self, track):
        ''' group 4/4 measures together (self.measure_length) -- sorry satie

        only note_on messages are considered. a new group starts whenever the
        accumulated time reaches self.measure_length on a note_on with a
        non-zero velocity; the finished group becomes a token
        {'notes': [[message, start, end], ...], 'identifier': str}.
        the messages are modified in place: velocities are normalised to
        100 (sounding) or 0 (note end), and times are shifted when a full
        measure rest is split off into its own rest token.
        the group still being collected when the track ends is not emitted.
        returns the list of tokens '''
        # controls and settings get added back later
        notes = [n for n in track if n.type == 'note_on']
        tokens = []
        group = []
        running_time = 0
        rest_token = {
            'identifier': '70/1/0/1920',
            'notes': [],
        }

        for message in notes:
            running_time += message.time
            starts_measure = (running_time >= self.measure_length
                              and message.velocity > 0)

            # we're done, process the note group into a token. the group can
            # only be empty on the very first note_on (a track that opens
            # with a long silence); there is nothing to emit in that case
            if starts_measure and group:
                add_rest = self._strip_measure_rest(group)
                tokens.append({
                    'notes': group,
                    'identifier': self._group_identifier(group),
                })
                if add_rest:
                    tokens.append(rest_token)
                group = []
                running_time = message.time

            # and let's get on with the next group
            message.velocity = 100 if message.velocity else 0
            entry = [message, running_time, 0]  # note, start time, end time

            # a note ender
            if message.velocity == 0:
                # search backwards in the group for the most recent entry
                # with the same pitch and close it at the current time
                for earlier in reversed(group):
                    if earlier[0].note == message.note:
                        earlier[2] = running_time
                        break
            group.append(entry)

        return tokens

    def _strip_measure_rest(self, group):
        ''' take a stray full measure rest out of the end of a group

        returns True when a rest was removed, so the caller can emit a
        separate rest token. `group` must not be empty '''
        last = group[-1]
        # NOTE(review): 3000 is the original hard-coded threshold for
        # "this delta contains a whole measure of silence" -- confirm
        if not last[0].time > 3000:
            return False
        last[0].time -= self.measure_length
        last[1] -= self.measure_length
        # the entry before it ends one measure earlier too, when there is one
        if len(group) > 1:
            group[-2][2] -= self.measure_length
        return True

    def _group_identifier(self, group):
        ''' build the "note/velocity/time/duration|..." key for a group '''
        parts = []
        for (message, start, end) in group:
            # note enders (velocity 0) always get a duration of 0
            duration = end - start if message.velocity > 0 else 0
            parts.append('%d/%d/%d/%d' % \
                    (message.note, message.velocity, message.time, duration))
        return '|'.join(parts)
if __name__ == '__main__':
    # midi file to parse: first command line argument, else the default piece
    try:
        f = sys.argv[1]
    except IndexError:
        f = 'gnossiennes_1.mid'
    notation = NotationDistribution(f)
    # write the per-track transition tables; the context manager makes sure
    # dists.json is flushed and closed even if serialisation fails
    with open('dists.json', 'w') as outfile:
        json.dump(notation.track_dists, outfile)