-
Notifications
You must be signed in to change notification settings - Fork 1
/
rhyming.py
64 lines (53 loc) · 1.76 KB
/
rhyming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import nltk
import re
import os
# Let NLTK also search an "nltk_data" directory located under the current
# working directory when it resolves corpora.
nltk.data.path.append(os.getcwd()+"/nltk_data")
# Module-wide pronunciation dictionary loaded from the CMUdict corpus. The
# functions in this file index it by word and treat each value as a list of
# pronunciations, each pronunciation being a sequence of phoneme strings.
d = nltk.corpus.cmudict.dict()
def count_syllables(word):
    """Estimate the number of syllables in *word*.

    If the lower-cased word is a key of the pronunciation dictionary ``d``,
    the result is the largest count of digit-terminated phonemes found among
    its pronunciations (in CMUdict these are presumably the stress-marked
    vowel phonemes). Otherwise the function falls back to counting how many
    times the letters a, e, i, o, u (in either case) occur in *word*.
    """
    key = word.lower()
    if key not in d:
        # Unknown word: approximate syllables by the number of vowel letters.
        return sum(word.count(letter) for letter in "aeiouAEIOU")

    per_pronunciation = []
    for pronunciation in d[key]:
        marked = sum(1 for phoneme in pronunciation if phoneme[-1].isdigit())
        per_pronunciation.append(marked)
    return max(per_pronunciation)
def real_word(w):
    """Return True when *w* is a usable dictionary word, else False.

    A token qualifies when all of the following hold:

    * it contains at least one word character (this rejects pure
      punctuation tokens before the dictionary is consulted),
    * it is a key of the pronunciation dictionary ``d`` exactly as given
      (no case folding is applied here), and
    * ``d`` lists exactly one pronunciation for it.

    The result is always a ``bool``; the pattern is written as a raw string
    so the regex escape is not interpreted as a string escape.
    """
    return bool(re.match(r".*\w+.*", w)) and w in d and len(d[w]) == 1
def rhyme(word1, word2):
    """Score how well *word1* and *word2* rhyme.

    Returns 0 when either word is a substring of the other (which includes
    identical words) or when their first listed pronunciations in ``d`` are
    the same. Otherwise the first pronunciation of each word is reversed and
    handed to ``phoneme_match``, whose result is returned.

    Both words must be keys of ``d`` unless the substring check already
    returned; a missing key raises ``KeyError``.
    """
    one_contains_other = word1 in word2 or word2 in word1
    if one_contains_other:
        return 0

    # Reverse so that comparison starts from the end of each word.
    tail_first_1 = d[word1][0][::-1]
    tail_first_2 = d[word2][0][::-1]
    if tail_first_1 != tail_first_2:
        return phoneme_match(tail_first_1, tail_first_2)
    return 0
def phoneme_match(w1, w2):
    """Count the leading phonemes that *w1* and *w2* have in common.

    The two sequences are walked in lockstep from index 0. A position
    matches when both phonemes end in a digit and are equal once that final
    character is dropped, or when the two phonemes are exactly equal.
    Counting stops at the first position that does not match.

    Returns the number of matched positions, but only if at least one of
    them was a digit-terminated match; otherwise returns 0.
    """
    matched = 0
    saw_vowel = False
    for left, right in zip(w1, w2):
        both_marked = left[-1].isdigit() and right[-1].isdigit()
        if both_marked and left[:-1] == right[:-1]:
            # Same phoneme apart from the trailing digit.
            saw_vowel = True
        elif left != right:
            # First disagreement ends the common run.
            break
        matched += 1
    return matched if saw_vowel else 0
def _rhymable_endings(lyrics):
    """Return ``(index, last_token)`` for each line ending in a real word."""
    endings = []
    for index, line in enumerate(lyrics):
        tokens = nltk.word_tokenize(line)
        if tokens and real_word(tokens[-1]):
            endings.append((index, tokens[-1]))
    return endings


def find_rhymes(lyrics1, lyrics2):
    """Find the best-rhyming pair of line endings across two lyrics.

    Each line is tokenized with ``nltk.word_tokenize`` and only its last
    token is considered, provided ``real_word`` accepts it. Every ending of
    *lyrics1* is scored against every ending of *lyrics2* with ``rhyme``.

    Args:
        lyrics1: iterable of text lines.
        lyrics2: iterable of text lines.

    Returns:
        A tuple ``(score, index1, index2)`` for the highest-scoring pair,
        where the indices are line positions in *lyrics1* and *lyrics2*.
        On ties the first pair encountered (outer loop over *lyrics1*,
        inner loop over *lyrics2*) is kept. ``(0, 0, 0)`` is returned when
        no pair scores above zero.
    """
    best = (0, 0, 0)

    # Tokenize and filter each side once instead of once per line pair.
    endings1 = _rhymable_endings(lyrics1)
    if not endings1:
        return best
    endings2 = _rhymable_endings(lyrics2)

    for index1, word1 in endings1:
        for index2, word2 in endings2:
            score = rhyme(word1, word2)
            # Strict comparison keeps the earliest pair among equal scores.
            if score > best[0]:
                best = (score, index1, index2)
    return best