-
Notifications
You must be signed in to change notification settings - Fork 1
/
tswift_modified.py
202 lines (167 loc) · 6.04 KB
/
tswift_modified.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env python3
"""
Get Taylor Swift Lyrics!
Actually, this module is more general than just Taylor Swift. It is a
MetroLyrics API of sorts, that allows you to get song lyrics and find all songs
by an artist. My use case for this API was rather simple, and thus the API
doesn't have much. It is mainly useful if you want to get the lyrics of all
songs written by a certain artist (for example, Taylor Swift).
"""
from lxml import html
from google import search
import argparse
import requests
import re
import random
import sys
# Paginated song listing for one artist. ``artist`` is the slugified artist
# name and ``n`` is the page number (Artist.load starts counting at 1).
ARTIST_URL = "http://www.metrolyrics.com/{artist}-alpage-{n}.html"

# Lyrics page for a single song; both fields are slugs (see slugify()).
SONG_URL = "http://www.metrolyrics.com/{title}-lyrics-{artist}.html"

# Recognises a lyrics-page URL and captures (title slug, artist slug).
# Both http and https are accepted so that links which use the secure scheme
# are not rejected; the two capture groups are unchanged.
SONG_RE = r'https?://www\.metrolyrics\.com/(.*)-lyrics-(.*)\.html'
def slugify(string):
    """Return *string* lower-cased with every space turned into a hyphen.

    Only spaces are touched; punctuation and existing hyphens are left as
    they are, and consecutive spaces become consecutive hyphens.
    """
    pieces = string.lower().split(' ')
    return '-'.join(pieces)
def deslugify(string):
    """Return *string* with hyphens turned into spaces, in title case.

    This is the display-oriented counterpart of slugify(); it is not an exact
    inverse, because the original capitalisation is not recoverable.
    """
    spaced = ' '.join(string.split('-'))
    return spaced.title()
class Song(object):
    """An object that represents a song, whose lyrics can be retrieved.

    Lyrics are fetched lazily: nothing is downloaded until ``load()`` is
    called or the ``lyrics`` property is first read.
    """

    def __init__(self, title=None, artist=None, url=None):
        """
        Create a song.

        You can EITHER provide a URL for the song lyrics, OR provide the song
        title and artist (which will be slugified). If both are provided, the
        URL is preferred.

        Whichever way the song is created, ``title`` and ``artist`` are
        stored in their deslugified (space-separated, title-cased) form.

        Raises ValueError if ``url`` is given but does not match SONG_RE, or
        if neither a URL nor a title/artist pair is given.
        """
        self._lyrics = None
        if url is not None:
            match = re.match(SONG_RE, url)
            if match is None:
                # Fail with a clear message instead of an AttributeError on
                # ``None.groups()``.
                raise ValueError(
                    'Not a MetroLyrics song URL: %r' % (url,)
                )
            self._url = url
            self.title, self.artist = match.groups()
        elif title is not None and artist is not None:
            self.title = title
            self.artist = artist
            self._url = SONG_URL.format(
                title=slugify(title),
                artist=slugify(artist),
            )
        else:
            raise ValueError('Must provide either title & artist or URL.')
        self.title = deslugify(self.title)
        self.artist = deslugify(self.artist)

    def load(self):
        """Load the lyrics from MetroLyrics.

        Downloads the song page, collects the text of every element with
        class ``verse`` inside the ``lyrics-body-text`` element, and stores
        the verses joined by blank lines. Returns ``self`` so calls can be
        chained.
        """
        page = requests.get(self._url)
        # Forces utf-8 to prevent character mangling
        page.encoding = 'utf-8'
        tree = html.fromstring(page.text)
        lyric_div = tree.get_element_by_id('lyrics-body-text')
        verses = [c.text_content() for c in lyric_div.find_class('verse')]
        self._lyrics = '\n\n'.join(verses)
        return self

    @property
    def lyrics(self):
        """The lyrics text, downloaded on first access and then cached."""
        if self._lyrics is None:
            self.load()
        return self._lyrics

    def format(self):
        """Return title, artist, a dashed rule and the lyrics as one string.

        The rule is as wide as the longer of title and artist. Reading the
        lyrics may trigger a download (see ``lyrics``).
        """
        return '%s\n%s\n%s\n\n%s' % (
            self.title,
            self.artist,
            '-' * max(len(self.title), len(self.artist)),
            self.lyrics,
        )

    def __repr__(self):
        return 'Song(title=%r, artist=%r)' % (self.title, self.artist)

    @staticmethod
    def find_song(lyrics):
        """Search the web for a song containing *lyrics*.

        Returns a Song for the first search result whose URL matches
        SONG_RE, or None if none of the results examined match.
        """
        # The space before "metrolyrics" keeps the site name a separate search
        # term rather than fusing it with the last word of the lyrics.
        query = 'song lyrics ' + lyrics + ' metrolyrics'
        for url in search(query, stop=20):
            if re.match(SONG_RE, url):
                return Song(url=url)
        return None
class Artist(object):
    """
    An object that represents an artist, and can get you their songs.

    Pass into the constructor the "name" of the artist. Generally, this is the
    lower case name with spaces replaced by hyphens, and punctuation removed.
    The constructor lower-cases the name and hyphenates spaces for you (via
    slugify), but it does not strip punctuation.
    I don't really provide any utilities for searching for this name. If you
    just Google the artist + " lyrics", you'll probably get their MetroLyrics
    page, and so you can get the artist's "name" from that.
    """

    def __init__(self, name):
        # None means "not loaded yet"; an empty list means "loaded, no songs".
        self._songs = None
        self.name = slugify(name)

    def load(self, verbose=False):
        """
        Load the list of songs.

        Note that this only loads a list of songs that this artist was the main
        artist of. If they were only featured in the song, that song won't be
        listed here. There is a list on the artist page for that, I just
        haven't added any parsing code for that, since I don't need it.

        If ``verbose`` is true, a progress line is printed for each page
        requested. Returns ``self`` so calls can be chained.

        Raises ValueError if a row of the song table does not contain exactly
        one title link. The cached song list is only replaced once every page
        has been read successfully.
        """
        # These expressions do not change between pages.
        song_rows_xp = r'//*[@id="popular"]/div/table/tbody/tr'
        songlist_pagination_xp = (r'//*[@id="main-content"]/div[1]/'
                                  'div[2]/p/span/a')
        song_link_xp = r'./td/a[contains(@class,"title")]'

        # Build the list locally so that a failure part-way through does not
        # leave a truncated list that ``songs`` would treat as complete.
        songs = []
        page_num = 1
        total_pages = 1
        while page_num <= total_pages:
            if verbose:
                print('retrieving page %d' % page_num)
            page = requests.get(ARTIST_URL.format(artist=self.name,
                                                  n=page_num))
            tree = html.fromstring(page.text)
            for row in tree.xpath(song_rows_xp):
                song_link = row.xpath(song_link_xp)
                # Explicit check rather than ``assert``, which is removed
                # when Python runs with -O.
                if len(song_link) != 1:
                    raise ValueError(
                        'Expected exactly one song link per row on page %d '
                        'for artist %r, found %d'
                        % (page_num, self.name, len(song_link))
                    )
                songs.append(Song(url=song_link[0].attrib['href']))
            # The number of pagination links tells us how many pages exist.
            total_pages = len(tree.xpath(songlist_pagination_xp))
            page_num += 1
        self._songs = songs
        return self

    @property
    def songs(self):
        """The artist's songs, loaded on first access and then cached."""
        if self._songs is None:
            self.load()
        return self._songs

    def __repr__(self):
        return 'Artist(%r)' % self.name
def main():
    """
    Run the CLI.

    Picks a song in one of three ways, in order of precedence: by searching
    for the text given with --lyrics, by the --song title together with the
    artist, or at random from the artist's song list. The chosen song is
    printed with its lyrics. Exits with status 1 if no song can be found.
    """
    parser = argparse.ArgumentParser(
        description='Search artists, lyrics, and songs!'
    )
    parser.add_argument(
        'artist',
        help='Specify an artist name (Default: Taylor Swift)',
        default='Taylor Swift',
        nargs='?',
    )
    parser.add_argument(
        '-s', '--song',
        help='Given artist name, specify a song name',
        required=False,
    )
    parser.add_argument(
        '-l', '--lyrics',
        help='Search for song by lyrics',
        required=False,
    )
    args = parser.parse_args()

    if args.lyrics:
        song = Song.find_song(args.lyrics)
        if song is None:
            # find_song returns None when no search result is a lyrics page;
            # report that instead of failing on ``None.format()`` below.
            print('Couldn\'t find any songs matching lyrics {!r}!'
                  .format(args.lyrics))
            sys.exit(1)
    elif args.song:
        song = Song(
            title=args.song,
            artist=args.artist,
        )
    else:
        artist = Artist(args.artist)
        if artist.songs:
            song = random.choice(artist.songs)
        else:
            print('Couldn\'t find any songs by artist {}!'
                  .format(args.artist))
            sys.exit(1)
    print(song.format())
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()