Skip to content

Commit 0d5649a

Browse files
author
FullStackDeveloper
authored
增加查询所有歌单,增加无信息处理
1 parent 4f5ae13 commit 0d5649a

File tree

5 files changed

+61
-34
lines changed

5 files changed

+61
-34
lines changed

music163/albums.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,37 +13,48 @@
1313
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
1414
}
1515
# https://music.163.com/#/artist/album?id=2116
16+
# https://music.163.com/#/artist/album?id=1177234&limit=12&offset=36
1617
url = 'https://music.163.com/artist/album'
1718

1819
'''
1920
IN:
2021
artist_id 歌手id
2122
OUT:
2223
album_infos = [{'album_id': '36875566', 'album_title': 'G.ream', 'album_time': '2017.12.07'},{}]
23-
24+
limit 分页歌曲数
25+
offset 分页开始
2426
'''
2527

2628

2729
def main_spider(artist_id='8888'):
    """Crawl every album of one artist, walking the paginated album list.

    IN:
        artist_id -- NetEase artist id as a string (e.g. '1177234').
    OUT:
        list of [album_id, album_title, album_time] lists; empty list when
        the artist page has no album list at all.
        (NOTE(review): the original docstring above promised dicts, but the
        code has always appended lists — kept as-is for caller compatibility.)
    """
    limit = 12   # albums per page; matches the site's default page size
    offset = 0   # index of the first album on the current page
    album_infos = []
    while True:
        request = urllib.request.Request(
            url + '?id=' + artist_id + '&limit=' + str(limit) + '&offset=' + str(offset),
            headers=header)
        response = urllib.request.urlopen(request)
        soup = BeautifulSoup(response.read().decode('utf-8'), 'html.parser')

        # Past the last page (or unknown artist): no album list element.
        albums = soup.find('ul', {'class': 'm-cvrlst m-cvrlst-alb4 f-cb'})
        if albums is None:
            return album_infos

        # find_all() returns an empty ResultSet, never None, so the original
        # "is None" check could never fire — test truthiness instead.
        album_basic_infos = albums.find_all('p', {'class': 'dec dec-1 f-thide2 f-pre'})
        if not album_basic_infos:
            return album_infos

        for album_basic_info in album_basic_infos:
            # <a href="/album?id=NNN"> precedes the <p>; the sibling holds the date.
            album_infos.append([
                album_basic_info.find_next()['href'].replace('/album?id=', '').strip(),
                album_basic_info['title'],
                album_basic_info.find_next_sibling().text,
            ])

        offset += limit  # advance by the page size, not a hard-coded constant
4859

49-
#main_spider()
60+
# print(main_spider('1177234'))

music163/artists.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,13 @@ def main_spider(id='1001', initial='-1'):
4141
else:
4242
artist_extend_info = artist_basic_info.find_next_sibling()['href'].replace('/user/home?id=', '').strip()
4343
artist_infod['artist_id'] = artist_basic_info['href'].replace('/artist?id=', '').strip()
44-
artist_infod['artist_name'] = artist_basic_info.text
44+
artist_infod['artist_name'] = artist_basic_info.text[0:25]
4545
artist_infod['artist_extend_id'] = artist_extend_info
4646
artist_info.append(artist_basic_info['href'].replace('/artist?id=', '').strip())
47-
artist_info.append(artist_basic_info.text)
47+
artist_info.append(artist_basic_info.text[0:25])
4848
artist_info.append(artist_extend_info)
4949

5050
artist_infos.append(artist_info)
5151
return artist_infos
5252

53-
#main_spider('1001', '65')
53+
#print(main_spider('1001', '65'))

music163/comments.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ def main_spider(music_id='553543014'):
4040
# response = urllib.request.urlopen(request)
4141
# print(response.read().decode('utf-8'))
4242
r = requests.post(url=url + music_id + '?csrf_token=', data=comments_data1, headers=header)
43+
if r.text is None:
44+
return None
4345
comments_obj = json.loads(r.text)
4446
comment_infos = []
4547
for comment_obj in comments_obj['hotComments']:
@@ -79,4 +81,4 @@ def main_spider(music_id='553543014'):
7981

8082
return comment_infos
8183

82-
# main_spider('553543014')
84+
# print(main_spider('553543014'))

music163/musics.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def main_spider(album_id='74999481'):
2727
request = urllib.request.Request(url + '?id=' + album_id, headers=header)
2828
response = urllib.request.urlopen(request)
2929
soup = BeautifulSoup(response.read().decode('utf-8'), 'html.parser')
30+
if soup.find('div', {'id': 'song-list-pre-cache'}).find('ul', {'class': 'f-hide'}) is None:
31+
return None
3032
musics = soup.find('div', {'id': 'song-list-pre-cache'}).find('ul', {'class': 'f-hide'})
3133
music_basic_infos = musics.find_all('li')
3234
music_infos = []

music163/mysql.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,14 @@ def create_tables(create_type='all'):
6767
artist_infos_sql = '''
6868
CREATE TABLE artist_infos (
6969
artist_id varchar(20),
70-
artist_name varchar(40),
70+
artist_name varchar(50),
7171
artist_extend_id varchar(20)
7272
)ENGINE=innodb DEFAULT CHARSET=utf8;
7373
'''
7474
album_infos_sql = '''
7575
CREATE TABLE album_infos (
7676
album_id varchar(20),
77-
album_title varchar(100),
77+
album_title varchar(200),
7878
album_time varchar(20)
7979
)ENGINE=innodb DEFAULT CHARSET=utf8;
8080
'''
@@ -88,19 +88,19 @@ def create_tables(create_type='all'):
8888
CREATE TABLE comment_infos (
8989
comment_id varchar(30),
9090
user_id varchar(30),
91-
user_nickname varchar(100),
91+
user_nickname varchar(200),
9292
liked_count varchar(50),
93-
content varchar(1000),
93+
content varchar(2000),
9494
is_hot varchar(10)
9595
)ENGINE=innodb DEFAULT CHARSET=utf8;
9696
'''
97-
if create_type == 'artists':
97+
if create_type == 'artist_infos':
9898
cursor.execute(artist_infos_sql)
99-
elif create_type == 'albums':
99+
elif create_type == 'album_infos':
100100
cursor.execute(album_infos_sql)
101-
elif create_type == 'musics':
101+
elif create_type == 'music_infos':
102102
cursor.execute(music_infos_sql)
103-
elif create_type == 'comments':
103+
elif create_type == 'comment_infos':
104104
cursor.execute(comment_infos_sql)
105105
else:
106106
cursor.execute(artist_infos_sql)
@@ -109,9 +109,21 @@ def create_tables(create_type='all'):
109109
cursor.execute(comment_infos_sql)
110110

111111

112-
def drop_tables(drop_type='all'):
    """Drop one crawler table, or all of them.

    drop_type -- one of 'artist_infos', 'album_infos', 'music_infos',
                 'comment_infos' to drop a single table; any other value
                 (including the default 'all') drops all four.

    Uses the module-level MySQL `cursor`.  Table names come from a fixed
    whitelist below, so string concatenation into SQL is safe here.
    """
    tables = ('artist_infos', 'album_infos', 'music_infos', 'comment_infos')
    if drop_type in tables:
        # Single known table: the four original elif branches were identical.
        cursor.execute('drop table ' + drop_type)
    else:
        for table in tables:
            cursor.execute('drop table ' + table)
115127

116128

117129
def close():

0 commit comments

Comments
 (0)