Skip to content

Commit 44f76ba

Browse files
author
崔庆才丨静觅
authored
Update spider.py
1 parent ccc9f5a commit 44f76ba

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

spider.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,19 +9,23 @@
99

1010
def get_page(offset):
1111
params = {
12+
'aid': '24',
1213
'offset': offset,
1314
'format': 'json',
14-
'keyword': '街拍',
15+
#'keyword': '街拍',
1516
'autoload': 'true',
1617
'count': '20',
1718
'cur_tab': '1',
18-
'from': 'search_tab'
19+
'from': 'search_tab',
20+
'pd': 'synthesis'
1921
}
20-
base_url = 'https://www.toutiao.com/search_content/?'
22+
base_url = 'https://www.toutiao.com/api/search/content/?keyword=%E8%A1%97%E6%8B%8D'
2123
url = base_url + urlencode(params)
2224
try:
2325
resp = requests.get(url)
24-
if codes.ok == resp.status_code:
26+
print(url)
27+
if 200 == resp.status_code:
28+
print(resp.json())
2529
return resp.json()
2630
except requests.ConnectionError:
2731
return None
@@ -36,15 +40,18 @@ def get_images(json):
3640
title = item.get('title')
3741
images = item.get('image_list')
3842
for image in images:
39-
origin_image = re.sub("list", "origin"image.get('url')
43+
origin_image = re.sub("list", "origin", image.get('url'))
4044
yield {
41-
'image': 'https:' + origin_image,
45+
'image': origin_image,
46+
# 'iamge': image.get('url'),
4247
'title': title
4348
}
4449

50+
print('succ')
4551

4652
def save_image(item):
4753
img_path = 'img' + os.path.sep + item.get('title')
54+
print('succ2')
4855
if not os.path.exists(img_path):
4956
os.makedirs(img_path)
5057
try:
@@ -54,9 +61,11 @@ def save_image(item):
5461
file_name=md5(resp.content).hexdigest(),
5562
file_suffix='jpg')
5663
if not os.path.exists(file_path):
64+
print('succ3')
5765
with open(file_path, 'wb') as f:
5866
f.write(resp.content)
5967
print('Downloaded image path is %s' % file_path)
68+
print('succ4')
6069
else:
6170
print('Already Downloaded', file_path)
6271
except requests.ConnectionError:

0 commit comments

Comments
 (0)