9
9
10
10
def get_page(offset):
    """Fetch one page of search results from the Toutiao search API.

    The query string is built entirely by ``urlencode`` so that every
    parameter, including the search keyword, is percent-encoded and joined
    with ``&``.  Previously the keyword was hard-coded at the end of the base
    URL and the remaining parameters were appended with no separator, which
    fused ``aid`` into the keyword value.

    Args:
        offset: Value sent as the ``offset`` query parameter.

    Returns:
        The decoded JSON body when the server answers with HTTP 200;
        ``None`` when the status is anything else or when
        ``requests.ConnectionError`` is raised.
    """
    params = {
        # urlencode percent-encodes this as %E8%A1%97%E6%8B%8D (UTF-8), the
        # same value that used to be hard-coded into the base URL.
        'keyword': '街拍',
        'aid': '24',
        'offset': offset,
        'format': 'json',
        'autoload': 'true',
        'count': '20',
        'cur_tab': '1',
        'from': 'search_tab',
        'pd': 'synthesis',
    }
    base_url = 'https://www.toutiao.com/api/search/content/?'
    url = base_url + urlencode(params)
    try:
        resp = requests.get(url)
        print(url)
        if resp.status_code == 200:
            # Decode the body once and reuse it for both the debug print and
            # the return value.
            payload = resp.json()
            print(payload)
            return payload
    except requests.ConnectionError:
        return None
    # Non-200 responses fall through here.
    return None
@@ -36,15 +40,18 @@ def get_images(json):
36
40
title = item .get ('title' )
37
41
images = item .get ('image_list' )
38
42
for image in images :
39
- origin_image = re .sub ("list" , "origin" , image .get ('url' )
43
+ origin_image = re .sub ("list" , "origin" , image .get ('url' ) )
40
44
yield {
41
- 'image' : 'https:' + origin_image ,
45
+ 'image' : origin_image ,
46
+ # 'iamge': image.get('url'),
42
47
'title' : title
43
48
}
44
49
50
+ print ('succ' )
45
51
46
52
def save_image (item ):
47
53
img_path = 'img' + os .path .sep + item .get ('title' )
54
+ print ('succ2' )
48
55
if not os .path .exists (img_path ):
49
56
os .makedirs (img_path )
50
57
try :
@@ -54,9 +61,11 @@ def save_image(item):
54
61
file_name = md5 (resp .content ).hexdigest (),
55
62
file_suffix = 'jpg' )
56
63
if not os .path .exists (file_path ):
64
+ print ('succ3' )
57
65
with open (file_path , 'wb' ) as f :
58
66
f .write (resp .content )
59
67
print ('Downloaded image path is %s' % file_path )
68
+ print ('succ4' )
60
69
else :
61
70
print ('Already Downloaded' , file_path )
62
71
except requests .ConnectionError :
0 commit comments