Update spider.py: switch to the Toutiao api/search/content endpoint and fix the malformed re.sub call

崔庆才丨静觅 · web-flow · commit 44f76ba62fdb · 2019-02-18T18:06:51.000+08:00
diff --git a/spider.py b/spider.py
@@ -9,19 +9,23 @@
 
 def get_page(offset):
     params = {
+        'aid': '24',
         'offset': offset,
         'format': 'json',
-        'keyword': '街拍',
+        #'keyword': '街拍',
         'autoload': 'true',
         'count': '20',
         'cur_tab': '1',
-        'from': 'search_tab'
+        'from': 'search_tab',
+        'pd': 'synthesis'
     }
-    base_url = 'https://www.toutiao.com/search_content/?'
+    base_url = 'https://www.toutiao.com/api/search/content/?keyword=%E8%A1%97%E6%8B%8D'
     url = base_url + urlencode(params)
     try:
         resp = requests.get(url)
-        if codes.ok == resp.status_code:
+        print(url)
+        if 200  == resp.status_code:
+            print(resp.json())
             return resp.json()
     except requests.ConnectionError:
         return None
@@ -36,15 +40,18 @@ def get_images(json):
             title = item.get('title')
             images = item.get('image_list')
             for image in images:
-                origin_image = re.sub("list", "origin"，image.get('url')
+                origin_image = re.sub("list", "origin", image.get('url'))
                 yield {
-                    'image': 'https:' + origin_image,
+                    'image':  origin_image,
+                    # 'iamge': image.get('url'),
                     'title': title
                 }
 
+print('succ')
 
 def save_image(item):
     img_path = 'img' + os.path.sep + item.get('title')
+    print('succ2')
     if not os.path.exists(img_path):
         os.makedirs(img_path)
     try:
@@ -54,9 +61,11 @@ def save_image(item):
                 file_name=md5(resp.content).hexdigest(),
                 file_suffix='jpg')
             if not os.path.exists(file_path):
+                print('succ3')
                 with open(file_path, 'wb') as f:
                     f.write(resp.content)
                 print('Downloaded image path is %s' % file_path)
+                print('succ4')
             else:
                 print('Already Downloaded', file_path)
     except requests.ConnectionError: