-
Notifications
You must be signed in to change notification settings - Fork 121
/
Copy pathtiming_crawl.py
72 lines (66 loc) · 2.26 KB
/
timing_crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import random
import sys
import time
import os
from crawlers import Crawlers
import configparser
# Parse config.ini (decoded as UTF-8) once at import time.
config = configparser.ConfigParser()
config.read('config.ini', encoding='utf-8')

# The [Crawlers] section holds the crawler on/off switches and the
# comma-separated keyword lists used further down in this script.
crawlers_config = config['Crawlers']
keywords_english, keywords_chinese = (
    crawlers_config[option]
    for option in ('Keywords_english', 'Keywords_chinese')
)
def kill_orphan_chrome():
    """Kill chrome/chromedriver processes whose parent process is PID 1.

    Does nothing when the script's first command-line argument is ``test``.
    Otherwise makes up to three attempts. Each attempt lists the children of
    PID 1 with ``ps -f --ppid 1``, sends ``kill -9`` to every entry matching
    ``chromedriver`` and then ``chrome``, and stops early as soon as neither
    pattern matches any more.

    Returns:
        None.
    """
    # A missing argument used to raise IndexError on sys.argv[1]; the callers
    # swallowed it and silently skipped every keyword. Treat "no argument"
    # as a normal (non-test) run instead.
    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        return

    def orphans_exist(pattern):
        # Use the pipe as a context manager so it is closed after reading;
        # a bare os.popen(...).read() leaves the pipe open.
        with os.popen(f'ps -f --ppid 1 | grep {pattern}') as pipe:
            return bool(pipe.read())

    for _ in range(3):
        for pattern in ('chromedriver', 'chrome'):
            if orphans_exist(pattern):
                # os.system reports failure through its return value, so a
                # failed kill does not need a try/except to be ignored.
                os.system(
                    f"ps -f --ppid 1 | grep {pattern} "
                    "| awk '{print $2}' | xargs kill -9"
                )
        if not orphans_exist('chromedriver') and not orphans_exist('chrome'):
            break
        # Short randomized pause before the next attempt.
        time.sleep(random.uniform(0.1, 0.2))
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been stripped; confirm the placement of the sleeps and of
# the YouTube check against the real file.

# One crawler instance is reused for every keyword in both loops.
crawler = Crawlers()

# English keywords: TikTok for every keyword, YouTube only for 'funny'/'hot'.
for keyword in keywords_english.split(','):
    try:
        kill_orphan_chrome()
        print(f'keyword: {keyword}')
        if crawlers_config['Tiktok_crawler'] == 'True':
            print('开启tiktok爬虫...')  # "starting tiktok crawler..."
            try:
                crawler.tiktok_crawler(keyword)
            except Exception as e:
                # Best effort: report the failure and still try YouTube.
                # `Exception` (not a bare except) lets Ctrl-C stop the run.
                print(f'error: {e}')
        if keyword in ('funny', 'hot') and crawlers_config['Youtube_crawler'] == 'True':
            print('开启youtube爬虫...')  # "starting youtube crawler..."
            try:
                crawler.youtube_crawler(keyword)
            except Exception as e:
                print(f'error: {e}')
        time.sleep(2)
    except Exception as e:
        # Report instead of silently skipping, matching the Chinese-keyword
        # loop below, then move on to the next keyword.
        print(f'error: {e}')
        continue

# Chinese keywords: Douyin only.
for keyword in keywords_chinese.split(','):
    try:
        kill_orphan_chrome()
        print(f'keyword: {keyword}')
        if crawlers_config['Douyin_crawler'] == 'True':
            print('开启douyin爬虫...')  # "starting douyin crawler..."
            crawler.douyin_crawler(keyword)
        time.sleep(3)
    except Exception as e:
        print(f'error: {e}')
        continue