Skip to content

Commit

Permalink
更新douyin爬虫; 如果失效请手动更新douyin_cookies.txt
Browse files (browse the repository at this point in the history)
  • Loading branch information
long committed Mar 5, 2023
1 parent d124273 commit 0b20213
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 3 deletions.
Binary file modified __pycache__/crawlers.cpython-38.pyc
Binary file not shown.
9 changes: 8 additions & 1 deletion crawlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,11 +484,18 @@ def douyin_search_video(self, search_keywords, offset=0):
search_keywords = quote(search_keywords)
print(search_keywords)
url = f'https://www.douyin.com/aweme/v1/web/search/item/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_video_web&sort_type=0&publish_time=0&keyword={search_keywords}&search_source=switch_tab&query_correct_type=1&is_filter_search=0&from_group_id=&offset={offset}&count=20&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=2560&screen_height=1440&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=107.0.0.0&browser_online=true&engine_name=Blink&engine_version=107.0.0.0&os_name=Windows&os_version=10&cpu_core_num=12&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=0&webid=7163531063863133732'
try:
# 读取抖音cookies
with open('douyin_cookies.txt', 'r', encoding='utf-8')as file:
cookies = file.read()
except:
cookies = None
raise Exception(f'请复制抖音cookies到 douyin_cookies.txt!!!')
headers = {
'authority': 'www.douyin.com',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
'referer': 'https://www.douyin.com/search/%E7%83%AD%E9%97%A8?publish_time=0&sort_type=0&source=switch_tab&type=video',
'cookie': 'douyin.com; s_v_web_id=verify_la7udy4g_4IJtmDpB_v6m8_4L0B_Bsm6_JAEMoLsxhKys; passport_csrf_token=186b7bcfd87f579c60a67d35f19fe9c1; passport_csrf_token_default=186b7bcfd87f579c60a67d35f19fe9c1; csrf_session_id=4f9475d4dbe24d70951c1ada6262a645; n_mh=WZp_FTaGhpNn8yHatnPpROsyat9W3gqOXXseck3zZew; passport_assist_user=CkGpXg5PjkXenQdbIfAgyoZnxyLMvFdP9N558Zx5Nak4vHJQmG27Tw9zZGbXr7VwzxaofcX1d7h6vGroV0fCIEa3yRpICjwBAfTlznkiNfqFj7y2NZ7yGaxIfiKBrB1YZCC1s_o4FmDJGO0VHqijEoigOJmrSbL55xijP2-5wFcx-vgQgvqgDRiJr9ZUIgEDWoDktQ%3D%3D; sso_uid_tt=11a6396bb5f53d6368b87fb650c85097; sso_uid_tt_ss=11a6396bb5f53d6368b87fb650c85097; toutiao_sso_user=6b166f85e829446968ce4fe951c6382b; toutiao_sso_user_ss=6b166f85e829446968ce4fe951c6382b; sid_ucp_sso_v1=1.0.0-KGNhZjYwNTY2NmI0MWQ5ZGVkNzI4YWExYTNlMGE4NDVjNmU1ZjUwMmUKHwjN8OCa7o39BRDyprmbBhjvMSAMMJbsuJcGOAZA9AcaAmxmIiA2YjE2NmY4NWU4Mjk0NDY5NjhjZTRmZTk1MWM2MzgyYg; ssid_ucp_sso_v1=1.0.0-KGNhZjYwNTY2NmI0MWQ5ZGVkNzI4YWExYTNlMGE4NDVjNmU1ZjUwMmUKHwjN8OCa7o39BRDyprmbBhjvMSAMMJbsuJcGOAZA9AcaAmxmIiA2YjE2NmY4NWU4Mjk0NDY5NjhjZTRmZTk1MWM2MzgyYg; passport_auth_status=040a2071608b8cf84c84b159c68dcd13%2Cd3437ad9218fc0a4fdaf64f3ae07cea9; passport_auth_status_ss=040a2071608b8cf84c84b159c68dcd13%2Cd3437ad9218fc0a4fdaf64f3ae07cea9; sid_guard=0e9c88492704dba9770c668cfb3ad65d%7C1668174708%7C5183998%7CTue%2C+10-Jan-2023+13%3A51%3A46+GMT; uid_tt=e3bf5c655838577b69114b35d1fc1835; uid_tt_ss=e3bf5c655838577b69114b35d1fc1835; sid_tt=0e9c88492704dba9770c668cfb3ad65d; sessionid=0e9c88492704dba9770c668cfb3ad65d; sessionid_ss=0e9c88492704dba9770c668cfb3ad65d; sid_ucp_v1=1.0.0-KDZiZDgxN2Q0YTc4NzZhY2ZhN2Y1OTMzMDQ2MjU5MmYyYzE0Yjc5NjEKGQjN8OCa7o39BRD0prmbBhjvMSAMOAZA9AcaAmhsIiAwZTljODg0OTI3MDRkYmE5NzcwYzY2OGNmYjNhZDY1ZA; ssid_ucp_v1=1.0.0-KDZiZDgxN2Q0YTc4NzZhY2ZhN2Y1OTMzMDQ2MjU5MmYyYzE0Yjc5NjEKGQjN8OCa7o39BRD0prmbBhjvMSAMOAZA9AcaAmhsIiAwZTljODg0OTI3MDRkYmE5NzcwYzY2OGNmYjNhZDY1ZA; 
ttwid=1%7CZJz8d7FMqSJauHQkZnXbfmEtX7U7VowfL4lrIHwxIrE%7C1669004154%7C1107c4182cf17778481cd718ed28fb50f32ef9d098167791279861fe527ee897; __ac_nonce=0637c2160001170e5b63a; __ac_signature=_02B4Z6wo00f01V9PI7AAAIDA1AS4U-D7XYVfbycAADSzKczKBkyLvUAJNT6gj4DHSBXo3JZ5DGKB3F2LjzMBzivdhNTUAWMwnPtZsqawVprpEIRbHLmyEM-dJB1PI1V6UatVyuQfUM-HZZ2wd3; FOLLOW_NUMBER_YELLOW_POINT_INFO=%22MS4wLjABAAAAlQ5yVlHkiaeoD0B0cS8JhPr3Rxq5X_L0rDXxt7DzRz13Mi6zuxv-Ig7cdtGdafPu%2F1669132800000%2F0%2F1669079409144%2F0%22; odin_tt=f7aa3135bb330c04a7d48dabecf474e0f2b15128c53fddb3ffbe4dc6098c583f2fb44d4dc8585abfaf0e9e18e0166bad; download_guide=%223%2F20221122%22; SEARCH_RESULT_LIST_TYPE=%22single%22; strategyABtestKey=%221669079772.107%22; msToken=G0-VTpRTq_f0OfV55_jAA5SMdpLeJy2zaB9NTjYB9SaSBRZfuNgcJ24UvdlIhKbb0ZN_f3YPxNcDfnShM4DI2unCBrPSycENSffVdHSphvcbqmxvZpwxLw==; msToken=Tj_LIlFpENGTrN5F0qzmbEe_bKv9OehazbEF4OEiRR_JKzAwNME_ZcsPzpI4CYqnVtzQppea9wjeYwhFNLtF4Xq3YliI2fliX_lxrtMu1K8AwXW_WWOfCA==; tt_scid=XdZaOsCXffhbGXRHfYqV3ZH7u538K.HV-IMI4C1koLPSjUCKCdu2QvefnvhkeFEL64f5; home_can_add_dy_2_desktop=%220%22'
'cookie': cookies
}
res = self.simple_get(url=url, headers=headers)
# del cookies['douyin.com']
Expand Down
1 change: 1 addition & 0 deletions douyin_cookies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ttwid=1|jksKIUu5rK0vVPfqN9Pg5YNwyAa3KmWaQLqjRpDTIKY|1678022129|fb02f6395d6c4eed052367ae75211a4cda239f68f1f06554defdac19cb2ac503; douyin.com; strategyABtestKey="1678022130.362"; s_v_web_id=verify_levf1udi_juJD5bhR_t9T3_4Skz_928d_FmaadWxwUPIK; passport_csrf_token=c1553dabb48d13c1dc090d272e6c8d5a; passport_csrf_token_default=c1553dabb48d13c1dc090d272e6c8d5a; d_ticket=aeb4fe186934e93ed50744e1b5cda9a7663db; passport_assist_user=Cj2BOX6FvuUUsITANubFpuDRkw870hN9DzD34Wm9Gh8oErB3-q27Tc2p1OElp9BO3UvJWppUv4rHaTf0oc2TGkgKPF2oe2ixCT44DbtmfaHcATiHYricbu00rWb2-6FkLSbTkn4YL7nbVyyTaFpS6p-6aN8vPDSLqXlySKFDiBCP_KoNGImv1lQiAQNeTs4c; n_mh=ZOJMDPN5d-8bA_TXM7XeCexFIro0A5PHa_xA2lBsqWQ; sso_auth_status=1efbcded6ac0a2f0948656d1876171e9; sso_auth_status_ss=1efbcded6ac0a2f0948656d1876171e9; sso_uid_tt=e71eadfe51542081717d6efaa9b2d4c9; sso_uid_tt_ss=e71eadfe51542081717d6efaa9b2d4c9; toutiao_sso_user=9fc1930d2d7e63969b9da66cf4ac9048; toutiao_sso_user_ss=9fc1930d2d7e63969b9da66cf4ac9048; sid_ucp_sso_v1=1.0.0-KDNkYmRjNTVhOWIyN2Q5N2U0ZjEyMzViNjllMWIyNDRjZThkOTQ0ODIKHQjC79TN-AIQkaySoAYY7zEgDDDRqN7ZBTgCQPEHGgJsZiIgOWZjMTkzMGQyZDdlNjM5NjliOWRhNjZjZjRhYzkwNDg; ssid_ucp_sso_v1=1.0.0-KDNkYmRjNTVhOWIyN2Q5N2U0ZjEyMzViNjllMWIyNDRjZThkOTQ0ODIKHQjC79TN-AIQkaySoAYY7zEgDDDRqN7ZBTgCQPEHGgJsZiIgOWZjMTkzMGQyZDdlNjM5NjliOWRhNjZjZjRhYzkwNDg; odin_tt=5546536ad26d92addf8f01cb2702d91181a8b9a9db89369a7db62a08e2d863a113e8deefc5e0f621d3e6ef8f58f86fbd02a987aeff17e60dd7046a2a338af2db; passport_auth_status=3fec557fcec4f19ca4a9eac17437340e,23f131f18b6ddcc4fffec7bf3bbbd2fd; passport_auth_status_ss=3fec557fcec4f19ca4a9eac17437340e,23f131f18b6ddcc4fffec7bf3bbbd2fd; uid_tt=a4f9318aefc520a8023b72dbc2295bb3; uid_tt_ss=a4f9318aefc520a8023b72dbc2295bb3; sid_tt=6e37f25d8c21b8db4602fa46bcd4b772; sessionid=6e37f25d8c21b8db4602fa46bcd4b772; sessionid_ss=6e37f25d8c21b8db4602fa46bcd4b772; __ac_nonce=06404961100a94b79a003; VIDEO_FILTER_MEMO_SELECT={"expireTime":1678627016068,"type":1}; LOGIN_STATUS=1; 
FOLLOW_LIVE_POINT_INFO="MS4wLjABAAAA9mdrLTs1sEmNKc7ut5UzhjnyWQ7W_3b3HKQQwaP5igs/1678032000000/0/1678022216258/0"; store-region=cn-gd; store-region-src=uid; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWNsaWVudC1jZXJ0IjoiLS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tXG5NSUlDRXpDQ0FicWdBd0lCQWdJVWFiZEhMNUo1WHpCcm5wWUxGMG1JUSt4a0YxUXdDZ1lJS29aSXpqMEVBd0l3XG5NVEVMTUFrR0ExVUVCaE1DUTA0eElqQWdCZ05WQkFNTUdYUnBZMnRsZEY5bmRXRnlaRjlqWVY5bFkyUnpZVjh5XG5OVFl3SGhjTk1qTXdNekExTVRNeE5qQXhXaGNOTXpNd016QTFNakV4TmpBeFdqQW5NUXN3Q1FZRFZRUUdFd0pEXG5UakVZTUJZR0ExVUVBd3dQWW1SZmRHbGphMlYwWDJkMVlYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEXG5BUWNEUWdBRVFLM0JqRmRJdXcraXk3aDJucmRsWDRiS0hqWmVnQWpjNnEzaytEcXd5cTN5NGJ0amhlSDJwWmJCXG5uVGR2YWkrNktrblgzVDFTcmFPdDc1K2p6am8wM0tPQnVUQ0J0akFPQmdOVkhROEJBZjhFQkFNQ0JhQXdNUVlEXG5WUjBsQkNvd0tBWUlLd1lCQlFVSEF3RUdDQ3NHQVFVRkJ3TUNCZ2dyQmdFRkJRY0RBd1lJS3dZQkJRVUhBd1F3XG5LUVlEVlIwT0JDSUVJQWpFak01R3ZPQkhvdjNXWEFVRGk2REJGZmY4UjZkVFhvcFBpcUNqaWI1MU1Dc0dBMVVkXG5Jd1FrTUNLQUlES2xaK3FPWkVnU2pjeE9UVUI3Y3hTYlIyMVRlcVRSZ05kNWxKZDdJa2VETUJrR0ExVWRFUVFTXG5NQkNDRG5kM2R5NWtiM1Y1YVc0dVkyOXRNQW9HQ0NxR1NNNDlCQU1DQTBjQU1FUUNJQW52cHkreVowMHFIdnNiXG5ocm55bm55SEJHZmhiT2R3SVpiYUp6dm1PbkFvQWlCdUJzL001U2RRMUp4Z3lwRWpLMExIdGZoVGNPN1YzZHk0XG5rUlpNc3BLSFRRPT1cbi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS1cbiJ9; bd_ticket_guard_server_data=; csrf_session_id=d7ee7cb6dd02be8dcbc16a59d8622bed; sid_guard=6e37f25d8c21b8db4602fa46bcd4b772|1678022217|5183944|Thu,+04-May-2023+13:16:01+GMT; sid_ucp_v1=1.0.0-KGVkOGNkZWM0YTQ4ZTc2ZmU3MjU1ZmMwNTg0YWRmNWNkYjcyZmVkZjYKGQjC79TN-AIQyaySoAYY7zEgDDgCQPEHSAQaAmxxIiA2ZTM3ZjI1ZDhjMjFiOGRiNDYwMmZhNDZiY2Q0Yjc3Mg; ssid_ucp_v1=1.0.0-KGVkOGNkZWM0YTQ4ZTc2ZmU3MjU1ZmMwNTg0YWRmNWNkYjcyZmVkZjYKGQjC79TN-AIQyaySoAYY7zEgDDgCQPEHSAQaAmxxIiA2ZTM3ZjI1ZDhjMjFiOGRiNDYwMmZhNDZiY2Q0Yjc3Mg; msToken=b1jvFh6AsZ8oYQUNsytK6CN9Kc2PRHJZdkCUqgoKKLogioDPkch_D2zZsmNoU9KOa_GOA6i5HisACV60zUE0P6TsGtk2iulekskJnbCZuYb0nK-4mdbz; my_rd=1; 
tt_scid=us-rfku2582RUkZi0Rto-lSL0rRHh7s07QrJA8hjvbVNf-9OCppbs5KdaKYQBw2S7130; home_can_add_dy_2_desktop="1"; passport_fe_beating_status=true; msToken=rGVaL7M2X3IngRoI1c8XZJrEIEx3JjjCv-Nut4M8Ho4v5oBpSdH72PzpEEzy--tEL-wvH6gm27X8iuo5n68mIjYxDAgWZ4dWSdjTsyzr3hsJ0yjCJi8rQfR7itDvgg==
7 changes: 5 additions & 2 deletions timing_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,26 +44,29 @@ def kill_orphan_chrome():
kill_orphan_chrome()
print(f'keyword: {keyword}')
if crawlers_config['Tiktok_crawler'] == 'True':
print(f'开启tiktok爬虫...')
try:
crawler.tiktok_crawler(keyword)
except:
pass
if keyword == 'funny' or keyword == 'hot':
if crawlers_config['Youtube_crawler'] == 'True':
print(f'开启youtube爬虫...')
try:
crawler.youtube_crawler(keyword)
except:
pass
time.sleep(6)
time.sleep(2)
except:
continue
for keyword in keywords_chinese.split(','):
try:
kill_orphan_chrome()
print(f'keyword: {keyword}')
if crawlers_config['Douyin_crawler'] == 'True':
print(f'开启douyin爬虫...')
crawler.douyin_crawler(keyword)
time.sleep(6)
time.sleep(3)
except Exception as e:
print(f'error: {e}')
continue

0 comments on commit 0b20213

Please sign in to comment.