Skip to content

Commit 9eca5c3

Browse files
author
cztps2
committed
01
1 parent dfa41c2 commit 9eca5c3

File tree

5 files changed

+108
-0
lines changed

5 files changed

+108
-0
lines changed

01. multi_thread_craw.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import blog_spider
2+
import threading
3+
import time
4+
5+
6+
def single_thread():
    """Crawl every URL in blog_spider.urls sequentially, one request at a time."""
    print("single_thread begin")
    for page_url in blog_spider.urls:
        blog_spider.craw(page_url)
    print("single_thread end")
11+
12+
13+
def multi_thread():
    """Crawl all URLs concurrently, spawning one thread per URL.

    All threads are created first, then started, then joined, so the
    function returns only after every crawl has finished.
    """
    print("multi_thread begin")
    workers = [
        threading.Thread(target=blog_spider.craw, args=(link,))
        for link in blog_spider.urls
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print("multi_thread end")
28+
29+
30+
if __name__ == "__main__":
    # Benchmark the sequential crawl, then the threaded crawl, so the
    # two elapsed times can be compared side by side.
    t0 = time.time()
    single_thread()
    t1 = time.time()
    print("single thread cost:", t1 - t0, "seconds")

    t0 = time.time()
    multi_thread()
    t1 = time.time()
    print("multi thread cost:", t1 - t0, "seconds")

Python并发编程简介.pptx

7.87 KB
Binary file not shown.

blog_spider.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import requests
2+
3+
# The 50 paginated front-page URLs of cnblogs.com ("#p1" .. "#p50") to crawl.
urls = [f"https://www.cnblogs.com/#p{page}" for page in range(1, 51)]
7+
8+
9+
def craw(url):
    """Fetch *url* and print the URL, the response body length, and the HTTP status."""
    resp = requests.get(url)
    print(url, len(resp.text), resp.status_code)
12+
if __name__ == "__main__":
    # Smoke-test the crawler on one page. Guarded so that importing this
    # module (e.g. from multi_thread_craw.py) does not fire an HTTP
    # request as an import-time side effect; matches the guard style
    # used in tmp/blog_spider.py.
    craw(urls[2])
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from tmp import blog_spider
2+
import threading
3+
4+
5+
def single_thread():
    """Visit each URL in blog_spider.urls one after another (baseline timing)."""
    print("single_thread begin")
    for target in blog_spider.urls:
        blog_spider.craw(target)
    print("single_thread end")
10+
11+
12+
def multi_thread():
    """Crawl every URL concurrently: one thread per URL, started in a
    batch and joined before returning."""
    print("multi_thread begin")
    spawned = [
        threading.Thread(target=blog_spider.craw, args=(u,))
        for u in blog_spider.urls
    ]
    for t in spawned:
        t.start()
    for t in spawned:
        t.join()
    print("multi_thread end")
26+
27+
28+
import time
29+
30+
if __name__ == "__main__":
    # Time the sequential crawl first, then the threaded crawl, so the
    # speedup from overlapping network waits is visible.
    begin = time.time()
    single_thread()
    finish = time.time()
    print("single thread:", finish - begin, "seconds")

    begin = time.time()
    multi_thread()
    finish = time.time()
    print("multi thread:", finish - begin, "seconds")

tmp/blog_spider.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import requests
2+
3+
# Archive pages 1..10 of crazyant.net, the crawl targets for this demo.
urls = [f"http://www.crazyant.net/page/{idx}" for idx in range(1, 11)]
7+
8+
9+
def craw(url):
    """Download *url* and report the URL together with the response body length."""
    response = requests.get(url)
    print(url, len(response.text))
12+
13+
14+
if __name__ == "__main__":
    # Manual smoke test: show the target list, then fetch the first page.
    print(urls)
    first = urls[0]
    craw(first)

0 commit comments

Comments
 (0)