Skip to content

Commit 9eca5c3

Browse files
author
cztps2
committed
01
1 parent dfa41c2 commit 9eca5c3

File tree

5 files changed

+108
-0
lines changed

5 files changed

+108
-0
lines changed

01. multi_thread_craw.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import blog_spider
2+
import threading
3+
import time
4+
5+
6+
def single_thread():
    """Crawl every URL in blog_spider.urls sequentially, one request at a time."""
    print("single_thread begin")
    for page_url in blog_spider.urls:
        blog_spider.craw(page_url)
    print("single_thread end")
11+
12+
13+
def multi_thread():
    """Crawl all URLs concurrently, spawning one thread per URL.

    All threads are created first, then started, then joined, so the
    function returns only after every crawl has finished.
    """
    print("multi_thread begin")
    workers = [
        threading.Thread(target=blog_spider.craw, args=(link,))
        for link in blog_spider.urls
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print("multi_thread end")
28+
29+
30+
if __name__ == "__main__":
    # Benchmark the sequential crawl, then the threaded crawl, so the
    # two elapsed times can be compared side by side.
    t0 = time.time()
    single_thread()
    t1 = time.time()
    print("single thread cost:", t1 - t0, "seconds")

    t0 = time.time()
    multi_thread()
    t1 = time.time()
    print("multi thread cost:", t1 - t0, "seconds")

Python并发编程简介.pptx

7.87 KB
Binary file not shown.

blog_spider.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import requests
2+
3+
# The 50 paginated front-page URLs of cnblogs.com ("#p1" .. "#p50") to crawl.
urls = [f"https://www.cnblogs.com/#p{page}" for page in range(1, 51)]
7+
8+
9+
def craw(url):
    """Fetch *url* and print the URL, the response body length, and the HTTP status."""
    resp = requests.get(url)
    print(url, len(resp.text), resp.status_code)
12+
if __name__ == "__main__":
    # Smoke-test the crawler on one page. Guarded so that importing this
    # module (e.g. from multi_thread_craw.py) does not fire an HTTP
    # request as an import-time side effect; matches the guard style
    # used in tmp/blog_spider.py.
    craw(urls[2])
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from tmp import blog_spider
2+
import threading
3+
4+
5+
def single_thread():
    """Visit each URL in blog_spider.urls one after another (baseline timing)."""
    print("single_thread begin")
    for target in blog_spider.urls:
        blog_spider.craw(target)
    print("single_thread end")
10+
11+
12+
def multi_thread():
    """Crawl every URL concurrently: one thread per URL, started in a
    batch and joined before returning."""
    print("multi_thread begin")
    spawned = [
        threading.Thread(target=blog_spider.craw, args=(u,))
        for u in blog_spider.urls
    ]
    for t in spawned:
        t.start()
    for t in spawned:
        t.join()
    print("multi_thread end")
26+
27+
28+
import time
29+
30+
if __name__ == "__main__":
    # Time the sequential crawl first, then the threaded crawl, so the
    # speedup from overlapping network waits is visible.
    begin = time.time()
    single_thread()
    finish = time.time()
    print("single thread:", finish - begin, "seconds")

    begin = time.time()
    multi_thread()
    finish = time.time()
    print("multi thread:", finish - begin, "seconds")

tmp/blog_spider.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import requests
2+
3+
# Archive pages 1..10 of crazyant.net, the crawl targets for this demo.
urls = [f"http://www.crazyant.net/page/{idx}" for idx in range(1, 11)]
7+
8+
9+
def craw(url):
    """Download *url* and report the URL together with the response body length."""
    response = requests.get(url)
    print(url, len(response.text))
12+
13+
14+
if __name__ == "__main__":
    # Manual smoke test: show the target list, then fetch the first page.
    print(urls)
    first = urls[0]
    craw(first)

0 commit comments

Comments
 (0)