Skip to content

Commit f5a6f7a

Browse files
committed
Set up web_scraping project
1 parent d5072d0 commit f5a6f7a

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

web_scraping/scraping_project.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
# http://quotes.toscrape.com/
2+
import requests
3+
from bs4 import BeautifulSoup
4+
from time import sleep
5+
from csv import writer
6+
7+
# Script body: walk the paginated quote listing starting at "/page/1",
# collect one dict per quote, then print the collected list.

# One dict per scraped quote, with keys "text", "author" and "bio_link".
all_quotes = []
base_url = "http://quotes.toscrape.com/"
# Path of the next page to fetch; becomes None when no "next" element is found.
url = "/page/1"

while url:
    # base_url ends with "/" and the initial path starts with "/"; join them
    # with exactly one slash so we do not request ".com//page/1".
    page_url = f"{base_url.rstrip('/')}/{url.lstrip('/')}"

    # Without a timeout a stalled connection would block the loop forever.
    res = requests.get(page_url, timeout=10)
    print(f"Now scraping {page_url}")
    # Fail loudly on 4xx/5xx: an error page has no quotes and no "next"
    # link, so it would otherwise end the scrape silently with partial data.
    res.raise_for_status()

    soup = BeautifulSoup(res.text, "html.parser")
    quotes = soup.find_all(class_="quote")

    for quote in quotes:
        all_quotes.append({
            "text": quote.find(class_="text").get_text(),
            "author": quote.find(class_="author").get_text(),
            # href of the first <a> inside the quote element
            # (presumably the author's bio page, per the key name).
            "bio_link": quote.find("a")["href"],
        })

    # Follow the link inside the element with class "next", if there is one.
    next_btn = soup.find(class_="next")
    url = next_btn.find("a")["href"] if next_btn else None

    if url:
        sleep(3)  # wait between scraping pages; no need to pause after the last one

print(all_quotes)

0 commit comments

Comments
 (0)