-
Notifications
You must be signed in to change notification settings - Fork 0
/
wwr.py
38 lines (34 loc) · 1.47 KB
/
wwr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from requests import get
from bs4 import BeautifulSoup
def _parse_wwr_post(post):
    """Build a job dict from one ``<li>`` job post, or return None.

    None is returned when the post does not have the expected markup
    (a second anchor with an ``href``, a ``title`` span and at least two
    ``company`` spans), so one odd list item cannot abort the whole scrape.
    """
    anchors = post.find_all("a")
    # The job data is read from the second anchor of each post.
    if len(anchors) < 2:
        return None
    anchor = anchors[1]

    link = anchor.get("href")
    title = anchor.find("span", class_="title")
    details = anchor.find_all("span", class_="company")
    if link is None or title is None or len(details) < 2:
        return None

    # First "company" span is the company name, last one is the region.
    # With exactly two spans this matches a plain two-value unpacking.
    company, region = details[0], details[-1]

    # Commas are replaced with spaces in every text field (presumably so
    # the values can be written to a CSV file without quoting -- confirm
    # against the caller).
    return {
        "link": f"https://weworkremotely.com{link}",
        "company": company.get_text().replace(",", " "),
        "location": region.get_text().replace(",", " "),
        "position": title.get_text().replace(",", " "),
    }


def extract_wwr_jobs(keyword):
    """Search weworkremotely.com for *keyword* and return the job posts.

    Args:
        keyword: Search term; it is sent as the ``term`` query parameter
            and URL-encoded by ``requests``.

    Returns:
        A list of dicts with the keys ``"link"``, ``"company"``,
        ``"location"`` and ``"position"``. The list is empty when the
        site does not answer with HTTP 200 or no post could be parsed.

    Raises:
        Whatever ``requests.get`` raises on network failure or timeout;
        those exceptions are not caught here.
    """
    search_url = "https://weworkremotely.com/remote-jobs/search"
    # Passing the term via ``params`` escapes characters such as "&", "#"
    # and "+"; the timeout keeps a stalled connection from hanging forever.
    response = get(search_url, params={"term": keyword}, timeout=10)
    if response.status_code != 200:
        print("Can't request website")
        # Always hand back a list so callers can iterate or concatenate.
        return []

    # "html.parser" selects Python's built-in HTML parser.
    soup = BeautifulSoup(response.text, "html.parser")

    results = []
    # Every <section class="jobs"> holds a list of job posts.
    for job_section in soup.find_all("section", class_="jobs"):
        # The last <li> of a section is the "view all" entry, not a job.
        # Slicing (unlike pop) is also safe on a section with no <li>.
        job_posts = job_section.find_all("li")[:-1]
        for post in job_posts:
            job_data = _parse_wwr_post(post)
            if job_data is not None:
                results.append(job_data)
    return results