
Commit 8f8e4e7

DAY 13 OF 14 PYTHON CHALLENGE
1 parent cf79b47 commit 8f8e4e7

7 files changed: +295 −1 lines
Lines changed: 62 additions & 1 deletion (the Flask app entry point)
@@ -1,3 +1,8 @@
from flask import Flask, render_template, request, redirect
from scrapper import aggregate_subreddits, check_subreddit
from remote import remote_jobs
from sof import get_jobs

"""
These are the URLs that will give you remote jobs for the word 'python'

@@ -6,4 +11,60 @@
https://remoteok.io/remote-dev+python-jobs

Good luck!
"""

remote = remote_jobs()
sof = get_jobs()

# Reuse the lists fetched above instead of scraping both sites a second time.
jobs = remote + sof


app = Flask("Remote World")

subreddits = [
    "javascript",
    "reactjs",
    "reactnative",
    "programming",
    "css",
    "golang",
    "flutter",
    "rust",
    "django"
]


@app.route("/")
def home():
    return render_template("home.html", subreddits=subreddits)


@app.route("/read")
def read():
    # Only subreddits from the known list can be selected via query args.
    selected = []
    for subreddit in subreddits:
        if subreddit in request.args:
            selected.append(subreddit)
    posts = aggregate_subreddits(selected)
    posts.sort(key=lambda post: post['votes'], reverse=True)
    return render_template("read.html", selected=selected, posts=posts)


@app.route("/add", methods=['POST'])
def add():
    to_add = request.form.get('new-subreddit', None)
    if to_add:
        if "/" not in to_add:
            exists = check_subreddit(to_add)
            if exists:
                subreddits.append(to_add)
                return redirect("/")
            else:
                error = "That page does not exist."
        else:
            error = "Write the name without /r/"
    else:
        error = "Write a subreddit name."
    return render_template("add-failed.html", error=error)


# The __main__ guard lets other modules import this file without starting the server.
if __name__ == "__main__":
    app.run(host="0.0.0.0")
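
Since this is plain Flask, the routes can be smoke-tested without a browser via Flask's built-in test client. A minimal sketch, assuming the file above is saved as main.py (a hypothetical name, since the diff doesn't show it) and the scrapers can reach the network; note that importing the module already runs remote_jobs() and get_jobs():

from main import app  # hypothetical module name

client = app.test_client()
print(client.get("/").status_code)                # 200: home page with the checkbox list
print(client.get("/read?django=on").status_code)  # 200: aggregated r/django posts
# A valid name redirects to "/" (302); anything else renders add-failed.html (200).
print(client.post("/add", data={"new-subreddit": "python"}).status_code)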
Lines changed: 22 additions & 0 deletions (add-failed.html)
@@ -0,0 +1,22 @@
<!DOCTYPE html>
<html>

<head>
  <title>
    Remote World
  </title>
  <link href="https://remoteok.io/remote-dev+python-jobs" rel="stylesheet" />
</head>

<body>
  <header>
    <h1>Remote World</h1>
    <h3>Couldn't add the subreddit</h3>
  </header>
  <main>
    <h4>{{error}}</h4>
    <a href="/">&larr; Go back</a>
  </main>
</body>

</html>
Lines changed: 45 additions & 0 deletions (home.html)
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>

<head>
  <title>
    Remote World
  </title>
  <link href="https://remoteok.io/remote-dev+python-jobs" rel="stylesheet" />
</head>

<body>
  <header>
    <h1>Remote World</h1>
    <h3>A service to aggregate all your favourite remote jobs</h3>
  </header>
  <main>
    <article>
      <form action="/add" method="post">
        <h4>Add a subreddit:</h4>
        <h5>Write the name without /r/</h5>
        <input placeholder="Write a subreddit name" required name="new-subreddit" />
        <button type="submit">Add</button>
      </form>
    </article>
    <hr />
    <article>
      <form action="/read">
        <h4>Select the subreddits you're interested in:</h4>
        <ul>
          {% for subreddit in subreddits %}
          <li>
            <input type="checkbox" name="{{subreddit}}" id="{{subreddit}}" />
            <label for="{{subreddit}}">
              r/{{subreddit}}
            </label>
          </li>
          {% endfor %}
        </ul>
        <button type="submit">Aggregate</button>
      </form>
    </article>
  </main>
</body>

</html>
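
Note on the second form: it has no method attribute, so it submits a GET request, and each checked box is sent as a query parameter named after the subreddit (browsers send the default value "on"). Checking javascript and css, for example, requests /read?javascript=on&css=on, which is exactly what the /read route's check of subreddit in request.args relies on.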
Lines changed: 27 additions & 0 deletions (read.html)
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>

<head>
  <title>
    Remote World
  </title>
  <link href="https://remoteok.io/remote-dev+python-jobs" rel="stylesheet" />
</head>

<body>
  <header>
    <h1><a href="/">Remote World</a></h1>
    <h3>Reading: {% for subreddit in selected %}r/{{subreddit}} {% endfor %}</h3>
  </header>
  <main>
    {% for post in posts %}
    <div>
      <h3><a href="https://reddit.com{{post.link}}" target="_blank">{{post.title}}</a></h3>
      <h4>{{post.votes}} upvotes · r/{{post.subreddit}}</h4>
      <hr />
    </div>
    {% endfor %}
  </main>
</body>

</html>
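
The scraper stores post['link'] as the href taken straight from Reddit's markup, which is a root-relative path (something like /r/django/comments/...), so the template prepends https://reddit.com to build an absolute URL; target="_blank" then opens the post in a new tab.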
Lines changed: 42 additions & 0 deletions (remote.py)
@@ -0,0 +1,42 @@
import requests
from bs4 import BeautifulSoup

URL = "https://stackoverflow.com/jobs?r=true&q=python" if False else "https://weworkremotely.com/remote-jobs/search?term=python"

# Take a look at what the search page returns
result = requests.get(URL)
soup = BeautifulSoup(result.text, "html.parser")
html = soup.find("div", {"class": "jobs"})


def extract_job(html):
    title = html.find("span", {"class": "title"}).get_text(strip=True)
    company = html.find("span", {"class": "company"}).get_text(strip=True)
    location = html.find("span", {"class": "region company"}).get_text(strip=True)
    # Each listing's first anchor points at the job posting.
    link_tag = html.find("a")
    link = link_tag["href"] if link_tag else None
    return {
        "title": title,
        "company": company,
        "location": location,
        "link": link
    }


def extract_jobs(last_page):
    jobs = []
    for page in range(last_page):
        result = requests.get(f"{URL}&pg={page + 1}")
        soup = BeautifulSoup(result.text, "html.parser")
        # We Work Remotely renders each listing as <li class="feature">.
        results = soup.find_all("li", {"class": "feature"})
        for result in results:
            job = extract_job(result)
            jobs.append(job)
    return jobs


def remote_jobs():
    # The search page's pagination scheme is unverified, so scrape one page.
    jobs = extract_jobs(1)
    return jobs
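
A small smoke test for this module, assuming We Work Remotely is reachable and the selectors above still match its markup:

if __name__ == "__main__":
    for job in remote_jobs()[:3]:
        print(job["title"], "|", job["company"], "|", job["location"])
        print("  link:", job["link"])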
Lines changed: 56 additions & 0 deletions (scrapper.py)
@@ -0,0 +1,56 @@
import requests
from bs4 import BeautifulSoup

# Reddit tends to block the default requests User-Agent, so send a browser-like one.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}


def extract_post(html, subreddit):
    votes = html.find("div", {"class": "_1rZYMD_4xY3gRcSS3p8ODO"})
    if votes:
        votes = votes.string
    title = html.find("h3", {"class": "_eYtD2XCVieq6emjKBH3m"})
    if title:
        title = title.string
    link = html.find("a", {"class": "SQnoC3ObvgnGjWt90zD9Z _2INHSNB8V5eaWp4P0rY_mE"})
    if link:
        link = link['href']
    if votes and title and link:
        # NOTE: int() raises on abbreviated counts like "1.2k"; scrape_subreddit's except swallows that.
        return {'votes': int(votes), 'title': title, 'link': link, 'subreddit': subreddit}
    else:
        return None


def scrape_subreddit(subreddit):
    all_posts = []
    try:
        url = f"https://www.reddit.com/r/{subreddit}/top/?t=month"
        request = requests.get(url, headers=headers)
        soup = BeautifulSoup(request.text, "html.parser")
        post_container = soup.find("div", {"class": "rpBJOHq2PR60pnwJlUyP0"})
        if post_container:
            posts = post_container.find_all("div", {"class": None}, recursive=False)
            for post in posts:
                extracted_post = extract_post(post, subreddit)
                if extracted_post:
                    all_posts.append(extracted_post)
    except Exception:
        # A subreddit that fails to load or parse is simply skipped.
        pass
    return all_posts


def aggregate_subreddits(subreddits):
    aggregated = []
    for subreddit in subreddits:
        posts = scrape_subreddit(subreddit)
        aggregated += posts
    return aggregated


def check_subreddit(to_check):
    try:
        check_request = requests.get(f"https://reddit.com/r/{to_check}", headers=headers)
        return check_request.status_code == 200
    except Exception:
        return False
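
A hedged usage sketch; the long class-name selectors are tied to one generation of Reddit's redesign markup, so results depend entirely on what the live site serves:

if __name__ == "__main__":
    print(check_subreddit("python"))  # True when https://reddit.com/r/python answers 200
    for post in aggregate_subreddits(["python"])[:3]:
        print(post["votes"], post["title"], post["link"])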
Lines changed: 41 additions & 0 deletions (sof.py)
@@ -0,0 +1,41 @@
import requests
from bs4 import BeautifulSoup

URL = "https://stackoverflow.com/jobs?r=true&q=python"


def get_last_page():
    result = requests.get(URL)
    soup = BeautifulSoup(result.text, "html.parser")
    pages = soup.find("div", {"class": "s-pagination"}).find_all("a")
    # The last <a> is the "next" button; the one before it holds the last page number.
    last_page = pages[-2].get_text(strip=True)
    return int(last_page)


def extract_job(html):
    title = html.find("h2", {"class": "fs-body3"}).find("a")["title"]
    company, location = html.find("h3", {"class": "fs-body1"}).find_all("span", recursive=False)
    company = company.get_text(strip=True)
    location = location.get_text(strip=True)
    job_id = html["data-jobid"]
    return {"title": title, "company": company, "location": location, "link": f"https://stackoverflow.com/jobs/{job_id}"}


def extract_jobs(last_page):
    jobs = []
    for page in range(last_page):
        print(f"Scraping SOF page: {page + 1}")
        result = requests.get(f"{URL}&pg={page + 1}")
        soup = BeautifulSoup(result.text, "html.parser")
        results = soup.find_all("div", {"class": "-job"})
        for result in results:
            job = extract_job(result)
            jobs.append(job)
    return jobs


def get_jobs():
    last_page = get_last_page()
    jobs = extract_jobs(last_page)
    return jobs
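
To trace the pagination logic: if the pagination bar renders anchors for 1, 2, 3, ..., 14 followed by a "next" link, then pages[-2] is the anchor for 14, get_last_page() returns 14, and extract_jobs(14) requests &pg=1 through &pg=14, collecting every div with class "-job" on each page.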
41+
