1+ import requests
2+ from bs4 import BeautifulSoup
3+
# Browser-like User-Agent sent with every request below.
# NOTE(review): presumably set because Reddit throttles/blocks the default
# python-requests UA — confirm this is still required.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
5+
6+
def _parse_votes(raw):
    """Parse a vote-count string into an int, or return None if unparsable.

    Reddit abbreviates large counts (e.g. "12.5k"), which would make a bare
    int() raise ValueError; comma-grouped numbers ("1,234") would too.
    """
    text = str(raw).strip().lower().replace(',', '')
    try:
        if text.endswith('k'):
            return int(float(text[:-1]) * 1_000)
        if text.endswith('m'):
            return int(float(text[:-1]) * 1_000_000)
        return int(text)
    except ValueError:
        # Placeholder glyphs ("•", "Vote", ...) — signal "no usable count".
        return None


def extract_post(html, subreddit):
    """Extract one post's data from its BeautifulSoup element.

    Parameters:
        html: BeautifulSoup Tag for a single post container.
        subreddit: name of the subreddit the post came from (stored verbatim).

    Returns:
        dict with 'votes' (int), 'title', 'link', 'subreddit' keys, or None
        when any of the three pieces is missing or the vote count is
        unparsable — so one malformed post never aborts the caller's loop.
    """
    # The class names below are Reddit's obfuscated CSS classes; they are
    # runtime data and must not be altered.
    votes = html.find("div", {"class": "_1rZYMD_4xY3gRcSS3p8ODO"})
    if votes:
        votes = votes.string
    title = html.find("h3", {"class": "_eYtD2XCVieq6emjKBH3m"})
    if title:
        title = title.string
    link = html.find("a", {"class": "SQnoC3ObvgnGjWt90zD9Z _2INHSNB8V5eaWp4P0rY_mE"})
    if link:
        link = link['href']
    if not (votes and title and link):
        return None
    vote_count = _parse_votes(votes)
    if vote_count is None:
        return None
    return {'votes': vote_count, 'title': title, 'link': link, 'subreddit': subreddit}
21+
def scrape_subreddit(subreddit):
    """Scrape a subreddit's top posts of the past month.

    Parameters:
        subreddit: subreddit name (without the "r/" prefix).

    Returns:
        list of post dicts as produced by extract_post(); an empty (or
        partial) list when the request or parsing fails — this function is
        deliberately best-effort and never raises.
    """
    all_posts = []
    url = f"https://www.reddit.com/r/{subreddit}/top/?t=month"
    try:
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")
        # Reddit's obfuscated class for the post-list container.
        post_container = soup.find("div", {"class": "rpBJOHq2PR60pnwJlUyP0"})
        if post_container:
            # Direct children without a class attribute are the post elements.
            for post in post_container.find_all("div", {"class": None}, recursive=False):
                extracted_post = extract_post(post, subreddit)
                if extracted_post:
                    all_posts.append(extracted_post)
    except Exception:
        # Deliberate swallow: a failing subreddit contributes whatever posts
        # were parsed before the error instead of crashing the whole batch.
        # NOTE(review): consider logging here so failures aren't invisible.
        pass
    return all_posts
38+
def aggregate_subreddits(subreddits):
    """Scrape every subreddit in *subreddits* and return all posts in one list.

    Parameters:
        subreddits: iterable of subreddit names.

    Returns:
        single flat list, in input order; a subreddit that fails to scrape
        simply contributes no posts (scrape_subreddit returns [] on failure).
    """
    aggregated = []
    for subreddit in subreddits:
        # extend() in place rather than repeated `aggregated + posts`
        # concatenation, which rebuilds the list each iteration (quadratic).
        aggregated.extend(scrape_subreddit(subreddit))
    return aggregated
45+
46+
47+
def check_subreddit(to_check):
    """Return True if https://reddit.com/r/<to_check> answers with HTTP 200.

    Parameters:
        to_check: subreddit name (without the "r/" prefix).

    Returns:
        True on a 200 response; False on any other status or on any
        network error, so callers can use this as a simple existence probe.
    """
    try:
        check_request = requests.get(f"https://reddit.com/r/{to_check}", headers=headers)
    except Exception:
        # Network failure -> treat the subreddit as unavailable.
        return False
    # NOTE(review): requests follows redirects, so this is the status of the
    # final (www.) page; private/banned subreddits may return 403/404 —
    # confirm that matches caller expectations.
    return check_request.status_code == 200
# (removed stray "0 commit comments" web-page residue — not Python code)