This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Commit 7c6fba7

Merge pull request #119 from antoniouaa/master
Stock Scraper feature
2 parents bc1b84e + 248b93a commit 7c6fba7

File tree

6 files changed: +92 −0 lines changed


.gitignore

Lines changed: 2 additions & 0 deletions

@@ -1,2 +1,4 @@
+**/.vscode
+**/venv
 .idea/
 .DS_Store

Scripts/API/.gitignore

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+**/venv
+**.vscode
Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
# S&P 500 scraper

This is a simple web scraper that collects information about the 500 companies listed in the S&P 500 index.

![](stock_scraper.gif)

## Installation

Create a virtual environment for the script:

    python -m venv venv

and activate it:

    .\venv\Scripts\activate   # Windows

or

    source venv/bin/activate  # Linux

Install the requirements:

    pip install -r requirements.txt

To run the script:

    python pycon_proposals.py
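The diff does not include requirements.txt itself; based on the imports in the scraper script further down, it presumably lists something close to the following (package names are inferred from the code, not taken from this commit):

requests
yfinance
pandas
beautifulsoup4
lxml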
Binary file not shown.
Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path
import os
import sys

import requests
import yfinance
import pandas as pd
from bs4 import BeautifulSoup

WIKI_ENDPOINT = "https://en.wikipedia.org/w/api.php"


@contextmanager
def suppress_console():
    # Temporarily redirect stdout to devnull so yfinance's download progress
    # output does not clutter the console; stdout is restored on exit.
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout


def fetch_ticker_names():
    # Ask the MediaWiki API for the rendered "List of S&P 500 companies" page
    # and collect (symbol, company) pairs from the first two cells of each row.
    params = {
        "action": "parse",
        "format": "json",
        "page": "List of S&P 500 companies",
        "prop": "text",
    }
    response = requests.get(WIKI_ENDPOINT, params=params)
    soup = BeautifulSoup(response.content, "lxml")
    symbols = set()
    for item in soup.find_all("tr")[:501]:
        cells = item.find_all("td")[:2]
        try:
            symbol, company = cells[0].a.text, cells[1].a.text
        except (IndexError, AttributeError):
            # Header rows and rows without linked cells are skipped.
            continue
        symbols.add((symbol, company))
    return list(symbols)


def fetch_daily_data_for_ticker(symbol):
    # Download roughly the last 12 weeks of daily price data for one ticker.
    end_date = datetime.today()
    start_date = end_date - timedelta(weeks=12)
    return yfinance.download(
        symbol, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
    )


if __name__ == "__main__":
    tickers = fetch_ticker_names()
    if input(">>> Save Data to Drive? ").lower() in ["y", "yes"]:
        Path("CSVs").mkdir(parents=True, exist_ok=True)
        for i, (symbol, company) in enumerate(tickers, start=1):
            with suppress_console():
                data = fetch_daily_data_for_ticker(symbol)
            # One CSV per ticker; pathlib keeps the path portable across OSes.
            data.to_csv(Path("CSVs") / f"{symbol}.csv")
            print(f"File #{i} saved", end="\r")
        print("\nFiles scraped and downloaded!")
