
Stock Scraper feature #119

Merged
merged 17 commits on Oct 3, 2020
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,2 +1,4 @@
**/.vscode
**/venv
.idea/
.DS_Store
2 changes: 2 additions & 0 deletions Scripts/API/.gitignore
@@ -0,0 +1,2 @@
**/venv
**.vscode
27 changes: 27 additions & 0 deletions Scripts/Web_Scrappers/Stock_Scraper/README.md
@@ -0,0 +1,27 @@
# S&P 500 scraper

This is a simple web scraper that collects the ticker symbols and names of the 500 companies listed in the S&P 500 index, then downloads roughly twelve weeks of daily price data for each one.

![](stock_scraper.gif)


## Installation

Create a virtual environment for the script:

python -m venv venv

and activate it:

.\venv\Scripts\activate # Windows
or

source venv/bin/activate # Linux

Install the requirements:

pip install -r requirements.txt

To run the script:

python scrape_stocks.py
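
For reference, a run looks roughly like this (the per-file counter overwrites itself in place; the output below is illustrative):

$ python scrape_stocks.py
>>> Save Data to Drive? y
File#500 saved
Files scraped and downloaded!

Each ticker's price history is written to CSVs/<symbol>.csv.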
Binary file added Scripts/Web_Scrappers/Stock_Scraper/requirements.txt
Binary file not shown.
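The pinned contents of requirements.txt aren't rendered above, but judging purely from the script's imports it presumably lists at least the following packages (lxml because BeautifulSoup is constructed with the "lxml" parser; pandas comes in as a yfinance dependency):

requests
beautifulsoup4
lxml
yfinance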
61 changes: 61 additions & 0 deletions Scripts/Web_Scrappers/Stock_Scraper/scrape_stocks.py
@@ -0,0 +1,61 @@
import os
import sys
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path

import requests
import yfinance
from bs4 import BeautifulSoup

WIKI_ENDPOINT = "https://en.wikipedia.org/w/api.php"


@contextmanager
def suppress_console():
    # Redirect stdout to the null device so noisy library output
    # (e.g. yfinance's download progress bar) stays hidden.
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout


def fetch_ticker_names():
    # Query the MediaWiki API for the rendered "List of S&P 500 companies" page.
    params = {
        "action": "parse",
        "format": "json",
        "page": "List of S&P 500 companies",
        "prop": "text",
    }
    response = requests.get(WIKI_ENDPOINT, params=params)
    # The response is JSON, but the embedded HTML keeps its <tr>/<td> tags,
    # so BeautifulSoup can still pick out the constituents table rows.
    soup = BeautifulSoup(response.content, "lxml")
    symbols = set()
    for item in soup.find_all("tr")[:501]:  # header row + 500 company rows
        cells = item.find_all("td")[:2]
        try:
            symbol, company = cells[0].a.text, cells[1].a.text
        except (IndexError, AttributeError):
            continue  # header rows and malformed rows lack the expected cells
        symbols.add((symbol, company))
    return list(symbols)


def fetch_daily_data_for_ticker(symbol):
    # Pull roughly twelve weeks of daily price history for one ticker.
    end_date = datetime.today()
    start_date = end_date - timedelta(weeks=12)
    return yfinance.download(symbol, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))


if __name__ == "__main__":
    tickers = fetch_ticker_names()
    if input(">>> Save Data to Drive? ").lower() in ["y", "yes"]:
        Path("CSVs").mkdir(parents=True, exist_ok=True)
        for i, (symbol, company) in enumerate(tickers, start=1):
            with suppress_console():
                data = fetch_daily_data_for_ticker(symbol)
            # Path() keeps the output location portable instead of
            # hard-coding a Windows "\\" separator.
            data.to_csv(Path("CSVs") / f"{symbol}.csv")
            print(f"File#{i} saved", end="\r")
        print("\nFiles scraped and downloaded!")