This repository was archived by the owner on Dec 22, 2023. It is now read-only.

Commit 7c6fba7

Merge pull request #119 from antoniouaa/master
Stock Scraper feature
2 parents bc1b84e + 248b93a commit 7c6fba7

File tree

6 files changed: +92 −0 lines changed


.gitignore

Lines changed: 2 additions & 0 deletions

@@ -1,2 +1,4 @@
+**/.vscode
+**/venv
 .idea/
 .DS_Store

Scripts/API/.gitignore

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+**/venv
+**.vscode
Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
# S&P 500 scraper

This is a simple web scraper that collects information about the 500 companies listed in the S&P 500 index.

![](stock_scraper.gif)

## Installation

Create a virtual environment for the script:

    python -m venv venv

and activate it:

    .\venv\Scripts\activate   # Windows

or

    source venv/bin/activate  # Linux

Install the requirements:

    pip install -r requirements.txt

To run the script:

    python pycon_proposals.py
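The diff does not include requirements.txt itself; based on the imports in the scraper script further down, it presumably lists something close to the following (package names are inferred from the code, not taken from this commit):

requests
yfinance
pandas
beautifulsoup4
lxml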
Binary file not shown.
Lines changed: 61 additions & 0 deletions

@@ -0,0 +1,61 @@
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path
import os
import sys

import requests
import yfinance
import pandas as pd
from bs4 import BeautifulSoup

WIKI_ENDPOINT = "https://en.wikipedia.org/w/api.php"


@contextmanager
def suppress_console():
    # Temporarily redirect stdout to devnull so yfinance's download progress
    # output does not clutter the console; stdout is restored on exit.
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout


def fetch_ticker_names():
    # Ask the MediaWiki API for the rendered "List of S&P 500 companies" page
    # and collect (symbol, company) pairs from the first two cells of each row.
    params = {
        "action": "parse",
        "format": "json",
        "page": "List of S&P 500 companies",
        "prop": "text",
    }
    response = requests.get(WIKI_ENDPOINT, params=params)
    soup = BeautifulSoup(response.content, "lxml")
    symbols = set()
    for item in soup.find_all("tr")[:501]:
        cells = item.find_all("td")[:2]
        try:
            symbol, company = cells[0].a.text, cells[1].a.text
        except (IndexError, AttributeError):
            # Header rows and rows without linked cells are skipped.
            continue
        symbols.add((symbol, company))
    return list(symbols)


def fetch_daily_data_for_ticker(symbol):
    # Download roughly the last 12 weeks of daily price data for one ticker.
    end_date = datetime.today()
    start_date = end_date - timedelta(weeks=12)
    return yfinance.download(
        symbol, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")
    )


if __name__ == "__main__":
    tickers = fetch_ticker_names()
    if input(">>> Save Data to Drive? ").lower() in ["y", "yes"]:
        Path("CSVs").mkdir(parents=True, exist_ok=True)
        for i, (symbol, company) in enumerate(tickers, start=1):
            with suppress_console():
                data = fetch_daily_data_for_ticker(symbol)
            # One CSV per ticker; pathlib keeps the path portable across OSes.
            data.to_csv(Path("CSVs") / f"{symbol}.csv")
            print(f"File #{i} saved", end="\r")
        print("\nFiles scraped and downloaded!")
