From c7cf76436f3fcf02e1acd6e788aab27af6da4bb3 Mon Sep 17 00:00:00 2001 From: shashankvemuri Date: Mon, 8 Jan 2024 20:30:25 -0500 Subject: [PATCH] yahoo finance 404 workaround --- machine_learning/kmeans_clustering.py | 8 ++++-- .../geometric_brownian_motion.py | 11 ++++---- portfolio_strategies/sma_trading_strategy.py | 3 +-- stock_analysis/capm_analysis.py | 8 ++++-- stock_analysis/intrinsic_value.py | 5 ++-- stock_analysis/seasonal_stock_analysis.py | 27 +++++-------------- stock_data/green_line_test.py | 6 +---- stock_data/send_top_movers.py | 16 +++++++++-- 8 files changed, 43 insertions(+), 41 deletions(-) diff --git a/machine_learning/kmeans_clustering.py b/machine_learning/kmeans_clustering.py index d808687..087edde 100755 --- a/machine_learning/kmeans_clustering.py +++ b/machine_learning/kmeans_clustering.py @@ -8,11 +8,15 @@ import datetime as dt import requests from pandas_datareader import data as pdr -from yahoo_fin import stock_info as si +import sys +import os +parent_dir = os.path.dirname(os.getcwd()) +sys.path.append(parent_dir) +import tickers as ti # Loading the data yf.pdr_override() -stocks = si.tickers_dow()[10:30] # Shorten the code for readability +stocks = ti.tickers_dow() # Use the full ticker list returned by tickers_dow() start = dt.datetime(2010, 1, 1) now = dt.datetime.now() diff --git a/portfolio_strategies/geometric_brownian_motion.py b/portfolio_strategies/geometric_brownian_motion.py index b96c7fb..e093cf6 100755 --- a/portfolio_strategies/geometric_brownian_motion.py +++ b/portfolio_strategies/geometric_brownian_motion.py @@ -2,8 +2,7 @@ import matplotlib.pyplot as plt import numpy as np import math -import yahoo_fin.stock_info as si -from pandas_datareader import DataReader +import pandas_datareader.data as pdr import pandas as pd import datetime from pylab import rcParams @@ -20,8 +19,8 @@ index = '^GSPC' # Fetch stock and index data from Yahoo Finance -df = DataReader(stock, 'yahoo', start_date, end_date) -dfb = DataReader(index, 'yahoo', 
start_date, end_date) +df = pdr.get_data_yahoo(stock, start_date, end_date) +dfb = pdr.get_data_yahoo(index, start_date, end_date) # Resample the data to monthly frequency rts = df.resample('M').last() @@ -64,7 +63,7 @@ def simulate(self): # Set the parameters for the Geometric Brownian Motion simulation n = 20 # Number of simulations -initial_price = si.get_live_price(stock) # Use the live stock price as the initial price +initial_price = df['Adj Close'][-1] # Use the most recent adjusted close as the initial price drift = .24 # Use a constant drift value volatility = np.sqrt(covmat[0,0]) # Use the volatility of the stock time_period = 1 / 365 # Daily @@ -80,7 +79,7 @@ def simulate(self): # Add the title, legend, and axis labels plt.title(f'Geometric Brownian Motion for {stock.upper()}') -plt.axhline(y=si.get_live_price(stock), color='r') +plt.axhline(y=initial_price, color='r') plt.xlabel('Time') plt.ylabel('Price') plt.show() \ No newline at end of file diff --git a/portfolio_strategies/sma_trading_strategy.py b/portfolio_strategies/sma_trading_strategy.py index c99e212..51a3788 100755 --- a/portfolio_strategies/sma_trading_strategy.py +++ b/portfolio_strategies/sma_trading_strategy.py @@ -6,7 +6,6 @@ import matplotlib.pyplot as plt import mplfinance as mpf from finta import TA -from yahoo_fin import stock_info as si # Define time range num_of_years = 10 @@ -14,10 +13,10 @@ end = dt.datetime.now() stock = 'NFLX' -current_price = round(si.get_live_price(stock), 2) # Download historical stock data df = yf.download(stock, start, end, interval='1d') +current_price = round(df['Adj Close'][-1], 2) # Calculate Simple Moving Average(SMA) short_sma = 20 diff --git a/stock_analysis/capm_analysis.py b/stock_analysis/capm_analysis.py index a738ee3..21ce29e 100755 --- a/stock_analysis/capm_analysis.py +++ b/stock_analysis/capm_analysis.py @@ -5,7 +5,11 @@ import datetime from socket import gaierror from pandas_datareader._utils import RemoteDataError -from yahoo_fin import stock_info 
as si +import sys +import os +parent_dir = os.path.dirname(os.getcwd()) +sys.path.append(parent_dir) +import tickers as ti # Define risk-free return rate risk_free_return = 0.02 @@ -15,7 +19,7 @@ pd.set_option('display.max_columns', None) # Get all tickers in NASDAQ stock exchange -nasdaq_tickers = si.tickers_nasdaq() +nasdaq_tickers = ti.tickers_nasdaq() # Define the ticker for index index_ticker = '^GSPC' diff --git a/stock_analysis/intrinsic_value.py b/stock_analysis/intrinsic_value.py index 7e9e0d2..64d33fa 100755 --- a/stock_analysis/intrinsic_value.py +++ b/stock_analysis/intrinsic_value.py @@ -5,7 +5,7 @@ from bs4 import BeautifulSoup as bs import numpy as np import pandas as pd -from yahoo_fin import stock_info as si +import pandas_datareader.data as pdr # Setting pandas options pd.set_option('float_format', '{:f}'.format) @@ -16,7 +16,8 @@ # Parameters ticker = 'AAPL' # The stock ticker to get its intrinsic value -current_price = si.get_live_price(ticker) +df = pdr.get_data_yahoo(ticker) +current_price = df['Adj Close'][-1] # Retrieving JSON data def json_data(url): diff --git a/stock_analysis/seasonal_stock_analysis.py b/stock_analysis/seasonal_stock_analysis.py index 3c7974c..2bb2655 100644 --- a/stock_analysis/seasonal_stock_analysis.py +++ b/stock_analysis/seasonal_stock_analysis.py @@ -4,35 +4,22 @@ import matplotlib.pyplot as plt import numpy as np import yfinance as yf -from pandas_datareader import data as pdr -import xlsxwriter import requests -from yahoo_fin import stock_info as si import pickle import bs4 as bs +import requests # You need to change this to a convenient spot on your own hard drive. 
my_path = "" threshold = 0.80 -# Download a list of the S&P 500 components -def save_spx_tickers(): - resp = requests.get("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies") - soup = bs.BeautifulSoup(resp.text, "lxml") - table = soup.find("table", {"class": "wikitable sortable"}) - tickers = [] - for row in table.findAll("tr")[1:]: - ticker = row.find_all("td")[0].text.strip() - tickers.append(ticker) - with open("spxTickers.pickle", "wb") as f: - pickle.dump(tickers, f) - return tickers -sp500_tickers = save_spx_tickers() - -# Make the ticker symbols readable by Yahoo Finance -sp500_tickers = [item.replace(".", "-") for item in sp500_tickers] +# Scrape a list of the S&P 500 components +url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies' +html = requests.get(url).text +sp_df = pd.read_html(html, header=0)[0] +sp500_tickers = sp_df['Symbol'].tolist() -# Upload a list of the S&P 500 components downloaded from Yahoo. +# Upload a list of the S&P 500 components downloaded from Wikipedia. 
mylist = [] mylist2 = [] df_sp500_tickers = pd.DataFrame(list(zip(sp500_tickers)), columns=["Symbol"]) diff --git a/stock_data/green_line_test.py b/stock_data/green_line_test.py index 95cb088..3d0bacc 100644 --- a/stock_data/green_line_test.py +++ b/stock_data/green_line_test.py @@ -4,7 +4,6 @@ from pandas_datareader import DataReader import matplotlib.pyplot as plt from pylab import rcParams -import yahoo_fin.stock_info as si # Set the start date for historical data start = dt.datetime(1980,12,1) @@ -16,12 +15,9 @@ # Continue asking user for a stock ticker until they enter 'quit' while stock != 'quit': - - # Get the current stock price - price = si.get_live_price(stock) - # Get the historical stock data df = DataReader(stock, 'yahoo', start, now) + price = df['Adj Close'][-1] # Remove rows where volume is less than 1000 df.drop(df[df["Volume"]<1000].index, inplace=True) diff --git a/stock_data/send_top_movers.py b/stock_data/send_top_movers.py index 7799232..0c43612 100755 --- a/stock_data/send_top_movers.py +++ b/stock_data/send_top_movers.py @@ -4,13 +4,25 @@ from email.mime.multipart import MIMEMultipart from email.mime.base import MIMEBase from email import encoders -import yahoo_fin.stock_info as si import time import datetime import os +import requests +from bs4 import BeautifulSoup +import pandas as pd + +# Get top winner stocks for the day +def scrape_top_winners(): + url = 'https://finance.yahoo.com/gainers/' + response = requests.get(url) + soup = BeautifulSoup(response.text, 'html.parser') + + df = pd.read_html(str(soup), attrs={'class': 'W(100%)'})[0] + df = df.drop(columns=['52 Week High']) + return df # get top gainers data -df = si.get_day_gainers() +df = scrape_top_winners() df_filtered = df[df['% Change']>=5] # get today's date and use it to create a file name