-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyeet.py
160 lines (126 loc) · 6.34 KB
/
yeet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
import pandas as pd
import numpy as np
# Directory holding one OHLC CSV per ticker; files are looked up there as
# "<ticker>_historical_data.csv".
all_tickers_dir = 'data/all-tickers'
# Merged data CSV; every column header after the first is treated as a ticker symbol.
merged_data_file = 'data/merged_sp500_data.csv'
# Function to calculate the rolling cumulative return
def calculate_cumulative_return(data, window):
    """Return the cumulative return over each trailing ``window``-period span.

    For every row ``t`` the result is ``data[t] / data[t - window] - 1``,
    i.e. the compounded return of the ``window`` single-period returns that
    end at ``t``. The result keeps the time axis, so it can be aligned
    row-by-row with a rolling volatility computed on the same data.

    Multiplying all the overlapping window returns together (as an earlier
    version did) is not a cumulative return and collapses the time axis to a
    single value per column, which cannot be aligned with per-row metrics.

    Args:
        data: pandas Series or DataFrame of prices. Rows are assumed to be in
            chronological order.
        window: Number of periods to look back.

    Returns:
        An object with the same type, shape and index as ``data``. The first
        ``window`` rows are NaN because they have no look-back price.
    """
    return data.pct_change(periods=window)
# Function to calculate rolling volatility
def calculate_volatility(data, window):
    """Return the rolling standard deviation of single-period returns.

    Args:
        data: pandas Series or DataFrame of prices.
        window: Number of consecutive returns in each rolling window.

    Returns:
        An object shaped like ``data`` holding, for each row, the standard
        deviation of the last ``window`` period-over-period percentage
        changes. Rows without a full window of returns are NaN.
    """
    period_returns = data.pct_change()
    windowed_returns = period_returns.rolling(window=window)
    return windowed_returns.std()
# Function to calculate the return-to-volatility (Sharpe-style) ratio
def calculate_sharpe_ratio(cumulative_return, volatility):
    """Divide cumulative return by volatility on their shared labels.

    The two inputs are inner-joined first, so only labels present in both
    contribute to the result. No risk-free rate is subtracted.

    Args:
        cumulative_return: pandas object holding cumulative returns.
        volatility: pandas object holding volatilities.

    Returns:
        The element-wise ratio on the shared labels, with NaN wherever the
        volatility is exactly zero. If the inner join leaves nothing, a
        message is printed and an empty DataFrame is returned.
    """
    returns, risk = cumulative_return.align(volatility, join='inner')

    # Nothing in common between the two inputs: report it and bail out
    if returns.empty or risk.empty:
        print("Error: Aligned cumulative return or volatility DataFrame is empty.")
        return pd.DataFrame()

    # Zero volatility becomes NaN so the division never hits a zero denominator
    safe_risk = np.where(risk == 0, np.nan, risk)
    return returns / safe_risk
# Function to load the historical OHLC CSV for one ticker
def load_ohlc_data(ticker, all_tickers_dir):
    """Read ``<all_tickers_dir>/<ticker>_historical_data.csv`` into a DataFrame.

    A lowercase ``date`` header is renamed to ``Date``, and the ``Date``
    column is converted to datetimes.

    Args:
        ticker: Ticker symbol used to build the file name.
        all_tickers_dir: Directory that contains the per-ticker CSV files.

    Returns:
        The loaded DataFrame, or ``None`` if reading or date parsing fails
        for any reason (the error is printed, not raised).
    """
    csv_path = f"{all_tickers_dir}/{ticker}_historical_data.csv"
    try:
        frame = pd.read_csv(csv_path)
        # Renaming is a no-op when there is no lowercase 'date' header
        frame = frame.rename(columns={'date': 'Date'})
        frame['Date'] = pd.to_datetime(frame['Date'])
    except Exception as e:
        print(f"Error loading data for {ticker}: {e}")
        return None
    return frame
# Function to pick the leading entries of each metric column
def screen_stocks(metric_df, top_n=50, ascending=False):
    """Return, for every column, the index labels of its ``top_n`` leading rows.

    Each column is sorted on its own and the index labels of the first
    ``top_n`` entries are kept; the per-column results are combined by
    ``DataFrame.apply``.

    NOTE(review): the ranking is over the *rows* of ``metric_df``, so the
    entities being screened must be on the index, not on the columns.

    Args:
        metric_df: DataFrame of metric values.
        top_n: How many leading labels to keep per column.
        ascending: Sort direction; ``False`` keeps the largest values.

    Returns:
        The ``apply`` result holding the selected labels per column, or an
        empty ``pd.Index`` (after printing a message) when ``metric_df`` is
        not a non-empty DataFrame.
    """
    if not isinstance(metric_df, pd.DataFrame) or metric_df.empty:
        print(f"Error: Expected a non-empty DataFrame but received {type(metric_df)}.")
        return pd.Index([])

    def _leading_labels(column):
        ordered = column.sort_values(ascending=ascending)
        return ordered.head(top_n).index

    return metric_df.apply(_leading_labels)
# Step 1: Load the list of tickers from 'merged_sp500_data.csv'
def load_tickers_from_file(merged_data_file):
    """Return the ticker symbols stored as column headers of the merged CSV.

    Only the header row is parsed (``nrows=0``): the ticker list lives
    entirely in the column names, so reading every data row of the merged
    file would be wasted work.

    Args:
        merged_data_file: Path to the merged CSV whose first column is the
            date and whose remaining column names are ticker symbols.

    Returns:
        A list of the column names after the first one, or an empty list if
        the file cannot be read (the error is printed, not raised).
    """
    try:
        header = pd.read_csv(merged_data_file, nrows=0)
    except Exception as e:
        print(f"Error loading tickers from {merged_data_file}: {e}")
        return []
    # Skip the leading 'Date' column; everything else is a ticker
    return list(header.columns[1:])
# Load tickers from the merged data file
tickers = load_tickers_from_file(merged_data_file)
# Calculated metrics, keyed first by ticker and then by window size
metrics_results = {}
# Look-back window sizes (in rows of price data) used for every metric
window_sizes = [5, 10, 20, 50]
# Step 2: Calculate cumulative return, volatility, and Sharpe ratio for each stock
for ticker in tickers:
    print(f"Processing {ticker}...")
    # Load OHLC data for the ticker
    stock_data = load_ohlc_data(ticker, all_tickers_dir)
    if stock_data is None or stock_data.empty:
        print(f"No data found for {ticker}. Skipping.")
        continue  # Skip if no data
    print(f"\nLoaded {ticker} Historical Data:\n", stock_data.head())  # Print head of loaded data for inspection
    # A file without the price column is skipped like any other unusable
    # ticker instead of aborting the whole run with a KeyError
    if 'Adj Close' not in stock_data.columns:
        print(f"No 'Adj Close' column found for {ticker}. Skipping.")
        continue
    # Rolling metrics are only meaningful on chronologically ordered rows,
    # so sort by date before taking the 'Adj Close' column
    adj_close_data = stock_data.sort_values('Date')[['Adj Close']].dropna()
    ticker_metrics = {}
    # Calculate metrics for each window size
    for window in window_sizes:
        if len(adj_close_data) < window:  # Check if there's enough data for the window
            print(f"Not enough data for window size {window} for {ticker}. Skipping this window.")
            continue
        print(f"Calculating metrics for window size: {window} for {ticker}")
        cr_df = calculate_cumulative_return(adj_close_data, window)
        volatility_df = calculate_volatility(adj_close_data, window)
        # Drop the leading rows that have no full look-back window
        cr_df = cr_df.dropna()
        volatility_df = volatility_df.dropna()
        if cr_df.empty or volatility_df.empty:
            print(f"Error: One or more metric DataFrames are empty for window size {window}. Skipping this window for {ticker}.")
            continue
        sharpe_df = calculate_sharpe_ratio(cr_df, volatility_df)
        if sharpe_df.empty:
            print(f"Error: Sharpe ratio DataFrame is empty for window size {window}. Skipping this window for {ticker}.")
            continue
        # Store results for this ticker and window
        ticker_metrics[window] = {
            'Cumulative Return': cr_df,
            'Volatility': volatility_df,
            'Sharpe Ratio': sharpe_df,
        }
    # Only tickers with at least one usable window are recorded
    if ticker_metrics:
        metrics_results[ticker] = ticker_metrics
# Step 3: Screening stocks based on cumulative return, volatility, and Sharpe ratio
def _latest_metric_value(metric):
    """Return the last non-NaN value of a per-ticker metric, or NaN if none.

    Accepts either a Series or a DataFrame (its first column is used).
    The last row is taken as the most recent observation — this assumes the
    metric rows are in chronological order.
    """
    series = metric.iloc[:, 0] if isinstance(metric, pd.DataFrame) else metric
    series = series.dropna()
    return series.iloc[-1] if not series.empty else np.nan


# Loop through each window size and apply screening
for window in window_sizes:
    print(f"\nScreening stocks for window size: {window}")
    # Latest value of each metric, keyed by ticker. Tickers must end up on the
    # index because screen_stocks ranks index labels within each column;
    # concatenating the raw per-ticker frames side by side would rank row
    # positions (and every column would carry the same 'Adj Close' name).
    latest_cr = {}
    latest_vol = {}
    latest_sr = {}
    for ticker, ticker_data in metrics_results.items():
        if window not in ticker_data:
            continue
        window_metrics = ticker_data[window]
        latest_cr[ticker] = _latest_metric_value(window_metrics['Cumulative Return'])
        latest_vol[ticker] = _latest_metric_value(window_metrics['Volatility'])
        latest_sr[ticker] = _latest_metric_value(window_metrics['Sharpe Ratio'])
    # The three dicts are filled together, so checking one is enough
    if latest_cr:
        # One row per ticker; tickers without a usable value are dropped so
        # NaN entries cannot pad out the top-N lists
        cr_df = pd.Series(latest_cr, dtype=float).dropna().to_frame('Cumulative Return')
        vol_df = pd.Series(latest_vol, dtype=float).dropna().to_frame('Volatility')
        sr_df = pd.Series(latest_sr, dtype=float).dropna().to_frame('Sharpe Ratio')
        # Perform stock screening for each metric
        top_cr_stocks = screen_stocks(cr_df, top_n=50, ascending=False)  # High cumulative return
        top_vol_stocks = screen_stocks(vol_df, top_n=50, ascending=True)  # Low volatility
        top_sr_stocks = screen_stocks(sr_df, top_n=50, ascending=False)  # High Sharpe ratio
        # Print the results (or further process the screened stocks)
        print(f"\nTop 50 stocks by Cumulative Return for window {window}:\n", top_cr_stocks)
        print(f"\nTop 50 stocks by Volatility for window {window}:\n", top_vol_stocks)
        print(f"\nTop 50 stocks by Sharpe Ratio for window {window}:\n", top_sr_stocks)
    else:
        print(f"Error: No data available for window size {window}.")