fix bugs

blkpvnthr · Nov 28, 2024 · e6d7058 · e6d7058
1 parent 7d9fb2a
commit e6d7058
Showing 1 changed file with 31 additions and 12 deletions.
diff --git a/step_3.py b/step_3.py
@@ -1,24 +1,40 @@
 import pandas as pd
 import yfinance as yf
 import numpy as np
-from pypfopt.efficient_frontier import EfficientFrontier
-from pypfopt import risk_models, expected_returns
 from datetime import datetime, timedelta
 
-start = datetime.now() - timedelta(days=(365*5))
+# Define the start and end dates for historical data
+start = datetime.now() - timedelta(days=(365 * 5))
 end = datetime.now()
-"""Covariance is a measure the direction of movement from 2 variables. A positive covariance
-means that the combination of two stocks in an exclusive portfolio tends to move in the same
-direction. A negative covariance shows that two stocks move in opposite directions,in one
-direction, if one stock increases in profit the other will decrease in return. 
-create a covariance matrix using optimal_portfolio_candidates.csv"""
 
 # Step 1: Load the optimal_portfolio_candidates.csv file
-df_candidates = pd.read_csv('optimal_portfolio_candidates.csv')
+# Load the CSV file
+file_path = 'optimal_portfolio_candidates.csv'
+df_candidates = pd.read_csv(file_path)
+
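+# Strip every character that is not a word character or whitespace from the 'ticker' column.
+# The current CSV does not appear to contain punctuation; note that this would also remove '.' and '-' from symbols such as 'BRK.B' or 'BRK-B'.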
+df_candidates['ticker'] = df_candidates['ticker'].str.replace(r'[^\w\s]', '', regex=True)
+
+# Extract the tickers as a list
 tickers = df_candidates['ticker'].tolist()
 
+# Save tickers to a file, separated by new lines
+with open('tickers.txt', 'w') as f:
+    f.write('\n'.join(tickers))
+
+print("Tickers extracted and saved to 'tickers.txt':")
+print("\n".join(tickers))
 # Step 2: Download historical stock data for the selected tickers
-data = yf.download(tickers, start=start, end=end)['Adj Close']
+try:
+    data = yf.download(tickers, start=start, end=end)['Adj Close']
+except Exception as e:
+    print(f"Error downloading data: {e}")
+    data = pd.DataFrame()  # Fallback to empty DataFrame
+
+# Drop every ticker whose price series contains at least one missing value
+data = data.dropna(axis=1, how='any')  # Remove stocks with missing data
+tickers = data.columns.tolist()  # Update the ticker list to include only valid tickers
 
 # Step 3: Calculate the percentage returns for each stock
 returns = data.pct_change().dropna()
@@ -28,7 +44,9 @@
 
 # Step 5: Function to calculate covariance between two stocks using the given formula
 def calculate_covariance(returns_a, returns_b, expected_return_a, expected_return_b):
-    n = len(returns_a)  # Number of observations
+    n = len(returns_a)
+    if n == 0:
+        return np.nan  # Return NaN if there is no data to calculate covariance
     covariance = sum((returns_a - expected_return_a) * (returns_b - expected_return_b)) / n
     return covariance
 
@@ -43,6 +61,7 @@ def calculate_covariance(returns_a, returns_b, expected_return_a, expected_retur
         expected_return_a = expected_returns[stock_a]
         expected_return_b = expected_returns[stock_b]
 
+        # calculate_covariance returns NaN when there are no observations
         cov_matrix.loc[stock_a, stock_b] = calculate_covariance(returns_a, returns_b, expected_return_a, expected_return_b)
 
 # Step 8: Display the covariance matrix
@@ -51,4 +70,4 @@ def calculate_covariance(returns_a, returns_b, expected_return_a, expected_retur
 print("\nCovariance matrix saved to custom_covariance_matrix.csv\n")
 
 # Step 9: Save the covariance matrix to a CSV file
-cov_matrix.to_csv('custom_covariance_matrix.csv')
+cov_matrix.to_csv('custom_covariance_matrix.csv')