Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
jcmiii authored Apr 18, 2020
1 parent 42c2f60 commit f4d88d4
Showing 4 changed files with 592 additions and 0 deletions.
372 changes: 372 additions & 0 deletions dbe_2.0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,372 @@
# Program for testing "no new highs lately" or "dying bullish euphoria" (DBE)
# strategy.

# Author: John Merkel
# Date: June 2019

# New to version 2.0
# * Added a ticker price threshold for reentry. When the ticker (index) falls
# below a user entered percent of the most recent new high then the program
# reenters the market regardless of the signal (bull or bear).

# New to version 1.2
# * Added plotting
# * Counts days since last M-day high

# New to version 1.1
# * Can upload data from a spreadsheet. For some reason downloading from
# this program only retrieves data since about 1970 for the S&P 500.
# However, you can manually download data from Y! that goes back to 1950.
# Similar issues for other indices. Hence uploading from a spreadsheet
# may be preferred.
# * Fixes copy of dataframe. v 1.0 did this incorrectly, so that the original
# dataframe was modified in the loop. I don't think it caused any issues,
# but it was not consistent with my intention.

# Import Modules
import pandas as pd
import datetime
import numpy as np
#import math
#import json

#%% Upload historical prices from an Excel spreadsheet
# If you want to download data from Yahoo! don't run this cell.
#
# For correct formating download data from Yahoo! into csv format and then
# simply "save as" a .xlsx spreadsheet.

# If you run this cell you probably do not want to run the next cell which
# downloads prices from Y!

# Workbook and worksheet that hold the end-of-day price history.
# Rows are expected oldest-first (most recent data at the bottom).
eodDataFile = "snp500data_2019-6-18.xlsx"
sheet = "Data"

# Load the worksheet; the first column becomes the row index.
# Column headings are read in as str.
origDataDF = pd.read_excel(eodDataFile, sheet_name = sheet, index_col = 0)

# Do this if you want a smaller dataset for testing, checking post-discovery
# results, etc.
#origDataDF = origDataDF.tail(1000)

#%% Download Historical stock prices from Y!.
# Should include date, open, high, low, close, adjusted close, and volume.
# If you imported data from a spreadsheet above don't run this cell.

# pandas_datareader supplies DataReader, used below to fetch prices from Yahoo!
import pandas_datareader as web

# NOTE: nothing else in the program modifies the downloaded dataframe, so
# this cell only needs to be rerun when one of its own parameters changes.
# Rerunning it otherwise just queries Yahoo for data unnecessarily.

# Cell Parameters
tkr = '^IXIC'       # Stock ticker that data will be downloaded for

# Date range notes:
#   * First trading day for the SPY etf is 1993 Jan 29.
#   * By trial and error the oldest date DataReader seems to allow is
#     1970 Jan 1, even though S&P 500 data on Yahoo! goes back to approx
#     1950 Jan 3.
#   * For NASDAQ (^IXIC): 1971 Feb 5.
startDate = datetime.date(1971, 2, 1)   # start date (yr, mo, day)
endDate = datetime.date(2019, 8, 2)     # end date

# Download data. Most recent data is on the bottom.
origDataDF = web.DataReader(tkr, data_source = 'yahoo', start = startDate,
                            end = endDate)

#%% Parameters
# This cell builds the end-of-day 'signal' column ('bull'/'bear') and the
# lagged 'inMkt' column (True/False) from the price history in origDataDF.
reentryPct = 0.01 # Fraction of the rolling M-day high used as the reentry
# price ('rePt' below). As written, 0.01 is 1% of that high.
# To disable set to 0
M = 107 # Looking for a new M-day hi
N = 134 # in last N days
K = 250 # Positional row index where tracking starts. Should be at least
# as large as M+N
series = 'Adj Close' # Can use 'Close', 'High', 'Low', 'Open', 'Adj Close'
# For indices 'Close' = 'Adj Close' (I think)

###################################################
# Have we had a new M-day high in the last N days?

# First copy the original downloaded data into a new dataframe so we get
# clean data each time we rerun this cell.
eodDF = origDataDF.copy(deep = True)

# Find new M-day highs (True) and calculate the reentry price.
# 'MdayHi' is the rolling maximum of `series` over the last M rows; a row is
# a new high when its own value equals that maximum. The rolling max is NaN
# until M rows are available, so those early rows compare unequal (False).
eodDF['MdayHi'] = eodDF[series].rolling(M).max()
eodDF['newHi'] = np.where(eodDF[series] == eodDF['MdayHi'], True, False)
# NOTE(review): 'rePt' follows the rolling M-day max, which can fall once the
# last new high leaves the M-row window -- confirm that this matches the
# intended "most recent new high".
eodDF['rePt'] = reentryPct*eodDF['MdayHi']

# Count days since the last new M-day high.
# I found this solution on Stack Overflow.
# First run a comparison to find where new contiguous groups begin (True)
eodDF['dSinceNewHi'] = (eodDF['newHi'] != eodDF['newHi'].shift(1))

# Now use cumsum() (cumulative sum) to number the "groups"
eodDF['dSinceNewHi'] = eodDF['dSinceNewHi'].cumsum()

# Now groupby() with cumcount() to form a running count within each group.
# cumcount() numbers the first occurrence as 0, which is correct when we
# transition to a new high (Trues), but is 1 too small when we transition to
# "not a new high" (Falses). We are counting days since a new high (Falses),
# so add 1.
eodDF['dSinceNewHi'] = eodDF.groupby('dSinceNewHi').cumcount() + 1

# Finally, all occurrences of True in the 'newHi' col get a corresponding 0
# in the 'dSinceNewHi' col.
eodDF.loc[eodDF['newHi'] == True, 'dSinceNewHi'] = 0

# Have we had a new M-day high in the last N days?
# Rows with fewer than N days since the last new high are 'bull'; every
# other row (including those left unset by the first assignment) is 'bear'.
eodDF.loc[eodDF['dSinceNewHi'] < N, 'signal'] = 'bull'
eodDF.loc[eodDF['signal'] != 'bull', 'signal'] = 'bear'

# Erase any signals prior to the start of tracking (rows before position K)
eodDF.loc[eodDF.index.values < eodDF.index.values[K], 'signal'] = np.nan

# IMPORTANT: we are assuming the signal is an end-of-day signal. So when
# the signal changes from 'bear' to 'bull' we would purchase the tkr at market
# close. We would therefore be in the market the following day. So there is a
# one-day lag between the signal and returns. The shift moves all signals
# forward one day. We then change the wording: bull=True, bear=False.
# The two where() calls first turn everything that is not 'bull' into False,
# then turn everything that is not False (i.e. the 'bull's) into True.
eodDF['inMkt'] = eodDF['signal'].shift(1)
eodDF['inMkt'] = eodDF['inMkt'].where(eodDF['inMkt'] == 'bull', False)
eodDF['inMkt'] = eodDF['inMkt'].where(eodDF['inMkt'] == False, True)

# Set values to False up to and including row K. First possible valid
# signal day occurs at index M+N, but we will not be in the market that day.
eodDF.loc[eodDF.index.values <= eodDF.index.values[K], 'inMkt'] = False

############################################################################
# Reentry override.
# While the signal is 'bear', a day whose Low falls below the reentry price
# ('rePt') switches the override on. The override then stays on until the
# next new M-day high (dSinceNewHi == 0), inclusive, and every day it is on
# is forced to inMkt = True.
#
# NOTE(review): the trigger day itself is marked in-market, so that day's
# full close-to-close return is credited even though the breach is detected
# from the same day's Low -- confirm this is the intended fill assumption.

# Days that switch the override on: price below reentry point while 'bear'.
belowRePt = (eodDF['Low'] < eodDF['rePt']) & (eodDF['signal'] == 'bear')
idxList = eodDF.index[belowRePt]

# Sparse on/off markers, kept as floats (1.0 = on, 0.0 = off, NaN = no
# event) so that no bool values are written into a float column.
reentryMarker = pd.Series(np.nan, index = eodDF.index, dtype = 'float64')
reentryMarker[belowRePt] = 1.0
# Marker for a new high; turn the override off. Assigned last so that "off"
# wins if both conditions ever fall on the same row.
reentryMarker[eodDF['dSinceNewHi'] == 0] = 0.0

# Forward fill: an "on" carries forward until the next "off" and vice versa.
# Rows before the first marker stay NaN.
reentryHeld = reentryMarker.ffill()

# Extend each run of "on" by one row so the new-high day that ends the run is
# still in the market. Otherwise we would be in until the new high (good),
# out for one day (bad), then back in via the regular lagged signal.
# Values: 0 = off, 1 or 2 = on, NaN = not yet determined.
reentryExt = reentryHeld + reentryHeld.shift(1)
reentryOn = reentryExt > 0

# Store as True / False, leaving NaN where the state is not yet determined.
reentryFlag = reentryOn.astype(object)
reentryFlag[reentryExt.isna()] = np.nan
eodDF['reentrySignal'] = reentryFlag

# Now copy the Trues over the inMkt column
eodDF.loc[reentryOn, 'inMkt'] = True


#%%###############################
# Calculate returns and statistics
#
# All "return" columns below are growth factors (1.0 = unchanged), and the
# "CAGR" columns are annualized growth factors (1.07 = +7% per year).

# Calculate daily tkr returns. shift(1) is the previous day's data
eodDF['tkrRtnDay'] = eodDF['Adj Close']/eodDF['Adj Close'].shift(1)

# Calculate running return relative to row K. Note that the first valid sell
# signal occurs at least M+N days after the first day of data: we must
# establish an M-day hi followed by N days w/o a new M-day hi. So this col
# only makes sense for index locations past M+N.
# .iloc is used because K is a row position, not an index label.
eodDF['tkrCumRtn'] = eodDF['Adj Close']/eodDF['Adj Close'].iloc[K]

# Calculate running CAGR.
# Intermediate calculation: years elapsed since the date at row K.
# Convert the time difference to a float number of days first and only then
# divide by days_per_yr: multiplying a numpy timedelta64 by a float truncates
# to whole units, which would silently turn 365.2422 into 365.
days_per_yr = 365.2422
eodDF['yrs'] = ((eodDF.index.values - eodDF.index.values[K])
                / np.timedelta64(1, 'D') / days_per_yr)
eodDF['tkrCAGR'] = eodDF['tkrCumRtn']**(1 / eodDF['yrs'])

# Calculate daily return for the algorithm. Same as the return for the
# ticker, except 1 (no change) when inMkt is False
eodDF['dbeRtnDay'] = eodDF['tkrRtnDay'].where(eodDF['inMkt'] == True, 1)

# Calculate cumulative return starting at index K. To do this we shift
# returns prior to K "off" the dataframe, then use the "cumprod()" fcn,
# then shift the cumulative product back into place. This creates "Not a
# Number" (NaN) entries prior to index K, which is probably a good thing,
# since those calculations would not be valid anyway.
eodDF['dbeCumRtn'] = eodDF['dbeRtnDay'].shift(-K).cumprod().shift(K)

# Calculate algorithm CAGR
eodDF['dbeCAGR'] = eodDF['dbeCumRtn']**(1 / eodDF['yrs'])

# Calculate mean trades per year
# Determine when trades took place: +1 = enter next day, -1 = exit next day
eodDF['trade'] = eodDF['inMkt'].shift(-1) - eodDF['inMkt']

# Erase any trades that occurred before we start tracking
eodDF.loc[eodDF.index.values < eodDF.index.values[K], 'trade'] = 0

# Sum trades: Take absolute value then add
tradesPerYr = eodDF.trade.abs().sum() / eodDF['yrs'].iloc[-1]

# Calculate percent of time in the market
# Count 'bull' and 'bear' days and calculate percent.
# NOTE(review): this counts 'signal' days, not 'inMkt' days, so any days
# forced into the market by the reentry override are not reflected here.
numBulls = len(eodDF[eodDF['signal'] == 'bull'])
numBears = len(eodDF[eodDF['signal'] == 'bear'])
pctInMkt = 100 * numBulls / (numBulls + numBears)

#################################
# Print parameters and statistics
#print('Tkr = ', tkr)
print('M = ', M)
print('N = ', N)
print('K = ', K)
print('Reentry Pct = ', reentryPct)
print('Tkr CAGR = ', eodDF['tkrCAGR'].iloc[-1])
print('DBE CAGR = ', eodDF['dbeCAGR'].iloc[-1])
print('Years = ', eodDF['yrs'].iloc[-1])
print('Trades/yr = ', tradesPerYr)
print('Pct in mkt = ', pctInMkt, '%')

#%%##############################################
# Run this cell to write output to an Excel file.
# Kind of slow, so probably don't want to run it
# unless necessary.

# Parameters
excelOut = 'dbeCalculations.xlsx'   # Excel file name
sheetName = 'dbe'                   # Sheet name

# Write to Excel file
# If you don't know which folder this is writing to try typing "pwd" at the
# prompt. It should return the current working directory.
# The context manager saves and closes the workbook on exit, even if the
# write raises, and replaces the ExcelWriter.save() call that newer pandas
# no longer provides.
with pd.ExcelWriter(excelOut) as writerObj:
    eodDF.to_excel(writerObj, sheet_name = sheetName)

#%%##############################################
# Second (improved) attempt at plotting returns. Trying unsucessfully to get
# rid of discontinuity.

# Import Modules
import matplotlib.pyplot as plt

# Definitions
def plotFcn(group):
    """Draw one segment of the 'dbe' curve on the module-level axes `ax`.

    The segment is red when every value in the group's 'color' column is
    below 1, and green otherwise. `group` must provide 'color' and 'dbe'
    columns; its index is used for the x values.
    """
    allBelowOne = (group['color'] < 1).all()
    if allBelowOne:
        lineColor = 'r'
    else:
        lineColor = 'g'
    ax.plot(group.index, group.dbe, c=lineColor, linewidth=1)

# Pick starting, ending dates
startDate = datetime.date(1951, 6, 1) # start date (yr, mo, day)
endDate = datetime.date(2019, 6, 1) # end date

# Create a new dataframe to hold graph data. Probably other ways to do it but
# I found this idea on Stack Overflow. The tricky part is getting the graph to
# be two-colored; red = out of market, green = in market.
# Cumulative products restart at the first row of the selected date window.
plotDF = pd.DataFrame()
plotDF['dbe'] = eodDF[startDate : endDate]['dbeRtnDay'].cumprod()
plotDF['tkr'] = eodDF[startDate : endDate]['tkrRtnDay'].cumprod()

# Create color map: red = 0, green = 1. Basically red where inMkt = False(0),
# and green where inMkt = True(1).
plotDF['color'] = eodDF[startDate : endDate]['inMkt']

# Now find boundaries: 1 on a row where inMkt differs from the previous row,
# else 0. The first row has no predecessor, so force it to 0.
plotDF['bdry'] = plotDF['color'] - plotDF['color'].shift(1)
plotDF['bdry'] = plotDF['bdry'].abs()
plotDF.loc[ plotDF.index[0], 'bdry'] = 0

# Calculate groups: the running boundary count numbers each contiguous
# in-market / out-of-market stretch.
plotDF['groups'] = plotDF['bdry'].cumsum()

# Plot returns
fig, ax = plt.subplots()

# Plot of DBE, one line segment per group. Each segment is drawn on its own,
# so consecutive segments are not joined and the graph is non-continuous.
# An explicit loop is used instead of groupby().apply(): plotFcn is called
# only for its drawing side effect, and apply() does not guarantee exactly
# one call per group.
for _, segment in plotDF.groupby('groups'):
    plotFcn(segment)

# add "buy and hold"
ax.plot(plotDF.index, plotDF.tkr, 'blue', linewidth=1)

#%%##############################
# Test code 2

# Print the 'yrs' value of every row from position K to the end.
# .iloc is used because K is a row position; plain [] with an integer on a
# date-indexed Series relies on a deprecated positional fallback.
for yrsValue in eodDF['yrs'].iloc[K:]:
    print(yrsValue)

#%%##############################################
# Initial attempt at plotting returns

# Import Modules
import matplotlib.pyplot as plt

# Definitions
def plotFcn(group):
    """Draw one segment of the 'dbe' curve on the module-level axes `ax`.

    The segment is red when every value in the group's 'color' column is
    below 1, and green otherwise. `group` must provide 'color' and 'dbe'
    columns; its index is used for the x values.
    """
    allBelowOne = (group['color'] < 1).all()
    if allBelowOne:
        lineColor = 'r'
    else:
        lineColor = 'g'
    ax.plot(group.index, group.dbe, c=lineColor, linewidth=1)

# Pick starting, ending dates
startDate = datetime.date(2016, 3, 1) # start date (yr, mo, day)
endDate = datetime.date(2016, 7, 1) # end date

# Create a new dataframe to hold graph data. Probably other ways to do it but
# I found this idea on Stack Overflow. The tricky part is getting the graph to
# be two-colored; red = out of market, green = in market.
# Cumulative products restart at the first row of the selected date window.
plotDF = pd.DataFrame()
plotDF['dbe'] = eodDF[startDate : endDate]['dbeRtnDay'].cumprod()
plotDF['tkr'] = eodDF[startDate : endDate]['tkrRtnDay'].cumprod()

# Create color map: red = -1, green = 1. Basically red where inMkt = False(0),
# and green where inMkt = True(1), but we want to tack an extra False onto the
# beginning of the False sequence due to the lag in reacting to the signal.
plotDF['color'] = eodDF[startDate : endDate]['inMkt']

# Close, but now we need to tack on that extra False to the beginning. Adding
# each row to the next one creates sequences of 0s and 2s with a single 1
# separating each sequence. The last entry has no successor and becomes NaN;
# give it the same value as the penultimate entry.
plotDF['color'] = plotDF['color'] + plotDF['color'].shift(-1)
plotDF.loc[ plotDF.index[-1], 'color'] = plotDF['color'].iloc[-2]

# Now each sequence (0s or 2s) is separated by a 1. Change the 1s to 0s, then
# subtract 1 from everything, leaving -1 (red) and +1 (green).
plotDF['color'] = plotDF['color'].where( plotDF['color'] != 1, 0)
plotDF['color'] = plotDF['color'] - 1

# Plot returns
fig, ax = plt.subplots()

# A new segment starts wherever the color changes sign from one row to the
# next (product of neighbours < 0); the running count of those sign changes
# numbers the segments.
segmentId = (plotDF['color'] * plotDF['color'].shift(1) < 0).cumsum()

# Plot of DBE, one line segment per group. Each segment is drawn on its own,
# so consecutive segments are not joined and the graph is non-continuous.
# An explicit loop is used instead of groupby().apply(): plotFcn is called
# only for its drawing side effect, and apply() does not guarantee exactly
# one call per group.
for _, segment in plotDF.groupby(segmentId):
    plotFcn(segment)

# add "buy and hold"
ax.plot(plotDF.index, plotDF.tkr, 'k', linewidth=1)

#%%#########################

# Inspection cell: show the close price next to the new-high flag, reentry
# price, days-since-new-high counter and signal for every row.
eodDF.loc[:, ['Close', 'newHi', 'rePt', 'dSinceNewHi', 'signal']]





Binary file added dbe_SnP_m122_n214.xlsx
Binary file not shown.
220 changes: 220 additions & 0 deletions dbe_loop_1.1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
# Program for testing "no new highs lately" or "dying bullish euphoria" (DBE)
# strategy.

# Author: John Merkel
# Date: June 2019

# New to version 1.1
# * User can download data from an Excel file (still has capability to
# download from Y!)
# * Fixes copy of dataframe. v 1.0 did this incorrectly, so that the
# original dataframe was modified in the loop.

# This is basically dbe_1.0.py placed in a double loop. This program tests
# a range of values for M (new M day high?) and N (in last N days?). The
# output is stored in dataframes which are then written to separate sheets
# of an Excel file.


# Import Modules
import pandas_datareader as web # used to download stock prices from Yahoo!
import pandas as pd
import datetime
import numpy as np
#import math
#import json

#%% Upload historical prices from an Excel spreadsheet
#
# For correct formating download data from Yahoo! into csv format and then
# simply "save as" a .xlsx spreadsheet.

# If you run this cell you probably do not want to run the next cell which
# downloads prices from Y!

# NOTE: this cell need not be run every time a parameter is changed, as the
# data in this dataframe is not changed elsewhere in the program (unless you
# run the cell below which downloads data from Y!). Only run this cell if you
# update your spreadsheet.

# Workbook and worksheet that hold the end-of-day price history.
# Rows are expected oldest-first (most recent data at the bottom).
eodDataFile = "snp500data_2019-6-18.xlsx"
sheet = "Data"

# Load the worksheet; the first column becomes the row index.
# Column headings are read in as str.
# Slight modifications would be needed to upload a csv file instead.
origDataDF = pd.read_excel(eodDataFile, sheet_name = sheet, index_col = 0)

#%% Download Historical stock prices from Y!. Should include date, open,
# high, low, close, adjusted close, and volume.

# NOTE: this cell need not be run every time a parameter is changed, as the
# data in this dataframe is not changed elsewhere in the program. Only run
# this cell when parameters for this cell are changed! Otherwise you are
# querying Yahoo for data unnecessarily.

# Cell Parameters
tkr = 'bam'         # Stock ticker that data will be downloaded for

# Date range notes:
#   * First trading day for the SPY etf is 1993 Jan 29.
#   * By trial and error the oldest date DataReader seems to allow is
#     1970 Jan 1, even though S&P 500 data on Yahoo! goes back to approx
#     1950 Jan 3.
startDate = datetime.date(1980, 3, 17)  # start date (yr, mo, day)
endDate = datetime.date(2019, 6, 28)    # end date

# Download data. Most recent data is on the bottom.
origDataDF = web.DataReader(tkr, data_source = 'yahoo', start = startDate,
                            end = endDate)

#%% Main Loop
# Runs the DBE back-test for every (M, N) pair in Mrange x Nrange and stores
# the resulting annualized growth factor, trades per year and percent of
# 'bull' days in the cagr / trades / pctInMkt dataframes (rows = M,
# columns = N). After the loop eodDF holds the full calculation for the last
# (M, N) pair tested.

# Parameters
series = 'Adj Close'    # Can use 'Close', 'High', 'Low', 'Open', 'Adj Close'
                        # For indices 'Close' = 'Adj Close' (I think)
days_per_yr = 365.2422

# Create a range
Mrange = range(1,63)    # Range of M values. Looking for new M-day high
Nrange = range(10,100)  # Range of N values. M-day high in last N days

# Pick tracking start point. This will be the number of market days past the
# start date above. To assure you are getting meaningful statistics this value
# should be at least as large as the sum of the two largest numbers in the
# range above. This parameter assures uniformity along trials (different
# values of M and N) by assuring that each trial starts tracking results on
# the same day.
K = 200

# Create dataframes to hold statistics
cagr = pd.DataFrame(index = Mrange, columns = Nrange)
trades = pd.DataFrame(index = Mrange, columns = Nrange)
pctInMkt = pd.DataFrame(index = Mrange, columns = Nrange)

# Quantities that do not depend on M or N are computed once, up front.
idxVals = origDataDF.index.values
beforeK = idxVals < idxVals[K]      # rows before tracking starts
uptoK = idxVals <= idxVals[K]       # rows up to and including row K

# Years elapsed since the date at row K. Convert to a float number of days
# first and only then divide by days_per_yr: multiplying a numpy timedelta64
# by a float truncates to whole units, which would silently turn 365.2422
# into 365.
yrsSinceK = (idxVals - idxVals[K]) / np.timedelta64(1, 'D') / days_per_yr

# Daily tkr returns. shift(1) is the previous day's data.
# Adjusted close takes stock splits and dividends into account.
tkrRtnDay = origDataDF['Adj Close']/origDataDF['Adj Close'].shift(1)

# Loop thru parameter values.
for M in Mrange:                # Looking for a new M day hi
    print('M = ', M)

    # New M-day highs depend only on M, so find them once per M rather than
    # once per (M, N) pair. A row is a new high when its value equals the
    # rolling M-row maximum.
    mDayHi = origDataDF[series].rolling(M).max()
    newHi = origDataDF[series] == mDayHi
    newHiNum = newHi.astype('float64')  # True=1, False=0 for summing

    for N in Nrange:            # in last N days
        #print('N = ', N)

        ###################################################
        # Have we had a new M-day high in the last N days?

        # First copy original downloaded data into new dataframe so that we
        # don't modify the original dataframe.
        eodDF = origDataDF.copy(deep = True)
        #eodDF = origDataDF[datetime.date(2009, 3, 17):].copy(deep = True)

        eodDF['MdayHi'] = mDayHi
        eodDF['newHi'] = newHi

        # Sum the last N days of new-high flags. If the sum is 0 there was no
        # new high in N days ('bear'); anything else is 'bull'. Rows with
        # fewer than N days of history have no sum yet and are 'bull' here;
        # they are erased or overridden below provided K >= N.
        hiCount = newHiNum.rolling(N).sum()
        eodDF['signal'] = np.where(hiCount == 0, 'bear', 'bull')

        # IMPORTANT: we are assuming the signal is an end-of-day signal. So
        # when the signal changes from 'bear' to 'bull' we would purchase the
        # tkr at market close. We would therefore be in the market the
        # following day. So there is a one-day lag between the signal and
        # returns: inMkt is True on the day after a 'bull' signal.
        inMkt = eodDF['signal'].shift(1) == 'bull'

        # Set values to False prior to when we start tracking. If we start
        # tracking at index K that will be when we get our first signal, but
        # we will not be in the market on that day. The first possible valid
        # signal occurs at index M+N.
        inMkt[uptoK] = False
        eodDF['inMkt'] = inMkt

        ##################################
        # Calculate returns and statistics
        eodDF['tkrRtnDay'] = tkrRtnDay
        eodDF['yrs'] = yrsSinceK

        # Daily return for dbe. Same as return for ticker, except 1 (no
        # change) when inMkt is False
        eodDF['dbeRtnDay'] = tkrRtnDay.where(inMkt, 1)

        # Cumulative return starting at index K. Shift returns prior to K
        # "off" the dataframe, apply cumprod(), then shift the cumulative
        # product back into place. This creates NaN entries prior to index K,
        # which is fine since we are not tracking yet.
        eodDF['dbeCumRtn'] = eodDF['dbeRtnDay'].shift(-K).cumprod().shift(K)

        # Algorithm CAGR, expressed as an annualized growth factor
        eodDF['dbeCAGR'] = eodDF['dbeCumRtn']**(1 / eodDF['yrs'])

        # Mean trades per year. A trade happens wherever inMkt differs from
        # the next day's inMkt; trades before tracking starts are erased.
        inMktNum = inMkt.astype('float64')
        eodDF['trade'] = inMktNum.shift(-1) - inMktNum
        eodDF.loc[beforeK, 'trade'] = 0

        # Sum trades: Take absolute value then add
        tradesPerYr = eodDF['trade'].abs().sum() / eodDF['yrs'].iloc[-1]

        # Percent of time in the market.
        # Erase any signals prior to start of tracking, then count 'bull'
        # and 'bear' days.
        eodDF.loc[beforeK, 'signal'] = np.nan
        numBulls = (eodDF['signal'] == 'bull').sum()
        numBears = (eodDF['signal'] == 'bear').sum()
        p = 100 * numBulls / (numBulls + numBears)

        #################################
        # Place statistics into dataframes
        cagr.loc[M,N] = eodDF['dbeCAGR'].iloc[-1]
        trades.loc[M,N] = tradesPerYr
        pctInMkt.loc[M,N] = p

# Calculate CAGR for the ticker (buy and hold from row K to the last row).
# .iloc is used because -1 and K are row positions, not index labels.
tkrRtn = eodDF['Adj Close'].iloc[-1] / eodDF['Adj Close'].iloc[K]
yrs = eodDF['yrs'].iloc[-1]
tkrCAGR = tkrRtn**(1/yrs)

# Print some statistics
print('Ticker CAGR = ', tkrCAGR)
print('Years = ', yrs)
# Overall maximum across every (M, N) pair; cagr.max() alone would print one
# maximum per N column.
print('Max CAGR = ', cagr.astype('float64').max().max())
#%%##############################################
# Run this cell to write output to an Excel file.
# Kind of slow.

# Parameters
excelOut = 'dbeStats.xlsx'  # Excel file name
sheet0 = 'dbe'              # Sheet name: calculations for last (M, N) tested
sheet1 = 'cagr'             # Sheet name
sheet2 = 'trades'
sheet3 = 'pctInMkt'

# Write to Excel file
# IF you don't know which folder this is writing to try typing "pwd" at the
# prompt. It should return the current working directory.
# The context manager saves and closes the workbook on exit, even if one of
# the writes raises, and replaces the ExcelWriter.save() call that newer
# pandas no longer provides.
with pd.ExcelWriter(excelOut) as writerObj:
    eodDF.to_excel(writerObj, sheet_name = sheet0)
    cagr.to_excel(writerObj, sheet_name = sheet1)
    trades.to_excel(writerObj, sheet_name = sheet2)
    pctInMkt.to_excel(writerObj, sheet_name = sheet3)
Binary file added snp500data_2019-6-18.xlsx
Binary file not shown.

0 comments on commit f4d88d4

Please sign in to comment.