Skip to content

Commit b63b53f

Browse files
aseyboldt (Quacs)aseyboldt
aseyboldt (Quacs)
authored andcommitted
Add factor penalties in optimization
1 parent 0ec1aea commit b63b53f

File tree

1 file changed

+90
-32
lines changed

1 file changed

+90
-32
lines changed

bayesalpha/returns_model.py

Lines changed: 90 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ def __init__(self, data, algos, factors=None, predict=False,
9494
vlt = self._build_volatility(Bx_log_vlt)
9595

9696
gains_mu = self._build_gains_mu(in_sample)
97-
gains_time = self._build_gains_time(Bx_gains)
97+
#gains_time = self._build_gains_time(Bx_gains)
9898

99-
gains = gains_mu + gains_time
99+
gains = gains_mu# + gains_time
100100
if len(gains_factors.columns) > 0 and not self._predict:
101101
factors_gains = self._build_gains_factors()
102102
gains = gains + factors_gains
@@ -205,6 +205,7 @@ def _build_gains_mu(self, is_author_is):
205205
'gains_theta': ('algo',),
206206
'gains_eta': ('algo',),
207207
'author_is': ('algo',),
208+
'author_is_raw': ('algo',),
208209
'gains': ('algo',),
209210
'gains_raw': ('algo',),
210211
})
@@ -250,13 +251,18 @@ def _build_gains_mu(self, is_author_is):
250251
elif shrinkage == 'skew-neg2-normal':
251252
gains_sd = pm.HalfNormal('gains_sd', sd=0.1)
252253
pm.Deterministic('log_gains_sd', tt.log(gains_sd))
253-
gains_mu = pm.Normal('gains_mu', mu=0.02, sd=0.1)
254+
gains_mu = pm.Normal('gains_mu', sd=0.1)
254255
gains_raw = pm.SkewNormal(
255256
'gains_raw', sd=1, mu=0, alpha=-4, shape=k)
256257

257-
author_is = pm.Normal('author_is', shape=k, sd=0.4, mu=0.1)
258+
author_is = pm.Normal('author_is', shape=k, sd=0.4, mu=0.0)
259+
#log_author_is_mu = pm.Normal('log_author_is_mu', mu=-3, sd=3)
260+
#log_author_is_sd = pm.HalfNormal('log_author_is_sd', sd=0.8)
261+
#log_author_is = pm.Normal('author_is_raw', shape=k, mu=0, sd=1)
262+
#author_is = tt.exp(log_author_is * log_author_is_sd + log_author_is_mu)
263+
#pm.Deterministic('author_is', author_is)
258264
gains = pm.Deterministic('gains', gains_sd * gains_raw + gains_mu)
259-
#gains_all = (1 - is_author_is) * gains[None, :] + author_is[None, :] * is_author_is
265+
gains_all = (1 - is_author_is) * gains[None, :] + author_is[None, :] * is_author_is
260266
gains_all = gains[None, :] + author_is[None, :] * is_author_is
261267
elif shrinkage == 'skew-normal':
262268
gains_sd = pm.HalfNormal('gains_sd', sd=0.1)
@@ -297,7 +303,7 @@ def _build_gains_time(self, Bx_gains):
297303
self.dims.update({
298304
'gains_time_sd_raw': ('algo',),
299305
'gains_time_sd': ('algo',),
300-
'log_gains_time_sd': ('algo',),
306+
#'log_gains_time_sd': ('algo',),
301307
'gains_time_raw': ('algo', 'time_raw_gains'),
302308
'gains_time': ('algo', 'time'),
303309
})
@@ -410,6 +416,7 @@ def make_predict_function(self, factor_scale_halflife=None):
410416
allow_input_downcast=True)
411417

412418
algos = self.coords['algo']
419+
factors = self.coords['factor']
413420
time = self.coords['time']
414421

415422
def predict(point):
@@ -438,7 +445,8 @@ def predict(point):
438445
returns[...] += factor_rets
439446

440447
returns = xr.DataArray(returns, coords=[algos, time])
441-
return returns
448+
exposures = xr.DataArray(factor_exposures, coords=[factors, algos])
449+
return xr.Dataset({'returns': returns, 'exposures': exposures})
442450

443451
return predict
444452

@@ -555,21 +563,29 @@ def predict(self, n_days, n_repl=None, factor_scale_halflife=None):
555563
predict_func = model.make_predict_function(factor_scale_halflife)
556564
coords = [self.trace.chain, self.trace.sample,
557565
self.trace.algo, model.coords['time']]
566+
coords_exposures = [self.trace.chain, self.trace.sample,
567+
self.trace.factor, self.trace.algo]
558568
if n_repl is not None:
559569
repl_coord = pd.RangeIndex(n_repl, name='sim_repl')
560570
coords.append(repl_coord)
571+
coords_exposures.append(repl_coord)
561572
shape = [len(vals) for vals in coords]
562573

563-
prediction_data = np.zeros(shape)
564-
predictions = xr.DataArray(prediction_data, coords=coords)
574+
returns_data = np.zeros(shape)
575+
exposure_data = np.zeros([len(v) for v in coords_exposures])
576+
returns = xr.DataArray(returns_data, coords=coords)
577+
exposures = xr.DataArray(exposure_data, coords=coords_exposures)
565578
for (chain, sample), point in self._points(include_transformed=False):
566579
if n_repl is None:
567-
predictions.loc[chain, sample, :, :] = predict_func(point)
580+
prediction = predict_func(point)
581+
returns.loc[chain, sample, :, :] = prediction.returns
582+
exposures.loc[chain, sample, :, :] = prediction.exposures
568583
else:
569584
for repl in repl_coord:
570-
returns = predict_func(point)
571-
predictions.loc[chain, sample, :, :, repl] = returns
572-
return predictions
585+
prediction = predict_func(point)
586+
returns.loc[chain, sample, :, :, repl] = prediction.returns
587+
exposures.loc[chain, sample, :, :, repl] = prediction.exposures
588+
return xr.Dataset({'returns': returns, 'exposures': exposures})
573589

574590
def predict_value(self, n_days, n_repl=None, factor_scale_halflife=None):
575591
model = self._make_prediction_model(n_days)
@@ -603,34 +619,37 @@ def prediction_iter(self, n_days):
603619

604620
class Optimizer(object):
605621
def __init__(self, predictions, utility='isoelastic', lmda=None,
606-
factor_weights=None, max_weights=None):
622+
factor_penalty=None, max_weights=None, exposure_limit=None,
623+
exposure_penalty=None):
607624
"""Compute a portfolio based on model predictions.
608625
609626
Parameters
610627
----------
611-
predictions : xr.DataSet
628+
predictions : xr.Dataset
612629
Predictions as returned by fit.predict_value
613630
utility : ['isoelastic', 'exp'], default='isoelastic'
614631
The utility function to use.
615632
lmda : float
616633
Risk aversion parameter. This value can be overridden
617634
by passing a different value to `solve`.
618-
factor_weights : ndarray
619-
TODO
635+
factor_penalty : float
620636
"""
621637
if cvxpy is None:
622638
raise RuntimeError('Optimization requires cvxpy>=1.0')
623-
self._returns = predictions
624-
self._problem = self._build_problem(lmda, factor_weights, utility)
639+
self._predictions = predictions
640+
self._problem = self._build_problem(lmda, utility, factor_penalty,
641+
exposure_limit, exposure_penalty)
625642
if max_weights is None:
626643
max_weights = [1] * len(predictions.algo)
627644
self._max_weights = max_weights
628645

629-
def _build_problem(self, lmda_vals, factor_weights_vals, utility):
630-
n_predict = (len(self._returns.chain)
631-
* len(self._returns.sample)
632-
* len(self._returns.sim_repl))
633-
n_algos = len(self._returns.algo)
646+
def _build_problem(self, lmda_vals, utility, factor_penalty=None,
647+
exposure_limit=None, exposure_penalty=None):
648+
n_predict = (len(self._predictions.chain)
649+
* len(self._predictions.sample)
650+
* len(self._predictions.sim_repl))
651+
n_algos = len(self._predictions.algo)
652+
n_factors = len(self._predictions.factor)
634653
lmda = cvxpy.Parameter(name='lambda', nonneg=True)
635654
returns = cvxpy.Parameter(shape=(n_predict, n_algos), name='returns')
636655
max_weights = cvxpy.Parameter(shape=(n_algos), name='max_weights')
@@ -643,13 +662,47 @@ def _build_problem(self, lmda_vals, factor_weights_vals, utility):
643662
else:
644663
raise ValueError('Unknown utility: %s' % utility)
645664

646-
problem = cvxpy.Problem(
647-
cvxpy.Minimize(risk),
648-
[cvxpy.sum(weights) == 1, weights >= 0, weights <= max_weights])
665+
if factor_penalty is not None:
666+
penalty = cvxpy.Parameter(shape=(), name='factor_penalty', nonneg=True)
667+
self._factor_penalty_p = penalty
668+
for i in range(n_factors):
669+
exposures = cvxpy.Parameter(shape=(n_predict, n_algos),
670+
name='exposures_%s' % i)
671+
exposures.value = self._predictions.exposures.isel(factor=i).stack(
672+
prediction=('chain', 'sample', 'sim_repl')).values.T
673+
risk_factor = cvxpy.sum_squares(exposures * weights)
674+
risk = risk + penalty * risk_factor
675+
676+
if exposure_penalty is not None:
677+
penalty = cvxpy.Parameter(shape=(), name='exposure_penalty', nonneg=True)
678+
self._exposure_penalty_p = penalty
679+
exposure_data = self._predictions.position_exposures
680+
n_history = len(exposure_data.time_hist)
681+
exposures = cvxpy.Parameter(shape=(n_history, n_algos),
682+
name='position_exposures')
683+
exposures.value = exposure_data.values
684+
risk_factor = cvxpy.sum_squares(exposures * weights)
685+
risk = risk + penalty * risk_factor * n_predict / n_history
686+
687+
constraints = [cvxpy.sum(weights) == 1, weights >= 0, weights <= max_weights]
688+
if exposure_limit is not None:
689+
limit = cvxpy.Parameter(name='exposure_limit', nonneg=True)
690+
self._exposure_limit = limit
691+
limit.value = exposure_limit
692+
exposures_lower = cvxpy.Parameter(shape=(n_algos,), name='exposures_lower')
693+
exposures_upper = cvxpy.Parameter(shape=(n_algos,), name='exposures_upper')
694+
exposure_data = self._predictions.position_exposures
695+
exposures_lower.value = exposure_data.sel(quantile='lower').values
696+
exposures_upper.value = exposure_data.sel(quantile='upper').values
697+
lower = cvxpy.sum(weights * exposures_lower) >= -limit
698+
upper = cvxpy.sum(weights * exposures_upper) <= limit
699+
constraints.extend([lower, upper])
700+
701+
problem = cvxpy.Problem(cvxpy.Minimize(risk), constraints)
649702

650703
if lmda_vals is not None:
651704
lmda.value = lmda_vals
652-
predictions = self._returns.stack(
705+
predictions = self._predictions.cum_final.stack(
653706
prediction=('chain', 'sample', 'sim_repl'))
654707
# +1 because we want the final wealth, when we start with
655708
# a unit of money.
@@ -661,12 +714,17 @@ def _build_problem(self, lmda_vals, factor_weights_vals, utility):
661714
self._max_weights_v = max_weights
662715
return problem
663716

664-
def solve(self, lmda=None, factor_weights=None, max_weights=None, **kwargs):
717+
def solve(self, lmda=None, factor_penalty=None, max_weights=None,
718+
exposure_limit=None, exposure_penalty=None, **kwargs):
665719
"""Find the optimal weights for the portfolio."""
666720
if lmda is not None:
667721
self._lmda_p.value = lmda
668-
if factor_weights is not None:
669-
self._factor_weights_p.value = factor_weights
722+
if exposure_penalty is not None:
723+
self._exposure_penalty_p.value = exposure_penalty
724+
if factor_penalty is not None:
725+
self._factor_penalty_p.value = factor_penalty
726+
if exposure_limit is not None:
727+
self._exposure_limit.value = exposure_limit
670728
if max_weights is not None:
671729
self._max_weights_v.value = max_weights
672730
else:
@@ -675,7 +733,7 @@ def solve(self, lmda=None, factor_weights=None, max_weights=None, **kwargs):
675733
if self._problem.status != 'optimal':
676734
raise ValueError('Optimization did not converge.')
677735
weights = self._weights_v.value.ravel().copy()
678-
algos = self._returns.algo
736+
algos = self._predictions.algo
679737
return xr.DataArray(weights, coords=[algos], name='weights')
680738

681739

0 commit comments

Comments
 (0)