Skip to content

Commit b0f8e5b

Browse files
committed
Merge pull request #120 from quantopian/best
Bayesian refactor + new model
2 parents 81ca8c0 + 4170dcd commit b0f8e5b

File tree

2 files changed

+325
-42
lines changed

2 files changed

+325
-42
lines changed

pyfolio/bayesian.py

Lines changed: 261 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pandas as pd
1919
import scipy as sp
2020
from scipy import stats
21+
import seaborn as sns
2122

2223
import matplotlib.pyplot as plt
2324

@@ -135,7 +136,7 @@ def model_returns_t(data, samples=500):
135136
----------
136137
returns : pandas.Series
137138
Series of simple returns of an algorithm or stock.
138-
samples : int (optional)
139+
samples : int, optional
139140
Number of posterior samples to draw.
140141
141142
Returns
@@ -165,6 +166,260 @@ def model_returns_t(data, samples=500):
165166
return trace
166167

167168

169+
def model_best(y1, y2, samples=1000):
170+
"""Bayesian Estimation Supersedes the T-Test
171+
172+
This model runs a Bayesian hypothesis comparing if y1 and y2 come
173+
from the same distribution. Returns are assumed to be T-distributed.
174+
175+
In addition, computes annual volatility and Sharpe of in and
176+
out-of-sample periods.
177+
178+
This model replicates the example used in:
179+
Kruschke, John. (2012) Bayesian estimation supersedes the t
180+
test. Journal of Experimental Psychology: General.
181+
182+
Parameters
183+
----------
184+
y1 : array-like
185+
Array of returns (e.g. in-sample)
186+
y2 : array-like
187+
Array of returns (e.g. out-of-sample)
188+
samples : int, optional
189+
Number of posterior samples to draw.
190+
191+
Returns
192+
-------
193+
pymc3.sampling.BaseTrace object
194+
A PyMC3 trace object that contains samples for each parameter
195+
of the posterior.
196+
197+
See Also
198+
--------
199+
plot_stoch_vol : plotting of tochastic volatility model
200+
"""
201+
202+
y = np.concatenate((y1, y2))
203+
204+
mu_m = np.mean(y)
205+
mu_p = 0.000001 * 1 / np.std(y)**2
206+
207+
sigma_low = np.std(y)/1000
208+
sigma_high = np.std(y)*1000
209+
with pm.Model():
210+
group1_mean = pm.Normal('group1_mean', mu=mu_m, tau=mu_p,
211+
testval=y1.mean())
212+
group2_mean = pm.Normal('group2_mean', mu=mu_m, tau=mu_p,
213+
testval=y2.mean())
214+
group1_std = pm.Uniform('group1_std', lower=sigma_low,
215+
upper=sigma_high, testval=y1.std())
216+
group2_std = pm.Uniform('group2_std', lower=sigma_low,
217+
upper=sigma_high, testval=y2.std())
218+
nu = pm.Exponential('nu_minus_one', 1/29., testval=4.) + 1
219+
220+
returns_group1 = pm.T('group1', nu=nu, mu=group1_mean,
221+
lam=group1_std**-2, observed=y1)
222+
returns_group2 = pm.T('group2', nu=nu, mu=group2_mean,
223+
lam=group2_std**-2, observed=y2)
224+
225+
diff_of_means = pm.Deterministic('difference of means',
226+
group2_mean - group1_mean)
227+
pm.Deterministic('difference of stds',
228+
group2_std - group1_std)
229+
pm.Deterministic('effect size', diff_of_means /
230+
pm.sqrt((group1_std**2 +
231+
group2_std**2) / 2))
232+
233+
pm.Deterministic('group1_annual_volatility',
234+
returns_group1.distribution.variance**.5 *
235+
np.sqrt(252))
236+
pm.Deterministic('group2_annual_volatility',
237+
returns_group2.distribution.variance**.5 *
238+
np.sqrt(252))
239+
240+
pm.Deterministic('group1_sharpe', returns_group1.distribution.mean /
241+
returns_group1.distribution.variance**.5 *
242+
np.sqrt(252))
243+
pm.Deterministic('group2_sharpe', returns_group2.distribution.mean /
244+
returns_group2.distribution.variance**.5 *
245+
np.sqrt(252))
246+
247+
step = pm.NUTS()
248+
249+
trace = pm.sample(samples, step)
250+
return trace
251+
252+
253+
def plot_best(trace=None, data_train=None, data_test=None,
254+
samples=1000, burn=200, axs=None):
255+
"""Plot BEST significance analysis.
256+
257+
Parameters
258+
----------
259+
trace : pymc3.sampling.BaseTrace, optional
260+
trace object as returned by model_best()
261+
If not passed, will run model_best(), for which
262+
data_train and data_test are required.
263+
data_train : pandas.Series, optional
264+
Returns of in-sample period.
265+
Required if trace=None.
266+
data_test : pandas.Series, optional
267+
Returns of out-of-sample period.
268+
Required if trace=None.
269+
samples : int, optional
270+
Posterior samples to draw.
271+
burn : int
272+
Posterior sampels to discard as burn-in.
273+
axs : array of matplotlib.axes objects, optional
274+
Plot into passed axes objects. Needs 6 axes.
275+
276+
Returns
277+
-------
278+
None
279+
280+
See Also
281+
--------
282+
model_best : Estimation of BEST model.
283+
"""
284+
if trace is None:
285+
if (data_train is not None) or (data_test is not None):
286+
raise ValueError('Either pass trace or data_train and data_test')
287+
trace = model_best(data_train, data_test, samples=samples)
288+
289+
trace = trace[burn:]
290+
if axs is None:
291+
fig, axs = plt.subplots(ncols=2, nrows=3, figsize=(16, 4))
292+
293+
def distplot_w_perc(trace, ax):
294+
sns.distplot(trace, ax=ax)
295+
ax.axvline(
296+
stats.scoreatpercentile(trace, 2.5),
297+
color='0.5', label='2.5 and 97.5 percentiles')
298+
ax.axvline(
299+
stats.scoreatpercentile(trace, 97.5),
300+
color='0.5')
301+
302+
sns.distplot(trace['group1_mean'], ax=axs[0], label='backtest')
303+
sns.distplot(trace['group2_mean'], ax=axs[0], label='forward')
304+
axs[0].legend(loc=0)
305+
axs[1].legend(loc=0)
306+
307+
distplot_w_perc(trace['difference of means'], axs[1])
308+
309+
axs[0].set(xlabel='mean', ylabel='belief', yticklabels=[])
310+
axs[1].set(xlabel='difference of means', yticklabels=[])
311+
312+
sns.distplot(trace['group1_annual_volatility'], ax=axs[2],
313+
label='backtest')
314+
sns.distplot(trace['group2_annual_volatility'], ax=axs[2],
315+
label='forward')
316+
distplot_w_perc(trace['group2_annual_volatility'] -
317+
trace['group1_annual_volatility'], axs[3])
318+
axs[2].set(xlabel='Annual volatility', ylabel='belief',
319+
yticklabels=[])
320+
axs[2].legend(loc=0)
321+
axs[3].set(xlabel='difference of volatility', yticklabels=[])
322+
323+
sns.distplot(trace['group1_sharpe'], ax=axs[4], label='backtest')
324+
sns.distplot(trace['group2_sharpe'], ax=axs[4], label='forward')
325+
distplot_w_perc(trace['group2_sharpe'] - trace['group1_sharpe'],
326+
axs[5])
327+
axs[4].set(xlabel='Sharpe', ylabel='belief', yticklabels=[])
328+
axs[4].legend(loc=0)
329+
axs[5].set(xlabel='difference of Sharpes', yticklabels=[])
330+
331+
sns.distplot(trace['effect size'], ax=axs[6])
332+
axs[6].axvline(
333+
stats.scoreatpercentile(trace['effect size'], 2.5),
334+
color='0.5')
335+
axs[6].axvline(
336+
stats.scoreatpercentile(trace['effect size'], 97.5),
337+
color='0.5')
338+
axs[6].set(xlabel='difference of means normalized by volatility',
339+
ylabel='belief', yticklabels=[])
340+
341+
342+
def model_stoch_vol(data, samples=2000):
343+
"""Run stochastic volatility model.
344+
345+
This model estimates the volatility of a returns series over time.
346+
Returns are assumed to be T-distributed. lambda (width of
347+
T-distributed) is assumed to follow a random-walk.
348+
349+
Parameters
350+
----------
351+
data : pandas.Series
352+
Return series to model.
353+
samples : int, optional
354+
Posterior samples to draw.
355+
356+
Returns
357+
-------
358+
pymc3.sampling.BaseTrace object
359+
A PyMC3 trace object that contains samples for each parameter
360+
of the posterior.
361+
362+
See Also
363+
--------
364+
plot_stoch_vol : plotting of tochastic volatility model
365+
"""
366+
from pymc3.distributions.timeseries import GaussianRandomWalk
367+
368+
with pm.Model():
369+
nu = pm.Exponential('nu', 1./10, testval=5.)
370+
sigma = pm.Exponential('sigma', 1./.02, testval=.1)
371+
s = GaussianRandomWalk('s', sigma**-2, shape=len(data))
372+
volatility_process = pm.Deterministic('volatility_process',
373+
pm.exp(-2*s))
374+
pm.T('r', nu, lam=volatility_process, observed=data)
375+
start = pm.find_MAP(vars=[s], fmin=sp.optimize.fmin_l_bfgs_b)
376+
377+
step = pm.NUTS(scaling=start)
378+
trace = pm.sample(100, step, progressbar=False)
379+
380+
# Start next run at the last sampled position.
381+
step = pm.NUTS(scaling=trace[-1], gamma=.25)
382+
trace = pm.sample(samples, step, start=trace[-1],
383+
progressbar=False, njobs=2)
384+
385+
return trace
386+
387+
388+
def plot_stoch_vol(data, trace=None, ax=None):
389+
"""Generate plot for stochastic volatility model.
390+
391+
Parameters
392+
----------
393+
data : pandas.Series
394+
Returns to model.
395+
trace : pymc3.sampling.BaseTrace object, optional
396+
trace as returned by model_stoch_vol
397+
If not passed, sample from model.
398+
ax : matplotlib.axes object, optional
399+
Plot into axes object
400+
401+
Returns
402+
-------
403+
ax object
404+
405+
See Also
406+
--------
407+
model_stoch_vol : run stochastic volatility model
408+
"""
409+
if trace is None:
410+
trace = model_stoch_vol(data)
411+
412+
if ax is None:
413+
fig, ax = plt.subplots(figsize=(15, 8))
414+
415+
data.abs().plot(ax=ax)
416+
ax.plot(data.index, np.exp(trace['s', ::30].T), 'r', alpha=.03)
417+
ax.set(title='Stochastic Volatility', xlabel='time', ylabel='volatility')
418+
ax.legend(['abs returns', 'stochastic volatility process'])
419+
420+
return ax
421+
422+
168423
def compute_bayes_cone(preds, starting_value=1.):
169424
"""Compute 5, 25, 75 and 95 percentiles of cumulative returns, used
170425
for the Bayesian cone.
@@ -265,7 +520,7 @@ def run_model(model, returns_train, returns_test=None,
265520
266521
Parameters
267522
----------
268-
model : {'alpha_beta', 't', 'normal'}
523+
model : {'alpha_beta', 't', 'normal', 'best'}
269524
Which model to run
270525
returns_train : pd.Series
271526
Timeseries of simple returns
@@ -297,9 +552,12 @@ def run_model(model, returns_train, returns_test=None,
297552
trace = model_returns_t(rets, samples)
298553
elif model == 'normal':
299554
trace = model_returns_normal(rets, samples)
555+
elif model == 'best':
556+
trace = model_best(returns_train, returns_test, samples=samples)
300557
else:
301558
raise NotImplementedError(
302-
'Model {} not found. Use alpha_beta, t, or normal.'.format(model))
559+
'Model {} not found.'
560+
'Use alpha_beta, t, normal, or best.'.format(model))
303561

304562
return trace
305563

0 commit comments

Comments
 (0)