SharpeRatio.R
In SharpeR: Statistical Significance of the Sharpe Ratio

## ----'preamble', include=FALSE, warning=FALSE, message=FALSE----
library(knitr)

# set the knitr options ... for everyone!
# if you unset this, then vignette build bonks. oh, joy.
#opts_knit$set(progress=TRUE)
opts_knit$set(eval.after='fig.cap')
# for a package vignette, you do want to echo.
# opts_chunk$set(echo=FALSE,warning=FALSE,message=FALSE)
opts_chunk$set(warning=FALSE,message=FALSE)
#opts_chunk$set(results="asis")
opts_chunk$set(cache=TRUE,cache.path="cache/SharpeRatio")

#opts_chunk$set(fig.path="figure/",dev=c("pdf","cairo_ps"))
opts_chunk$set(fig.path="figure/SharpeRatio",dev=c("pdf"))
opts_chunk$set(fig.width=5,fig.height=4,dpi=64)

# doing this means that png files are made of figures;
# the savings is small, and it looks like shit:
#opts_chunk$set(fig.path="figure/",dev=c("png","pdf","cairo_ps"))
#opts_chunk$set(fig.width=4,fig.height=4)
# for figures? this is sweave-specific?
#opts_knit$set(eps=TRUE)

# this would be for figures:
#opts_chunk$set(out.width='.8\\textwidth')
# for text wrapping:
options(width=64,digits=2)
opts_chunk$set(size="small")
opts_chunk$set(tidy=TRUE,tidy.opts=list(width.cutoff=50,keep.blank.line=TRUE))

compile.time <- Sys.time()

# from the environment

# only recompute if FORCE_RECOMPUTE=True w/out case match.
FORCE_RECOMPUTE <- 
	(toupper(Sys.getenv('FORCE_RECOMPUTE',unset='False')) == "TRUE")

# compiler flags!

# not used yet
LONG.FORM <- FALSE

mc.resolution <- ifelse(LONG.FORM,1000,200)
mc.resolution <- max(mc.resolution,100)

library(quantmod)
options("getSymbols.warning4.0"=FALSE)

library(SharpeR)

gen_norm <- rnorm
lseq <- function(from,to,length.out) { 
	exp(seq(log(from),log(to),length.out = length.out))
}

## ----'generate_tbias',include=FALSE---------------------------
# the bias of the t
f_tbias <- function(n) { 
	sqrt((n-1) / 2) * exp(lgamma((n-2)/2) - lgamma((n-1)/2))
}
# approximate tbias
f_apx_tbias1 <- function(n) { 
	1 + (0.75 / n)
}
f_apx_tbias2 <- function(n) { 
	#1 + (0.75 / n) + (2 / n**2)
	1 + (0.75 / (n - 1)) + (32 / (25 * ((n-1) ** 2)))
}
n <- 12
tbias <- f_tbias(n)

## ----'f_tpower',echo=FALSE------------------------------------
# the power of the univariate t-test;
f_tpower <- function(n,rho = 0,alpha = 0.05) {
	f_tpower_ncp(ncp = sqrt(n) * rho,n = n,alpha = alpha)
}

# the power of the univariate t-test as function of the ncp
f_tpower_ncp <- function(ncp,n,alpha = 0.05) {
	pt(qt(1-alpha,n-1),df = n-1,ncp = ncp,lower.tail = FALSE)
}

# the power of an f-test 
f_fpower <- function(df1,df2,ncp,alpha = 0.05) {
	pf(qf(alpha,df1=df1,df2=df2,ncp=0,lower.tail=FALSE),df1 = df1,df2 = df2,ncp = ncp,lower.tail = FALSE)
}

# find the sample size for a given power of the univariate t-test
f_treqsize <- function(rho,powr = 0.80,alpha = 0.05) {
	zz <- uniroot(function(n,rho,alpha,powr)(f_tpower(n,rho,alpha) - powr),
								c(8,20 * 10 / (rho*rho)),rho = rho,powr = powr,alpha = alpha)
	return(zz$root)
}

## ----'sr_power_table',echo=FALSE,results='asis'---------------
dpy <- 253
rhovals <- seq(0.5,3.0,by=0.10)
rhovals_day <- rhovals / sqrt(dpy)
samps25.1 <- (1/dpy) * unlist(lapply(rhovals_day, f_treqsize, powr = 0.25))
samps50.1 <- (1/dpy) * unlist(lapply(rhovals_day, f_treqsize, powr = 0.50))
samps80.1 <- (1/dpy) * unlist(lapply(rhovals_day, f_treqsize, powr = 0.80))
#2sided test
samps25.2 <- (1/dpy) * unlist(lapply(rhovals_day, f_treqsize, alpha=0.025, powr = 0.25))
samps50.2 <- (1/dpy) * unlist(lapply(rhovals_day, f_treqsize, alpha=0.025, powr = 0.50))
samps80.2 <- (1/dpy) * unlist(lapply(rhovals_day, f_treqsize, alpha=0.025, powr = 0.80))
foo <- data.frame("one sided" = c(median(samps25.1 * rhovals**2),median(samps50.1 * rhovals**2),median(samps80.1 * rhovals**2)),
									"two sided" = c(median(samps25.2 * rhovals**2),median(samps50.2 * rhovals**2),median(samps80.2 * rhovals**2)),
									row.names = c("power = 0.25","power = 0.50","power = 0.80"))
library(xtable)
xres <- xtable(foo,label="tab:ttestpower",caption="Scaling of sample size with respect to $\\psnr^2$ required to achieve a fixed power in the t-test, at a fixed $\\typeI = 0.05$ rate.")
print(xres,include.rownames=TRUE)

## ----'power_thing',echo=FALSE,fig.cap="The percent error of the power mnemonic $e\\approx\\ssiz \\psnrsq$ is plotted versus \\psnr."----
ope <- 253
 zetas <- seq(0.1,2.5,length.out=51)
ssizes <- sapply(zetas,function(zed) { 
 x <- power.sr_test(n=NULL,zeta=zed,sig.level=0.05,power=0.5,ope=ope)
 x$n / ope 
})
plot(zetas,100 * ((exp(1) / zetas^2) - ssizes)/ssizes, ylab="error in mnemonic rule (as %)")

## ----'sobering',include=FALSE---------------------------------
foo.power <- power.sr_test(n=253,zeta=NULL,sig.level=0.05,power=0.5,ope=253)

## ----"autocorr_setup",echo=FALSE------------------------------
fname <- system.file('extdata','autocorr_study.rda',package='SharpeR')
if (fname == "") {
	fname <- 'autocorr_study.rda'
}
# poofs phivals empiricals predicteds ntrials nyr
load(fname)

# maybe need these some other time?
# vanilla Sharpe ratio in terms of whatever input units
f_vsharpe <- function(rets) {
	return(mean(rets) / sd(rets))
}
# annualized Sharpe ratio
f_asharpe <- function(rets, dpy = 253) {
	return(sqrt(dpy) * f_vsharpe(rets))
}
# the t statistic
f_tstat <- function(rets) { 
	return(sqrt(length(rets)) * f_vsharpe(rets))
}

## ----'autocorr_spread',echo=FALSE,fig.cap=paste("The empirical standard deviation for the \\tstat-statistic is shown at different values of the autocorrelation, \\pacor. Each point represents",ntrials,"series of approximately",nyr,"years of daily data, with each series generated by an AR(1) process with normal innovations.  Each series has actual SNR of zero. The fit line is that suggested by Van Belle's correction for autocorrelation, namely $\\sqrt{(1 + \\pacor) / (1 - \\pacor)}$.")----
plot(phivals,empiricals,pch=1,col="red",
	ylab="standard deviation of t statistic",
	xlab="autocorrelation")
lines(phivals,predicteds,col="black")
legend("topleft",c("Empirical","Predicted"),
	lty=c(0,1),pch=c(1,NA_integer_),col=c("red","black"))

## ----'leverage_foo',include=FALSE-----------------------------
Elevi <- 1.5 
Elevis <- Elevi ** 2
Vlevi <- 1 / 12
ratlevi <- Vlevi / Elevis
vixlevi <- (0.4)^2

## ----'skewstudy_prelim',echo=FALSE,cache=FALSE----------------
# MOCK it up.
fname <- system.file('extdata','skew_study.rda',package='SharpeR')
if (fname == "") {
	fname <- 'skew_study.rda'
}
# poofs res, ntrials, SPX.rets, dpy
load(fname)

SPX.start <- time(SPX.rets[1])
SPX.end <- time(SPX.rets[length(SPX.rets)])

## ----'skewstudy',echo=FALSE,results='asis',cache=FALSE--------
# res, ntrials are poofed above.
#digits=c(0,0,0,2,2,2),
xres <- xtable(res,label="tab:sharpe_skew_robustness",
							 display=c('s','s','s','g','g','fg','fg'),
							 align="ll|lrr|rr",
							 caption=paste("Empirical type I rates of the test for $\\psnr = 1.0\\yrtomhalf$ via distribution of the \\txtSR are given for various distributions of returns.  The empirical rates are based on ", 
														 ntrials, 
														 " simulations of three years of daily returns, with a nominal rate of $\\typeI = 0.05$. The 'corrected' type I rates refer to a normal approximation using Mertens' correction. Skew appears to have a much more adverse effect than kurtosis alone."))
print(xres,include.rownames=FALSE,hline.after=c(0,0,2,4,7,10,dim(res)[1]))

## ----'hot_ssiz_table',echo=FALSE,results='asis'---------------
# MOCK it up.
fname <- system.file('extdata','hotelling_power_rule.rda',package='SharpeR')
if (fname == "") {
	fname <- 'hotelling_power_rule.rda'
}
# poofs res, ntrials, SPX.rets, dpy
load(fname)
xres <- xtable(hot.power,label="tab:htestpower",
	caption="The numerator in the sample size relationship required to achieve a fixed power in Hotelling's test is shown. The type I rate is 0.05.")
print(xres,include.rownames=TRUE,sanitize.text.function=function(x){x})

## ----'example_usage_Fpower',include=FALSE---------------------
ex_p <- 30
ex_n <- 1000
ex_snr <- 1.5
dpy <- 253
ex_snr_d <- ex_snr / sqrt(dpy)
c <- ex_p / ex_n
cplus <- (c + ex_snr_d ** 2)
meanv <- sqrt(cplus / (1 - c))
stdv <- sqrt((1 / ex_n) * (ex_snr_d ** 4 + 2 * ex_snr_d ** 2 + c) / (2 * cplus * (1 - c) ** 2))

## ----'haircut_study_load',echo=FALSE,cache=FALSE--------------
# MOCK it up.
fname <- system.file('extdata','haircut_study.rda',package='SharpeR')
if (fname == "") {
	fname <- 'haircut_study.rda'
}
# poofs n.sim,n.stok,n.yr,n.obs,zeta.s,ope,hcuts
load(fname)

medv.true <- median(hcuts)
med.snr.true <- zeta.s * (1 - medv.true)

## ----'haircut_study_mock',eval=FALSE,echo=TRUE----------------
#  require(MASS)
#  
#  # simple markowitz.
#  simple.marko <- function(rets) {
#  	mu.hat <- as.vector(apply(rets,MARGIN=2,mean,na.rm=TRUE))
#  	Sig.hat <- cov(rets)
#  	w.opt <- solve(Sig.hat,mu.hat)
#  	retval <- list('mu'=mu.hat,'sig'=Sig.hat,'w'=w.opt)
#  	return(retval)
#  }
#  # make multivariate pop. & sample w/ given zeta.star
#  gen.pop <- function(n,p,zeta.s=0) {
#  	true.mu <- matrix(rnorm(p),ncol=p)
#  	#generate an SPD population covariance. a hack.
#  	xser <- matrix(rnorm(p*(p + 100)),ncol=p)
#  	true.Sig <- t(xser) %*% xser
#  	pre.sr <- sqrt(true.mu %*% solve(true.Sig,t(true.mu)))
#  	#scale down the sample mean to match the zeta.s
#  	true.mu <- (zeta.s/pre.sr[1]) * true.mu
#    X <- mvrnorm(n=n,mu=true.mu,Sigma=true.Sig)
#  	retval = list('X'=X,'mu'=true.mu,'sig'=true.Sig,'SNR'=zeta.s)
#  	return(retval)
#  }
#  # a single simulation
#  sample.haircut <- function(n,p,...) {
#  	popX <- gen.pop(n,p,...)
#  	smeas <- simple.marko(popX$X)
#  	# I have got to figure out how to deal with vectors...
#  	ssnr <- (t(smeas$w) %*% t(popX$mu)) / sqrt(t(smeas$w) %*% popX$sig %*% smeas$w)
#  	hcut <- 1 - (ssnr / popX$SNR)
#  	# for plugin estimator, estimate zeta.star
#  	asro <- sropt(z.s=sqrt(t(smeas$w) %*% smeas$mu),df1=p,df2=n)
#  	zeta.hat.s <- inference(asro,type="KRS")  # or 'MLE', 'unbiased'
#  	return(c(hcut,zeta.hat.s))
#  }
#  
#  # set everything up
#  set.seed(as.integer(charToRaw("496509a9-dd90-4347-aee2-1de6d3635724")))
#  ope <- 253
#  n.sim <- 4096
#  n.stok <- 6
#  n.yr <- 4
#  n.obs <- ceiling(ope * n.yr)
#  zeta.s <- 1.20 / sqrt(ope)   # optimal SNR, in daily units
#  
#  # run some experiments
#  experiments <- replicate(n.sim,sample.haircut(n.obs,n.stok,zeta.s))
#  hcuts <- experiments[1,]

## ----'haircutting',fig.cap=paste("Q-Q plot of",n.sim,"simulated haircut values versus the approximation given by \\eqnref{hcut_apx} is shown.")----
print(summary(hcuts))
# haircut approximation in the equation above
qhcut <- function(p, df1, df2, zeta.s, lower.tail=TRUE) {
	atant <- atan((1/sqrt(df1-1)) * 
		qt(p,df=df1-1,ncp=sqrt(df2)*zeta.s,lower.tail=!lower.tail))
	# a slightly better approximation is:
	# retval <- 1 - sin(atant - 0.0184 * zeta.s * sqrt(df1 - 1))
	retval <- 1 - sin(atant)
}
# if you wanted to look at how bad the plug-in estimator is, then
# uncomment the following (you are warned):
# zeta.hat.s <- experiments[2,];                                   
# qqplot(qhcut(ppoints(length(hcuts)),n.stok,n.obs,zeta.hat.s),hcuts,
# 			 xlab = "Theoretical Approximate Quantiles", ylab = "Sample Quantiles");
# qqline(hcuts,datax=FALSE,distribution = function(p) { qhcut(p,n.stok,n.obs,zeta.hat.s) },
# 			 col=2)

# qqplot;
qqplot(qhcut(ppoints(length(hcuts)),n.stok,n.obs,zeta.s),hcuts,
			 xlab = "Theoretical Approximate Quantiles", ylab = "Sample Quantiles")
qqline(hcuts,datax=FALSE,distribution = function(p) { qhcut(p,n.stok,n.obs,zeta.s) },
			 col=2)

## ----'hcut_moments',echo=FALSE,results='asis'-----------------
mc.med <- median(hcuts)
mc.mean <- mean(hcuts)
mc.sd <- sd(hcuts)

mc.p <- n.stok
mc.n <- n.obs
mc.zs <- zeta.s

fit.med <- 1 - sin(atan(mc.zs * sqrt(mc.n / (mc.p - 1))))
fit.mean <- 1 - sqrt(1 - (mc.p / (mc.p + mc.n * mc.zs^2)))
fit.sd <- sqrt(mc.p) / (mc.p + (mc.n * mc.zs^2) ^ 1.08)

fit.df <- data.frame(Monte.Carlo=c(mc.med,mc.mean,mc.sd),
		approximation=c(fit.med,fit.mean,fit.sd))
rownames(fit.df) <- c("median","mean","standard deviation")

# 2FIX: start here;yy
xres <- xtable(fit.df,label="tab:hcutfit",
	caption=paste("Empirical approximate values of the median, mean, and",
	"standard deviation of the haircut distribution are given for",
	n.sim,"Monte Carlo simulations of",n.obs,"days of Gaussian data for",n.stok,
	"assets with $\\psnropt=",zeta.s * sqrt(ope),"\\yrtomhalf$.",
	"The approximations from \\eqnref{hcut_moment_appx} are also reported."))

print(xres,include.rownames=TRUE,sanitize.text.function=function(x){x})



## ----'pos_cons_example', include=FALSE, warning=FALSE, message=FALSE----
base.opt.1 <- function(rho,c1,c2) {
# compute v1, v2 that solve
#
#   min      v1^2 + v2^2
#   st.         v1 >= c1
#     -rho v1 + v2 >= c2 
# 
# return as a list?
# the prospective solutions are
# (0,0), (c1,0), (-2rho c2/(1+rho^2),c2(1-rho^2)/(1+rho^2)),
# and (c1,c2+rho*c1)
	rho2 <- rho^2
	oneprho2 <- 1 + rho2;
	solns <- matrix(c(0,0, c1,0, -2*rho*c2/(oneprho2),c2*(1-rho^2)/(oneprho2),
c1,c2+rho*c1),nrow=2)
	feasible <- solns[1,] >= c1
	solns <- solns[,feasible]
	feasible <- -rho * solns[1,] + solns[2,] >= c2
	solns <- solns[,feasible]
	solns <- matrix(solns,nrow=2)

	vals <- colSums(solns ^ 2)
	retval <- solns[,which.min(vals)]
	return(retval)
}
base.opt.2 <- function(rho,psi1,psi2) {
# compute lam1, lam2 that solve
# 
#   min     [lam1,lam2] * R^-1 [lam1,lam2]'
#   st.     R^-1 ([psi1,psi2]' + [lam1,lam2]') >= 0
#
#   where   R = [1,rho]
#               [rho,1]
#    so  R^-1 = [1, -rho]
#               [-rho, 1] / (1-rho^2)
#
# and then return R^-1 ([psi1,psi2]' + [lam1,lam2]')
	c1 <- rho * psi2 - psi1;
	c2 <- rho * psi1 - psi2;
	rv <- base.opt.1(rho,c1,c2)
	lam2 <- rv[2] / (1-rho^2)
	lam1 <- rv[1] + rho * lam2
	nmu1 <- psi1 + lam1
	nmu2 <- psi2 + lam2
	# deal with roundoff
	w1 <- max(0,nmu1 - rho * nmu2)
	w2 <- max(0,-rho * nmu1 + nmu2)
	return(c(w1,w2))
}
base.opt.3 <- function(mu1,mu2,sig1,sig12,sig2) {
# compute w1, w2 to solve
#
#   max   [w1,w2] * [mu1,mu2]'  / sqrt([w1,w2] * Sig * [w1,w2]')
#   st.   w1 >= 0
#         w2 >= 0
#
#  where  Sig = [sig1   sig12]
#               [sig12   sig2]
	rho <- sig12 / sqrt(sig1 * sig2)
	psi1 <- mu1 / sqrt(sig1)
	psi2 <- mu2 / sqrt(sig2)
	rv <- base.opt.2(rho,psi1,psi2)
	return(rv)
}
# test it
base.opt.3(1,0.5,1,0.4,1);
base.opt.3(1,0.5,1,0.5,1);
base.opt.3(1,0.5,1,0.6,1);
base.opt.3(1,0.5,1,0.8,1);

base.opt.3(1,0.5,1,-0.8,1);
opt.pos.T2 <- function(mu1,mu2,sig1,sig12,sig2) {
# compute the maximum value of 
#
#   max   [w1,w2] * [mu1,mu2]'  / sqrt([w1,w2] * Sig * [w1,w2]')
#   st.   w1 >= 0
#         w2 >= 0
	rv <- base.opt.3(mu1,mu2,sig1,sig12,sig2)
	w <- as.vector(rv)
	mu <- as.vector(c(mu1,mu2))
	Sig <- matrix(c(sig1,sig12,sig12,sig2),nrow=2)
	T <- (mu %*% w) / sqrt(t(w) %*% Sig %*% w)
	return(T^2)
}

	



## ----'me_vs_bjones',echo=TRUE---------------------------------
nday <- 1024
nstk <- 5

# under the null: all returns are zero mean;
set.seed(as.integer(charToRaw("7fbb2a84-aa4c-4977-8301-539e48355a35")))
rets <- matrix(rnorm(nday * nstk),nrow=nday)

# t-stat via Britten-Jones procedure
bjones.ts <- function(rets) {
	ones.vec <- matrix(1,nrow=dim(rets)[1],ncol=1)
	bjones.mod <- lm(ones.vec ~ rets - 1)
	bjones.sum <- summary(bjones.mod)
	retval <- bjones.sum$coefficients[,3]
}
# wald stat via inverse second moment trick
ism.ws <- function(rets) {
# flipping the sign on returns is idiomatic,
	asymv <- ism_vcov(- rets)
	asymv.mu <- asymv$mu[1:asymv$p]
	asymv.Sg <- asymv$Ohat[1:asymv$p,1:asymv$p]
	retval <- asymv.mu / sqrt(diag(asymv.Sg))
}

bjones.tstat <- bjones.ts(rets)
ism.wald <- ism.ws(rets)

# compare them:
print(bjones.tstat)
print(ism.wald)

# repeat under the alternative;
set.seed(as.integer(charToRaw("a5f17b28-436b-4d01-a883-85b3e5b7c218")))
zero.rets <- t(matrix(rnorm(nday * nstk),nrow=nday))
mu.vals <- (1/sqrt(253)) * seq(-1,1,length.out=nstk) 
rets <- t(zero.rets + mu.vals)

bjones.tstat <- bjones.ts(rets)
ism.wald <- ism.ws(rets)

# compare them:
print(bjones.tstat)
print(ism.wald)