#' Implement Stamgaugh Covariance estimate for multiple starting dates
#'
#' @details
#' This method takes in data as a matrix or an xts object where multiple
#' time series with different starting dates are merged together. It then
#' computes a covariance estimator as described in Stambugh (1997). Covariance
#' estimate can also be robustified
#'
#' @param R xts or matrix of asset returns
#' @param ... allows passing additional paramters
#' @importFrom factorAnalytics fitTsfm
#' @import RCurl robust robustbase xts zoo CerioliOutlierDetection
#' @author Rohit Arora
#' @export
#'
#'
stambaugh.est <- function(R,...) {
# Given long data and a short data, fit a time series factor model
# to the short data with truncated long data as factors
covariance.est <- function(rlong, rshort, loc.long, cov.long, ...) {
long <- as.matrix(rlong)
short <- as.matrix(rshort)
short <- short[complete.cases(short),,drop=FALSE]
s <- nrow(short)
trunc.long <- tail(long,s)
.data <- na.omit(cbind(trunc.long,short))
add.args <- as.list(substitute(list(...)))[-1L]
if(add.args$fit.method == "LS" &&
"control" %in% names(add.args)) add.args[["control"]] <- NULL
args <- list(asset.names=colnames(short),
factor.names=colnames(trunc.long), data=.data)
args <- merge.list(args,add.args)
fit <- do.call(fitTsfm, args)
B <- as.matrix(fit$beta)
resid <- do.call(cbind,lapply(fit$asset.fit,residuals))
resid.cov <- if(robust) {
if(ncol(resid) == 1) fit$resid.sd^2
else cerioli2010.fsrmcd.test(resid, mcd.alpha = mcd.alpha)$sigma.hat.rw
} else {
cov(resid)
}
loc.short <- as.matrix(fit$alpha) + B %*% loc.long
loc.est <- rbind(loc.long,loc.short)
cov.short.long <- B %*% cov.long
cov.long.short <- t(cov.short.long)
cov.short.short <- resid.cov + B %*% cov.long %*% t(B)
cov.est <- cbind(rbind(cov.long,cov.short.long),
rbind(cov.long.short,cov.short.short))
rownames(cov.est) <- colnames(cov.est) <- NULL
list(loc=loc.est, cov=cov.est)
}
data.m <- as.matrix(R)
#remove rows with all NA
data.m <- data.m[rowSums(is.na(data.m))!=ncol(data.m), ]
if ( nrow(data.m) == 0 ) return(NA)
col.names <- colnames(data.m)
add.args <- as.list(substitute(list(...)))[-1L]
robust <- ifelse("fit.method" %in% names(add.args),
ifelse(add.args$fit.method == "Robust",TRUE, FALSE),
FALSE)
cov.estim <- "mcd"; control <- covRob.control("mcd")
if(robust) {
if("estim" %in% names(add.args))
cov.estim <- add.args[["estim"]] else add.args[["estim"]] <- cov.estim
if("control" %in% names(add.args))
control <- add.args[["control"]] else add.args[["control"]] <- control
mcd.alpha <- control$alpha
}
# Idea is to sort columns from maximum to minum data. Apply the long-short
# routine and use its estimate for the next step. In the next step use the
# the previously computed covariance estimate as estimate for long data and
# compute a new estimate by regressing short data on long data
# We want to optimize this procedure by grouping columns that have the same
# length to speed by regression.
# Get the last NA in each column
start <- apply(data.m,2,function(col) which.min(is.na(col)))
# Sorting will change the order of user supplied columns so store the original
# order that can be used on the sorted order
ord.start <- order(start)
old.start <- sapply(1:length(start),function(x) which(ord.start==x))
# sort columns that have maximum data
sort.start <- start[ord.start]
unique.sort.start <- unique(sort.start)
len <- length(unique.sort.start)
# group columns that have the same length
sort.count <- as.numeric(table(sort.start))
cum.sort.count <- cumsum(sort.count)
data.sort <- data.m[,ord.start]
# start by computing the mean and covariance of longest columns that have the same len
temp.data <- data.sort[,1:cum.sort.count[1],drop=FALSE]
val <- NULL
if(robust && ncol(temp.data) > 1)
val <- cerioli2010.fsrmcd.test(temp.data, mcd.alpha = mcd.alpha)
loc.est <- if(robust) {
if(ncol(temp.data)==1) {
loc <- as.numeric(coef(lmRob(temp.data ~ 1)))
names(loc) <- colnames(temp.data)
as.matrix(loc)
}
else as.matrix(val$mu.hat.rw)
} else {
as.matrix(apply(temp.data,2,mean))
}
cov.est <- if(robust) {
if(ncol(temp.data) == 1) scaleTau2(temp.data)^2
else val$sigma.hat.rw
} else {
cov(temp.data)
}
# extract a long block and a short block and let the basic routine do the job.
# Feed its output to the next set of grouped columns
for (j in 1:(len-1)) {
if (len == 1) break;
end <- cum.sort.count[j]
long <- data.sort[,1:end,drop=FALSE]
start <- (1 + end); end <- cum.sort.count[j+1]
short <- data.sort[,start:end,drop=FALSE]
est <- covariance.est(long, short, loc.est, cov.est, add.args)
loc.est <- est$loc; cov.est <- est$cov
}
# lets re-arrange back to return in the user-supplied order
loc.est <- loc.est[old.start,,drop=FALSE]
rownames(loc.est) <- col.names
cov.est <- cov.est[old.start,old.start]
colnames(cov.est) <- col.names
rownames(cov.est) <- col.names
list(data = data.m, loc = loc.est, cov = cov.est,
robust.params = list(control = control))
}
#' Estimate covariance matrices using Stambaugh method for classical and Robust
#' methods
#'
#' @details
#' This method takes in data as a matrix or an xts object where multiple
#' time series with different starting dates are merged together. It then
#' computes a covariance estimator based on the specifed style
#'
#' @param R xts or matrix of asset returns
#' @param ... pass paramters to fitTimeSeriesFactorModel(factorAnalytics),
#' covRob, lmRob (Robust) functions
#' @param method type of model to fit. Takes 3 values classic/robust/truncated
#' @author Rohit Arora
#' @export
#'
#'
stambaugh.fit <- function(R, method=c("classic","robust", "truncated"), ...) {
if(is.null(nrow(R)) || is.null(ncol(R))) stop("Invalid data")
if(length(method) > 2) stop("Can fit atmost 2 models")
if(!all(method %in% c("classic","robust", "truncated"))) stop("Invalid model")
model.classic <- model.robust <- NULL; .data <- NULL
add.args <- list(...)
if("fit.method" %in% names(add.args)) add.args["fit.method"] <- NULL
if("classic" %in% method) {
args <- list(R=R, fit.method="LS")
args <- merge.list(args,add.args)
classic <- do.call(stambaugh.est,args)
.data <- classic$data
.Classical <- list(center = classic$loc, cov = classic$cov,
dist = classic$dist,corr = FALSE, type="Classical")
model.classic <- list(Stambaugh=.Classical)
}
if("robust" %in% method) {
args <- list(R=R, fit.method="Robust")
args <- merge.list(args,add.args)
robust <- do.call(stambaugh.est,args)
.data <- robust$data
.Robust <- list(center = robust$loc,cov = robust$cov,
dist = robust$dist, corr = FALSE,
robust.params = robust$robust.params, type="Robust")
model.robust <- list("Robust Stambaugh"=.Robust)
}
if("truncated" %in% method) {
args <- list(R=na.omit(R), fit.method="LS")
args <- merge.list(args,add.args)
trunc <- do.call(stambaugh.est,args)
.data <- trunc$data
.Trunc <- list(center = trunc$loc,cov = trunc$cov,
dist = trunc$dist, corr = FALSE, type="Classical")
model.trunc <- list(Truncated=.Trunc)
}
model.list <- if (all(method %in% c("classic","robust")))
merge.list(model.classic,model.robust)
else if (all(method %in% c("classic","truncated")))
merge.list(model.classic, model.trunc)
else if (all(method %in% c("robust","truncated")))
merge.list(model.robust, model.trunc)
else if (method == "classic") model.classic
else if (method == "robust") model.robust
else if (method == "truncated") model.trunc
model.list <- list(models = model.list,data = .data)
class(model.list) <- "stambaugh"
model.list
}
#' Plot Ellipsis for the Stambaugh estimator
#'
#' @details
#' This method takes in fitted models for Stamgaugh Estimator. It then plots a
#' comparison of the fitted models using ellipsis plot
#'
#' @param models fitted models for covariance
#' @export
#' @author Rohit Arora
#'
#'
stambaugh.ellipse.plot <- function(models) {
if (length(models) != 2) stop("2 models needed for ellipse plot")
.models <- models$models
plot.covfm(.models, which.plots = 4)
}
#' An internal function that is used to calculate the Mahalanobis distance
#'
#' @details
#' The function takes in the model, data and the significance level and calculated
#' the critical values and the Mahalanobis distance.
#'
#' @param data data used to fit the covariance
#' @param confidence level for the test
#' @param model fitted models for covariance
#' @param id.n number of outliers to show
#' @author Rohit Arora
#'
#'
.stambaugh.dist <- function(data, model, level, id.n=10) {
start <- apply(data, 2,function(col) which.min(is.na(col)))
freq.tab <- data.frame(table(start));
freq.tab$start <- as.numeric(levels(freq.tab$start))
x.thresh <- c(freq.tab$start - 1 , nrow(data))
cum.sort.count <- cumsum(freq.tab$Freq)
levels <- sapply(1:nrow(freq.tab), function(j)
sqrt(qchisq(1 - level, df=cum.sort.count[j],lower.tail=FALSE)))
y.thresh <- levels
outlier <- dist <- matrix(NA, nrow=nrow(data), ncol = 1)
rownames(outlier) <- rownames(dist) <- rownames(data)
for (i in 1:(length(x.thresh)-1)) {
new.start <- freq.tab$start[i]
new.end <- ifelse(i != length(freq.tab$start),
freq.tab$start[i+1] -1, nrow(data))
symdata <- coredata(data[, which(start <= new.start),drop=FALSE])
if(model$type == "Classical") {
fit <- stambaugh.fit(symdata, method = "classic")
symdata <- data[new.start:new.end, which(start <= new.start),drop=FALSE]
dist[new.start:new.end] <- sqrt(mahalanobis(x = symdata,
center = fit$models$Stambaugh$center,
cov= fit$models$Stambaugh$cov))
new.start <- x.thresh[i] + 1; new.end <- x.thresh[i+1]
out <- new.start - 1 + which(dist[new.start:new.end] > y.thresh[i])
}
else if (model$type == "Robust") {
control <- model$robust.params$control
fit <- stambaugh.fit(symdata, method = "robust", control= control)
symdata <- data[new.start:new.end, which(start <= new.start),drop=FALSE]
dist[new.start:new.end] <- sqrt(mahalanobis(x = symdata,
center = fit$models$`Robust Stambaugh`$center,
cov= fit$models$`Robust Stambaugh`$cov))
val <- cerioli2010.fsrmcd.test(symdata, signif.alpha=1-level,
mcd.alpha = control$alpha )
cval <- sqrt(val$critvalfcn(1-level))
y.thresh[i] <- min(cval)
out <- new.start - 1 + which(dist[new.start:new.end] > cval)
}
temp.n <- ifelse(length(out) > id.n, id.n, length(out))
out <- out[order(dist[out], decreasing = TRUE)][1:temp.n]
outlier[out] <- out
}
y.thresh <- c(y.thresh,tail(y.thresh,1))
list(dist=dist, outlier=outlier, x.thresh=x.thresh, y.thresh=y.thresh)
}
#' Compute Mahalanobis distances for each of the data points and plot it against
#' upper 2.5\% chi-square quantile
#'
#' @details
#' This method takes in fitted models for Stamgaugh Estimator. It then uses the
#' distances computed for each stage and plots it against the upper level\%
#' Chi-Square quantile
#'
#' @param model fitted models for covariance
#' @param level value between 0 and 1 giving the chi-squared percent point
#' used to compute threshold for juding a point as an outlier
#' @import ggplot2
#' @author Rohit Arora
#' @export
#'
#'
stambaugh.distance.plot <- function(model, level=0.975) {
data <- model$data
if (ncol(data) == 0) stop("Empty Data")
models <- model$models; n.models <- length(models)
if (n.models == 0) stop("Empty Models")
if("Truncated" %in% names(models)) stop("Truncated data not allowed")
x.thresh <- y.thresh.classical <- y.thresh.robust <- c()
df <- do.call(rbind,lapply(models,
function(model, id.n = 10) {
temp <- .stambaugh.dist(data, model, level, id.n)
x.thresh <<- temp$x.thresh
if(model$type == "Classical") y.thresh.classical <<- temp$y.thresh
else if(model$type == "Robust") y.thresh.robust <<- temp$y.thresh
data.frame(Type = model$type, cbind(temp$dist, temp$outlier))
}))
dates <- try(as.Date(gsub("[A-Za-z ]+\\.","",rownames(df))))
df[,"Date"] <- c(seq(1:nrow(data)),seq(1:nrow(data)))
dateCheckFailed <- ifelse( class( dates ) != "try-error" &&
!is.na( dates ), FALSE, TRUE)
colnames(df) <- c("Type","Distance","Outlier","Date"); rownames(df) <- NULL
p <- ggplot(data=df, aes_string(x='Date',y='Distance')) +
geom_point(aes_string(col='Type', shape='Type')) +
facet_grid(~Type) + geom_text(aes_string(label='Outlier'),hjust=1, vjust=1) +
xlab("Date") +
ylab("Square Root of Mahalanobis Distance") +
theme(strip.text.x = element_text(size = 16),
axis.text=element_text(size=12),
axis.title=element_text(size=14))
for (i in 1:(length(x.thresh)-1)){
data.segm <- data.frame(x = rep(x.thresh[i],2),
y = c(y.thresh.classical[i],y.thresh.robust[i]),
xend = rep(x.thresh[i+1],2),
yend = c(y.thresh.classical[i],y.thresh.robust[i]),
Type = c("Classical","Robust"))
p <- p + geom_segment(data=data.segm,
aes_string(x='x',y='y',yend='yend',xend='xend'),
inherit.aes=FALSE, linetype="dashed", colour="blue")
data.segm <- data.frame(x = rep(x.thresh[i+1],2),
y = c(y.thresh.classical[i],y.thresh.robust[i]),
xend = rep(x.thresh[i+1],2),
yend = c(y.thresh.classical[i+1],y.thresh.robust[i+1]),
Type = c("Classical","Robust"))
p <- p + geom_segment(data=data.segm,
aes_string(x='x',y='y',yend='yend',xend='xend'),
inherit.aes=FALSE, linetype="dashed", colour="blue")
}
ind <- head(floor(seq(1,nrow(data),length.out = 5)),-1)
if(!dateCheckFailed) p <- p + scale_x_discrete(breaks = ind, labels=format(dates[ind],"%Y"))
p <- p + theme(legend.position="none")
options(warn=-1)
print(p)
options(warn=0)
p
}
#' Plot Ellipsis or Distance plot for the Stambaugh estimator
#'
#' @details
#' This method takes in fitted models and a paramter for deciding the type of plot
#'
#' @param x fitted models for covariance
#' @param y takes values 1/2. 1 = Ellipse plot, 2 = distance plot
#' @param ... allows passing additional paramters
#'
#' @method plot stambaugh
#' @author Rohit Arora
#' @export
#'
plot.stambaugh <- function(x, y=c(1,2),...) {
n <- length(x$models)
if (n != 2 && y[1] == 1) stop("2 models needed for ellipse plot")
which <- y[1]
if(!which %in% c(1,2)) stop("Unknown plot selected")
if (which == 1) stambaugh.ellipse.plot(x)
if (which == 2) stambaugh.distance.plot(x,...)
}
#' Plot data to visualize missing values
#'
#' @details
#' This method takes in data as an xts object and plots the data.
#' Missing values highlighted in red for matrix plot and time series of returns
#' are shown in in Summary plot
#'
#' @param data an xts/zoo object
#' @param which takes values 3/4. 3 = Summary plot, 4 = Matrix plot
#' @import VIM reshape2
#' @author Rohit Arora
#' @export
#'
#'
plotmissing <- function(data, which=c(3,4)) {
cols <- colnames(data)
if (length(cols) == 0) stop("Data should have column names")
which <- which[1]
options(warn=-1)
if (which == 3) {
ind <- which.min(apply(is.na(data), 2, which.min))
dates <- index(data[,ind])
d <- melt(coredata(data)); colnames(d) <- c("Index","Symbol","Returns")
symCount <- ncol(data)
#ind <- head(floor(seq(1,nrow(data),length.out = 9)),-1)
year.dates <- format(dates, "%Y")
ind <- sapply(unique(year.dates), function(val)
which.max(year.dates == val))
ind.ind <- seq.int(1, length(ind), length.out = min(10,length(ind)))
ind <- ind[ind.ind]
p <- ggplot(data=d, aes_string(x='Index', y='Returns', colour='Symbol', group='Symbol')) +
geom_line() +
xlab("Dates") + ylab("Returns") +
scale_x_discrete(breaks = ind, labels=year.dates[ind]) +
facet_wrap(~Symbol, ncol=round(sqrt(symCount)), scales = "free_x") +
theme(legend.position="none")
print(p)
}
if(which == 4) {
data <- coredata(data)
if(class(data) == "data.frame") cols <- cols[unlist(lapply(data, is.numeric))]
data <- data[, cols]
colnames(data) <- sapply(cols, function(name) substr(name,1,9))
matrixplot(data, main="Location of Missing values")
}
options(warn=0)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.