#' @title Implements Combined Backfill for Multiple Asset Groups
#'
#' @description This function implements Combined Backfill for Multiple Asset
#' Groups
#'
#' @importFrom zoo coredata index
#' @importFrom xts xts
#' @importFrom moments skewness kurtosis
#' @importFrom stats as.formula coef lm resid sd cov
#'
#' @param dat_xts xts object containing the returns data for multiple assets with unequal return history.
#'
#'
#' @return This function returns the combined backfilled (CBF) dataset.
#'
#' @author Pushpak Sarkar
#'
#' @references
#' Jiang, Y. and Martin, R. D. (2016). "Turning Long and Short Return Histories into Equal Histories: A Better Way to Backfill Returns", https://ssrn.com/abstract=2833057.
#'
#' @rdname constructCBFData
#' @export
constructCBFData <- function(dat_xts){
# Convert the data to xts object
# dat_xts <- xts(dat_mat[, -1], order.by = as.Date(dat_mat[, 1], "%m/%d/%Y"))
# Keep a copy of original data - will be updated with fitted values for the
# missing portion
fitted_xts <- dat_xts
# Find which columns have 'NA' values; so these columns have shorter histtory
miss_hist_var <- colnames(dat_xts)[apply(dat_xts, 2, anyNA)]
# Find which column has full history
long_hist_var <- setdiff(colnames(dat_xts), miss_hist_var)
# Find how many observatins are missing for each short history columns
na_count <- apply(dat_xts, 2, function(x) sum(is.na(x)))
na_count <- as.matrix(na_count)
colnames(na_count) <- "numberMissing"
# Alternative way to find the full history column
#long_hist_var <- rownames(na_count)[na_count[, "numberMissing"]==0]
# Drop full history group from 'na_count' as 'NA' count for this group is zero
na_count <- na_count[miss_hist_var, "numberMissing", drop=F]
# Sort count of missing obs from smallest to largest
miss_itr <- sort(unique(na_count[,"numberMissing"]))
# Full length of the dataset
full_length <- nrow(dat_xts)
new_dat <- dat_xts[, long_hist_var, drop=F]
est_coef <- matrix(NA, nrow = 1+ncol(dat_xts),
ncol = ncol(dat_xts) - length(long_hist_var))
rownames(est_coef) <- c("Intercept", colnames(dat_xts))
colnames(est_coef) <- miss_hist_var
err_mat <- matrix(NA, nrow = nrow(dat_xts), ncol = ncol(dat_xts) - length(long_hist_var))
colnames(err_mat) <- miss_hist_var
# Start with the short history columns which have the smallest number of
# observations missing i.e. this group is the longest short history group. Then
# move on to the next shorter history group and so on.
for (i in miss_itr) {
dep_var <- rownames(na_count)[na_count[, "numberMissing"]==i]
indep_var <- rownames(na_count)[na_count[, "numberMissing"] < i]
indep_var <- c(long_hist_var, indep_var)
regressor_list <- paste(indep_var, collapse = "+")
reg_dat <- dat_xts[, c(indep_var, dep_var), drop=F]
miss_val <- which(is.na(dat_xts[, dep_var[1]]))
for (j in dep_var) {
reg_eqn <- paste(j, "~", regressor_list)
reg <- lm(as.formula(reg_eqn), data=reg_dat)
betas <- as.matrix(coef(reg))
row.names(betas)[1] <- "Intercept"
est_coef[which(row.names(est_coef) %in% row.names(betas)), j] <- betas
err_mat[(i+1):nrow(err_mat), j] <- as.matrix(resid(reg))
}
num_resid <- nrow(as.matrix(resid(reg)))
num_miss <- i
# Stack the 'new_dat' equal to number of residual times
temp_newdat <- matrix(rep(t(coredata(new_dat)), num_resid), ncol= ncol(new_dat),
byrow=TRUE)
colnames(temp_newdat) <- colnames(new_dat)
# Number of times the short_hist_var should be repeated
rep_num <- nrow(temp_newdat)/full_length
dep_dat <- matrix(rep(t(coredata(dat_xts[, dep_var, drop=F])), rep_num),
ncol= length(dep_var), byrow=TRUE)
colnames(dep_dat) <- dep_var
# Merge the existing new_dat and temp_newdat
new_dat <- cbind(temp_newdat, dep_dat)
for (k in dep_var){
X_mat <- cbind(1, new_dat[which(is.na(new_dat[, k])), indep_var, drop=F])
betas <- est_coef[c("Intercept", indep_var), k, drop=F]
new_dat[which(is.na(new_dat[, k])), k] <- X_mat%*%betas
}
# Crate the start index of each repeating block
rep_indx <- seq(from = miss_val[1]-1, by = nrow(new_dat)/num_resid,
length.out = num_resid)
# Add the respective residual to the fitted values of short history vars
for (m in 1:length(rep_indx)) {
for (n in 1:num_miss) {
new_dat[rep_indx[m]+n, dep_var] <- new_dat[rep_indx[m]+n, dep_var] + err_mat[m+num_miss, dep_var]
}
}
}
return(new_dat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.