#' Function to form panel data from a portfolio equity matrix. A precursor
#' to performing robust regression between portfolio legs.
panelise_rp <- function(rpmat,ind_mat=NULL,to_monthly=TRUE, leading=FALSE){
if(is.null(names(rpmat))){names(rpmat) <- paste(1:ncol(rpmat))}
if(!is.null(ind_mat)){
inames <- names(ind_mat)
if(sum(class(ind_mat)=="data.frame")==0){
tm <- index(ind_mat)
ind_mat <- as.data.frame(ind_mat)
ind_mat$date <- tm
if(to_monthly){
ind_mat$date <- zoo::as.yearmon(ind_mat$date)
}
}
}
rp_list <- list()
for (i in 1:ncol(rpmat)){
x <- rpmat[,i]
if(to_monthly){
x <- period_return(x,leading=leading)
index(x) <- as.yearmon(index(x))
} else{
x <- zoo::na.trim(x)
x <- PerformanceAnalytics::Return.calculate(x,method = "discrete")
}
names(x) <- "rp"
tm <- index(x)
x <- as.data.frame(x,row.names = FALSE)
if(grepl("POSIX",class(tm)[1])){tm <- as.Date(tm)}
x$date <- tm
x$portfolio <- names(rpmat)[i]
if(!is.null(ind_mat)){
x <- merge(x,ind_mat,by="date")
}
rp_list[[i]] <- x[,c("date",names(x)[names(x)!="date"])]
}
rp_list <- do.call(rbind,rp_list)
return(na.omit(rp_list))
}
#' Function to load Fama French 3 factor data
load_fama_french <- function(ffdir,as_df=TRUE,freq="monthly"){
if(freq=="daily"){
ff3 <- readr::read_csv(ffdir,skip = 3)
names(ff3)[1] <- "date"
names(ff3)[grep("Mkt-",names(ff3),ignore.case = TRUE)] <- "Mkt_RF"
ff3$date <- ymd(ff3$date)
ff3 <- xts(ff3[,names(ff3)!="date"],order.by = ff3$date)
return(ff3/100)
}
ff3 <- readr::read_csv(ffdir,skip = 3,n_max = 663)
names(ff3)[1] <- "date"
names(ff3)[grep("Mkt-",names(ff3),ignore.case = TRUE)] <- "Mkt_RF"
# append day to %Y%m format to allow conversion
# to date
ff3$date <- paste0(ff3$date,"01")
ff3$date <- lubridate::ymd(ff3$date)
ff3[,names(ff3)!="date"] <- ff3[,names(ff3)!="date"]/100
if (as_df) {
ff3$date <- zoo::as.yearmon(ff3$date)
return(ff3)}
ff3 <- xts(ff3[,names(ff3)!="date"],order.by = ff3$date)
index(ff3) <- zoo::as.yearmon(index(ff3))
return(ff3)
}
#' Function to calculate and tabulate the mean return for every quantile
#' portfolio in a double-sorted portfolio formation procedure. The returns
#' to a long-short portfolio of the most extreme second layer quantiles within
#' each first-layer quantile is also provided.
cross_quantile_table <- function(Vars,groupings,K_m,hhll=FALSE,DF_rets,
verbose=TRUE,remove_period=FALSE,date1=NULL,
date2=NULL,whole_months=TRUE){
# Get groupings/ranks
ranks <- multi_rank(Vars=Vars, groupings = groupings)
# Get weight matrices for each group
weight_list <- list()
l=1
for (i in 1:groupings[1]){
for (j in 1:groupings[2]){
x <- get_weights_mr(ranks,long_g = c(i,j),long_only = TRUE)
weight_list[[l]] <- x
names(weight_list)[l] <- paste0(names(Vars)[1],i,names(Vars)[2],j)
l <- l+1
}
# Get long-short weights
x <- get_weights_mr(ranks,long_g = c(i,1),short_g=c(i,groupings[2]))
weight_list[[l]] <- x
names(weight_list)[l] <- paste0(names(Vars)[1],
i,names(Vars)[2],"1-",names(Vars)[1],i,
names(Vars)[2],groupings[2])
l <- l+1
}
if(hhll){
#get_extremes
x <- get_weights_mr(ranks,long_g=c(1,1),
short_g=c(groupings[1],groupings[2]))
weight_list[[l]] <- x
names(weight_list)[l] <- paste0(names(Vars)[1],"1",
names(Vars)[2],"1-",names(Vars)[1],
groupings[1],names(Vars)[2],groupings[2])
}
# Perform simulations for each weight matrix
rp_list <- list()
for (w in 1:length(weight_list)){
weights=weight_list[[w]]
cores <- detectCores()
mycluster <- makeCluster(cores-1,type = "FORK")
registerDoParallel(mycluster)
out <- foreach(start_i = 1:K_m) %dopar% {
wts_i <- weights_i(weights, start_i, K_m, holding_time=FALSE )[[1]]
rp <- rp_cum_rets(wts_i,DF_rets,stop_loss = 0.9)
initEq*rp
}
stopCluster(mycluster)
names(out) <- paste0("portfolio",1:K_m)
rp <- do.call(cbind,out)
#rp <- xts(rowSums(rp),order.by = index(rp))
rp_list[[w]] <- rp
}
if(remove_period){
for (i in 1:length(rp_list)){
rp_list[[i]] <- gfc_adjust(rp_list[[i]],weights=weight_list[[i]],
K_m=K_m,date1,date2,whole_months)
}
}
# Get mean monthly stats
tab <- sapply(rp_list,mean_return,leading=TRUE)
ps <- tab[3,]
tab[1:2,] <- format(round(tab[1:2,],4),scientific = FALSE)
tab <- apply(tab,2,FUN=function(x) sub(".*? (.+)", "\\1",x))
tab[1,ps < 0.05] <- paste0(tab[1,ps < 0.05],"*")
tab[1,ps < 0.01] <- paste0(tab[1,ps < 0.01],"*")
tab[1,ps < 0.001] <- paste0(tab[1,ps < 0.001],"*")
tab[1,] <- paste0(tab[1,]," (",tab[2,],")")
if(hhll){
hh_ll <- tab[1,ncol(tab)]
tab <- tab[,-ncol(tab)]
}
tab <- matrix(tab[1,],nrow=groupings[1],byrow=TRUE)
colnames(tab) <- paste(1:ncol(tab))
colnames(tab)[1:groupings[2]] <- paste0(names(Vars)[2],1:groupings[2])
colnames(tab)[groupings[2]+1] <- paste0(colnames(tab)[1],"-",
colnames(tab)[groupings[2]])
rownames(tab) <- paste0(names(Vars)[1],1:nrow(tab))
if(hhll){
tab <- rbind(tab,c(hh_ll,rep("",ncol(tab)-1)))
rownames(tab)[nrow(tab)] <- paste0("11-",groupings[1],groupings[2])
}
if(verbose){
names(rp_list) <- names(weight_list)
return(list(res_table=tab, portfolios=rp_list))
}
return(tab)
}
#' Function to calculate and tabulate the mean *equity* return for every quantile
#' portfolio in a single-sorted portfolio procedure.
inter_quantile_table_eq <- function(Vars,groupings,DF_rets,to_monthly=TRUE,diff=TRUE,
verbose=FALSE){
# Get groupings/ranks
ranks <- multi_rank(Vars=Vars, groupings = groupings)
# Get weight matrices for each group
weight_list <- list()
v1 <- ranks[[1]]
l=1
for (i in 1:groupings[1]){
lo <- v1[index(v1), ] == i
lo[index(lo),] <- as.numeric(lo)
lo <- na.fill(lo/rowSums(lo),0)
weight_list[[l]] <- lo
l <- l+1
}
if(diff){ # Get long-short weights
x <- weight_list[[1]]-weight_list[[groupings[1]]]
weight_list[[l]] <- x
}
# Perform simulations for each weight matrix
rp_list <- list()
for (w in 1:length(weight_list)){
weights=weight_list[[w]]
cores <- detectCores()
mycluster <- makeCluster(cores-1,type = "FORK")
registerDoParallel(mycluster)
out <- foreach(start_i = 1:K_m) %dopar% {
wts_i <- weights_i(weights, start_i, K_m, holding_time=FALSE )[[1]]
rp <- rp_cum_rets(wts_i,DF_rets,stop_loss = 0.9)
rp
}
stopCluster(mycluster)
names(out) <- paste0("portfolio",1:K_m)
rp <- do.call(cbind,out)
rp <- xts(rowSums(rp),order.by = index(rp))
rp_list[[w]] <- rp
}
# Convert to monthly returns
if(to_monthly){
rp_list <- lapply(rp_list,
function(x) quantmod::monthlyReturn(na.omit(x),
leading=TRUE))
}
rp_list <- do.call(cbind,rp_list)
names(rp_list) <- names(weight_list)
# Get portfolio statistics
mean_list <- colMeans(rp_list)
sd_list <- apply(rp_list,2,sd)/sqrt(nrow(rp_list))
t_list <- abs(mean_list)/sd_list
p_list <- (1-pt(t_list,df=nrow(rp_list)-1))*2
# Create table
print_list <- paste0(format(round(mean_list,4),scientific=FALSE),
" ", "(", format(round(t_list,4),scientific = FALSE),")")
print_list[p_list<0.05] <- paste0(print_list[p_list<0.05],"*")
print_list[p_list<0.01] <- paste0(print_list[p_list<0.01],"*")
ret_mat <- matrix(print_list,nrow = 1)
colnames(ret_mat) <- c(paste0(names(Vars)[1],1:groupings[1]),
paste0(names(Vars),"1","-",names(Vars),groupings[1]))
if(verbose){
names(mean_list) <- names(t_list) <- names(p_list) <- colnames(ret_mat)
return(list(res_table=ret_mat,means=mean_list,stat=t_list,ps=p_list,
portfolios=rp_list))
}
return(ret_mat)
}
#' Function to calculate and tabulate the mean return for every quantile
#' portfolio in a single-sorted portfolio procedure.
inter_quantile_table <- function(Vars,groupings,K_m,to_monthly=TRUE,diff=TRUE,
verbose=FALSE,stop_loss=0.9,DF_rets,
remove_period=FALSE,date1=NULL,date2=NULL,
whole_months=TRUE){
# Get groupings/ranks
ranks <- multi_rank(Vars=Vars, groupings = groupings)
# Get weight matrices for each group
weight_list <- list()
v1 <- ranks[[1]]
l=1
for (i in 1:groupings[1]){
lo <- v1[index(v1), ] == i
lo[index(lo),] <- as.numeric(lo)
lo <- na.fill(lo/rowSums(lo),0)
weight_list[[l]] <- lo
l <- l+1
}
if(diff){ # Get long-short weights
x <- weight_list[[1]]-weight_list[[groupings[1]]]
weight_list[[l]] <- x
}
# Perform simulations for each weight matrix
rp_list <- list()
for (w in 1:length(weight_list)){
weights=weight_list[[w]]
cores <- detectCores()
mycluster <- makeCluster(cores-1,type = "FORK")
registerDoParallel(mycluster)
out <- foreach(start_i = 1:K_m) %dopar% {
wts_i <- weights_i(weights, start_i, K_m, holding_time=FALSE )[[1]]
rp <- rp_cum_rets(wts_i,DF_rets,stop_loss = stop_loss)
rp
}
stopCluster(mycluster)
names(out) <- paste0("portfolio",1:K_m)
rp <- do.call(cbind,out)
rp_list[[w]] <- rp
}
# Remove outlier periods (e.g, GFC) if applicable.
if(remove_period){
for (i in 1:length(rp_list)){
rp_list[[i]] <- gfc_adjust(rp_list[[i]],weights=weight_list[[i]],
K_m=K_m,date1,date2,whole_months)
}
}
# Get mean monthly stats
tab <- mean_return_table(rp_list,leading = TRUE)
tab <- data.frame(t(tab),stringsAsFactors = FALSE)
# remove p-val
tab[,2] <- sub(".*? (.+)", "\\1", tab[,2])
tab[,1] <- paste(tab[,1],tab[,2])
tab <- data.frame(tab[,1])
rownames(tab) <- paste(1:nrow(tab))
rownames(tab)[1:groupings[1]] <- paste0(names(Vars)[1],1:groupings[1])
if(diff){rownames(tab)[groupings[1]+1] <- paste0(rownames(tab)[1],"-",
rownames(tab)[groupings[1]])}
colnames(tab) <- "mean (t-stat)"
if(verbose){
return(list(res_table=tab, portfolios=rp_list))
}
return(tab)
}
#' Function to display the mean, t-statistic and p-vals for each column of a
#' equity time series as a table.
mean_return_table_eq <- function(comp_mat,sig=4){
nms <- names(comp_mat)
mean_list <- colMeans(comp_mat)
sd_list <- apply(comp_mat,2,sd)/sqrt(nrow(comp_mat))
t_list <- abs(mean_list/sd_list)
p_list <- (1-pt(t_list,df=nrow(comp_mat)-1))*2
# Construct strings for printing
pt_list <- paste0("(",format(round(t_list,sig),scientific=FALSE),")")
pp_list <- format(round(p_list,sig),scientific=FALSE)
pt_list <- paste(pp_list,pt_list)
pt_list[p_list<0.05] <- paste0(pt_list[p_list<0.05],"*")
pt_list[p_list<0.01] <- paste0(pt_list[p_list<0.01],"*")
mean_list <- format(round(mean_list,sig),scientific = FALSE)
# Create table
table1 <- matrix(c(mean_list,pt_list),byrow=TRUE,
ncol=ncol(comp_mat))
colnames(table1) <- nms
rownames(table1) <- c("Mean","p (t-stat)")
as.data.frame(table1)
}
#' Function to display the mean, t-statistic and p-vals for each column of a
#' equity time series as a table.
mean_return_table <- function(rps,verbose=FALSE,sig=4,leading=FALSE){
nms <- names(rps)
if(is.null(nms)){nms <- paste0("P",1:length(rps))}
tab <- sapply(rps,mean_return,leading=leading)
ps <- tab[3,]
tab <- format(round(tab,4),scientific = FALSE)
tab <- apply(tab,2,stringr::str_trim)
tab[3,] <- paste0(tab[3,]," (",tab[2,],")")
tab[1,ps<0.05] <- paste0(tab[1,ps<0.05],"*")
tab[1,ps<0.01] <- paste0(tab[1,ps<0.01],"*")
tab[1,ps<0.001] <- paste0(tab[1,ps<0.001],"*")
tab <- tab[-2,]
tab <- matrix(tab,ncol=length(rps))
rownames(tab) <- c("mean","p (t-stat)")
colnames(tab) <- nms
if(verbose){
num=t(sapply(rps,mean_return,leading=leading))
colnames(num) <- c("mean","t-stat","p-val")
return(list(res_mat=data.frame(tab),num=num))
}
return(data.frame(tab,stringsAsFactors = FALSE))
}
# Function to get the mean buy-and-hold (portfolio) response to an event,
# as opposed to the continuously equal-weighted response of portfolio firms.
event_response_bh <- function(weights,df_rets,horizon,on="months",stop_loss=NULL){
cols <- max(diff(endpoints(df_rets,on=on,k=1)))*horizon
col_min <- min(diff(endpoints(df_rets,on=on,k=horizon)))
long_mat <- xts(matrix(ncol=cols,nrow=nrow(weights)),order.by=index(weights))
short_mat <- long_mat
ls_mat <- long_mat
panel_mat <- long_mat[,1:3]
names(panel_mat) <- c("long_side","short_side","portf")
if(on=="weeks"){
f <- weeks
} else if (on=="months"){
f <- months
}
for (i in 1:nrow(weights)){
if(sum(weights[i,] != 0)==0){next()}
sdate <- as.Date(index(weights)[i])
edate <- sdate %m+% f(horizon)
edate <- ceiling_date(edate,"months")-days(1)
if(edate > (index(df_rets)[nrow(df_rets)]+days(5))){break()}
sdate <- sdate+days(1)
drange <- paste0(sdate,"::",edate)
lm <- 0
sm <- 0
wts_sum_l=0
wts_sum_s=0
long_syms <- names(weights)[weights[i,] > 0]
short_syms <- names(weights)[weights[i,] < 0]
if(length(short_syms)!=0){
short_wts <- as.numeric(abs(weights[i,short_syms]))
wts_sum_s <- sum(short_wts)
sm <- apply(cumprod(1+df_rets[drange,short_syms]),1,
weighted.mean,w=short_wts) # Buy and hold
sm <- -(sm)
}
if(length(long_syms)!=0){
long_wts <- as.numeric(abs(weights[i,long_syms]))
wts_sum_l <- sum(long_wts)
lm <- apply(cumprod(1+df_rets[drange,long_syms]),1,
weighted.mean,w=long_wts) # Buy and hold
}
lsm <- wts_sum_l*lm+wts_sum_s*sm
if(!is.null(stop_loss)){
if(min(lsm)<=-1*stop_loss){
wls <- which(lsm <= -1*stop_loss)[1]
hold_ret_ls <- lsm[wls]
lsm[(wls+1):length(lsm)] <- NA
sm[(wls+1):length(sm)] <- NA
lm[(wls+1):length(lm)] <- NA
hold_ret_l <- (tail(na.omit(lm),1)-1)*wts_sum_l
hold_ret_s <- (tail(na.omit(sm),1)+1)*wts_sum_s
} else{
hold_ret_l <- (tail(lm,1)-1)*wts_sum_l
hold_ret_s <- (tail(sm,1)+1)*wts_sum_s
hold_ret_ls <- tail(lsm,1)
}
} else{
hold_ret_l <- (tail(lm,1)-1)*wts_sum_l
hold_ret_s <- (tail(sm,1)+1)*wts_sum_s
hold_ret_ls <- tail(lsm,1)
}
sdate <- as.Date(index(weights)[i])
long_mat[sdate,1:length(lm)] <- lm*wts_sum_l - wts_sum_l
short_mat[sdate,1:length(sm)] <- sm*wts_sum_s + wts_sum_s
ls_mat[sdate,1:length(lsm)] <- lsm + (wts_sum_s-wts_sum_l)
panel_mat[sdate,1:3] <- c(hold_ret_l,hold_ret_s,hold_ret_ls)
}
l <- c(0,colMeans(long_mat,na.rm = TRUE))
s <- c(0,colMeans(short_mat,na.rm = TRUE))
ls_ <- c(0,colMeans(ls_mat,na.rm = TRUE))
x <- as.data.frame(cbind(l,s,ls_))
names(x) <- c("long","short","long_short")
x$days <- 1:nrow(x)
x <- x[1:col_min,]
return(list(event_mat=x,panel=panel_mat,
long_mat=long_mat,short_mat=short_mat,ls_mat=ls_mat))
}
#' Function to get the residual series for a given period-by-period regression
#' equation and append to existing data set or flatten to time series for
#' ranking.
create_residual <- function(pmat,fmla,flatten=TRUE,
id="ticker",ind,assets=NULL){
# Get relevant variable names for complete.cases
var_names <- intersect(vnames(fmla),names(pmat))
# Make name for residual variable and make sure not taken
v_name <- paste0(var_names[1],"_res")
while(v_name %in% names(pmat)){
v_name <- paste0(v_name,"x")
}
udates <- sort(unique(pmat$date))
df_list <-list()
for (i in 1:length(udates)){
subdf <- dplyr::filter(pmat,date==udates[i])
fl <- tryCatch(lm(fmla,subdf), error=function(e) integer(1))
if (is.integer(fl)) {next()}
subdf[complete.cases(subdf[,var_names]),v_name] <- fl$residuals
df_list[[i]] <- subdf
}
df <- do.call(rbind,df_list)
rm(df_list)
if (flatten){
# Find time column
tcol=which(sapply(1:ncol(df),
FUN=function(x) class(df[,x])) %in%
c("Date","POSIXct","POSIXt","yearmon"))[1]
# Subset for spreading
Index=c(names(df)[tcol],id)
# Spread the residual data into format used for ranking and position prep
d_res <- tidyr::spread(df[,c(Index,v_name)],id,v_name)
# Assets with less than J_m periods of data will not be in the panel data.frame
if(!is.null(assets)){
no_data <- assets[!(assets %in% names(d_res))]
# Create empty data set for these assets
cmat <- matrix(nrow = nrow(d_res),ncol = length(no_data),
dimnames = list(NULL,no_data))
# Merge with rest of data
d_res <- cbind(d_res,cmat)
# Put columns in "correct" order (to match 'assets' i.e. to match other objects)
d_res <- xts(d_res[,assets],order.by=d_res[,Index[1]])
} else {
d_res <- xts(d_res[,-which(names(d_res)==Index[1])],
order.by=d_res[,Index[1]])
}
# Align index
if(!is.null(ind)){
d_res <- cbind(d_res,ind[endpoints(ind,"months",k=1)])
}
if(!is.null(assets)){
names(d_res) <- assets
}
return(list(panel=df,flat=d_res))
}
return(df)
}
#' Function to extract variable names from an equation.
vnames <- function(f){
f <- as.character(f)
f <- paste(f,collapse = " ")
f <- gsub("~"," ",f,fixed = TRUE)
f <- gsub("+"," ",f,fixed = TRUE)
f <- gsub("-"," ",f,fixed = TRUE)
f <- gsub("*"," ",f,fixed = TRUE)
f <- gsub("I("," ",f,fixed = TRUE)
f <- gsub("("," ",f,fixed = TRUE)
f <- gsub(")"," ",f,fixed = TRUE)
f <- gsub("^"," ",f,fixed = TRUE)
f <- gsub("/"," ",f,fixed = TRUE)
f <- stringr::str_squish(f)
f <- unlist(strsplit(f,split = " "))
f <- setdiff(f,as.character(1:9))
return(unique(f))
}
#' Function to extract object names from function call.
#' @param s Function call (string)
get_objs <- function(s){
s <- substr(ptext,1,nchar(ptext)-1)
s <- gsub(".*\\(","",s)
s <- unlist(strsplit(s,","))
s <- gsub(".*\\=","",s)
s <- s[setdiff(1:length(s),grep('\\^|\\*|\\|\\+|-|\\/|"',s))]
s <- setdiff(s,c("TRUE","FALSE"))
return(s)
}
#' Function to append the sorting group of an asset for a given variable
#' (or combination of variables) to panel data.
append_group <- function(pmat,Vars,groupings,Id="ticker"){
# Elements of Vars should be ranks, not raw variables!
if(is.null(names(Vars))){ names(Vars) <- paste0("V",1:length(Vars))}
mr <- multi_rank(Vars=Vars,groupings=groupings)
mr <- mr[[length(mr)]]
tm <- index(mr)
mr <- as.data.frame(mr,row.names = FALSE)
mr$date <- tm
mr <- tidyr::gather(mr,ticker,group,-date)
names(mr)[which(names(mr)=="ticker")] <- Id
names(mr)[which(names(mr)=="group")] <- paste0(names(Vars),"_group")
mr <- merge(pmat,mr,all.x = TRUE)
}
#' Function to get different types of mean return from portfolio return object.
mean_return <- function(rpmat,overlapping=TRUE,leading=FALSE){
rp <- period_return(rpmat, leading = leading)
if (overlapping){
rp <- na.omit(rp)
ts_mean <- rowMeans(rp)
n <- length(ts_mean)
t_m <- mean(ts_mean)
s_d <- sd(ts_mean)/sqrt(n)
} else {
rp <- na.omit(as.numeric(rp))
t_m <- mean(rp)
n <- length(rp)
s_d <- sd(rp)/sqrt(n)
}
tstat <- t_m/s_d
pval <- (1-pt(abs(tstat),df=n-1))*2
return(cbind(mean=t_m,tstat=tstat,pval=pval))
}
#' Function to remove no-invest periods in portfolio simulation results.
#' @description Trading resumes at next entry date for a given K portfolio.
#' In this ways, portfolios are still staggered after event period.
gfc_adjust <- function(rmat,weights,K_m,date1,date2,whole_months=TRUE,
overlapping_only=FALSE){
date1 <- as.Date(date1)
date2 <- as.Date(date2)
init_vals <- apply(rmat,2,function(x) as.numeric(na.omit(x)[1]))
adj_list <- list()
for (i in 1:ncol(rmat)){
x <- na.omit(rmat[,i])
x <- diff(log(x))
x[1,] <- 0
wi <- weights_i(weights,start_i=i,k=K_m)[[2]]
d2i <- index(wi)[which(index(wi)>=date2)[1]]
d1i <- GFC1
if(whole_months){d1i <- floor_date(d1i,"months")}
if(!overlapping_only){
x[index(x) >= d1i & index(x) <= d2i] <- 0
x <- exp(cumsum(x))*init_vals[i]
}
x[index(x) >= d1i & index(x) <= d2i] <- NA
adj_list[[i]] <- x
}
adj_list <- do.call(cbind,adj_list)
names(adj_list) <- names(rmat)
if(overlapping_only){
# Want to keep original index, but everywhere without full overlap make NA
tm0 <- index(adj_list)
tm <- index(na.trim(adj_list))
adj_list <- na.omit(adj_list)
adj_list <- cbind(adj_list,tm)
nas <- which(is.na(adj_list[,1]))
adj_list <- exp(cumsum(na.fill(adj_list,0)))
adj_list[nas,] <- NA
adj_list <- cbind(adj_list,tm0)
}
return(adj_list)
}
#' Function to remove weights on assets with discordent information.
#' Specifcally, only long (short) holdings with positive (negative) variables
#' (e.g. sentiment, momentum) are kept.
remove_discordant <- function(weights,Raw){
# Raw is list of raw variable values (not ranks)
long <- short <- xts(matrix(0,nrow=nrow(weights),ncol=ncol(weights)),
order.by=index(weights))
long <- (weights>0)+long
short <- (weights<0)+short
for (i in 1:length(Raw)){
if(!is.null(Raw[[i]])){
long <- (Raw[[i]] > 0)*long
short <- (Raw[[i]] < 0)*short
}
}
long <- long/rowSums(long,na.rm = TRUE)
short <- short/rowSums(short,na.rm = TRUE)
long <- na.fill(long,0)
short <- na.fill(short,0)
wts <- long-short
names(wts) <- names(weights)
return(wts)
}
#' Function to perform simultaneous multiple sorting. Differs from multi_rank
#' in that sorting is not conducted consecutively. Rather, the ranks of an
#' asset with respect to each variable are multiplied together.
#' Removes assets with discordant information if Raw !=null.
multi_rank_no_order_deprecated <- function(Vars,Raw,long_only=FALSE, TopN){
# Note: Zero used to identify non-investable assets
r <- Vars[[1]]
for (i in 2:length(Vars)){
r <- r*Vars[[i]]
}
long=r
short=r
for (i in 1:length(Raw)){
if(!is.null(Raw[[i]])){
long <- (Raw[[i]] > 0)*long
short <- (Raw[[i]] < 0)*short
}
}
long <- na.fill(long,0)
short <- na.fill(short,0)
for (i in 1:nrow(r)){
ranks_l <- rank(as.numeric(long[i,long[i,]!=0]),ties.method="first")
long[i,long[i,]!=0] <- ranks_l
ranks_s <- rank(-as.numeric(short[i,short[i,]!=0]),ties.method="first")
short[i,short[i,]!=0] <- ranks_s
}
long[index(long),] <- as.matrix((long <= TopN)&(long>0))
long <- na.fill(long/rowSums(long),0)
short[index(short),] <- as.matrix((short <= TopN)&(short>0))
short <- na.fill(short/rowSums(short),0)
if(long_only){
names(long) <- names(Vars[[1]])
return(long)}
wts <- (long-short)
names(wts) <- names(Vars[[1]])
return(wts)
}
#' Function which combines individual ranks simultaneosly, as opposed to
#' sequentially as in multi_rank.
# Like multi_rank, it is assumed ranks already account for missing data or
#' non-membership with zeros.
multi_rank_no_order <- function(rank_list, type = c("dist","sum","prod")){
nms <- names(rank_list[[1]])
# Merge on dates
ind <- sort(unique(do.call("c",lapply(rank_list,
function(x) as.Date(zoo::index(x))))))
rank_list <- lapply(rank_list, function(x) cbind(x,ind))
# Rank
type = match.arg(type)
if (type=="sum"){
r <- Reduce("+",rank_list)
} else if (type=="prod"){
r <- Reduce("*",rank_list)
} else {
r <- sqrt(Reduce("+",lapply(rank_list,function(x) x^2)))
}
r <- r*Reduce("*",lapply(rank_list,function(x) x != 0))
r <- na.fill(r,0)
for (i in 1:nrow(r)){
if(sum(r[i,]!=0)==0){next()}
r[i,r[i,]!=0] <- rank(as.numeric(r[i,r[i,]!=0]),ties.method="first")
}
names(r) <- nms
return(r)
}
#' Function to parallelize return-based simulation over each leg.
return_sim <- function(weights,K_m=6,DF_rets,
stop_loss=0.9,holding_time=FALSE, initEq=1,
add_initEq=FALSE){
cores <- detectCores()
mycluster <- makeCluster(cores-1,type = "FORK")
registerDoParallel(mycluster)
out <- foreach(start_i = 1:K_m) %dopar% {
wts_i <- weights_i(weights, start_i, K_m, holding_time)[[1]]
rp <- rp_cum_rets(wts=wts_i,DF_rets,stop_loss)
initEq*rp
}
stopCluster(mycluster)
names(out) <- paste0("portfolio",1:K_m)
rmat <- do.call(cbind, out) ;rm(out)
# Make first value in each column initial equity
if(add_initEq){
for (i in 1:ncol(rmat)){
last_na <- which(!is.na(rmat[,i]))[1]-1
rmat[last_na,i] <- initEq
}
}
rp <- xts(rowSums(na.trim(rmat)),order.by = index(na.trim(rmat)))
return(list(rmat=rmat,rp=rp))
}
#' Function that takes a primary ranking and compromises the primary
#' rank to ensure concordant variable direction of specified variables.
remove_discordant_bruce <- function(primary_ranks,Raw,TopN){
# Function to use rankings of first variable,
# but apply variables in Raw as a filter based on sign.
# i.e. long (short) side only contains assets with positive (negative)
# Raw values
long <- short <- primary_ranks
for (i in 1:length(Raw)){
if(!is.null(Raw[[i]])){
long <- (Raw[[i]] > 0)*long
short <- (Raw[[i]] < 0)*short
}
}
long <- na.fill(long,0)
short <- na.fill(short,0)
for (i in 1:nrow(long)){
ranks_l <- rank(as.numeric(long[i,long[i,]!=0]),ties.method="first")
long[i,long[i,]!=0] <- ranks_l
ranks_s <- rank(-as.numeric(short[i,short[i,]!=0]),ties.method="first")
short[i,short[i,]!=0] <- ranks_s
}
short <- ((short <= TopN) & (short > 0))
long <- ((long <= TopN) & (long > 0))
short <- na.fill(short/rowSums(short,na.rm = TRUE),0)
long <- na.fill(long/rowSums(long,na.rm = TRUE),0)
wts <- long-short
names(wts) <- names(primary_ranks)
return(wts)
}
#' Function to run factor regressions and neatly present results.
#' Allows for panel-style or mean overlapping return analysis.
alpha_table <- function(panel_list,factor_mat,
model=c("TS","TSX","CAPM","FF3","FF5"),
method=c("Panel","Fama"),vcov_panel=plm::vcovSCC,
vcov_fama=sandwich::NeweyWest,to_monthly=TRUE,
leading=FALSE,stat=c("tval","pval","both")){
method = match.arg(method)
model = match.arg(model)
stat = match.arg(stat)
fmla <- switch(model, CAPM=I(rp-RF)~Mkt_RF, FF3=I(rp-RF)~Mkt_RF+HML+SMB,
FF5=I(rp-RF)~Mkt_RF+HML+SMB+RMW+CMA,TS=rp~1,TSX=I(rp-RF)~1)
if(method=="Panel"){
# Method 1 - SCC Panel
alpha1 <- numeric(length(panel_list))
p1 <- numeric(length(panel_list))
t1 <- numeric(length(panel_list))
for (i in 1:length(panel_list)){
rpmat <- panel_list[[i]]
rp_panel <- panelise_rp(rpmat,ind_mat = factor_mat,
to_monthly=to_monthly,leading=leading)
reg_model <- plm(fmla,data=rp_panel,index=c("portfolio","date"),
model="pooling")
coefs <- as.matrix(coeftest(reg_model, vcov. = vcov_panel))
alpha1[i] <- sprintf("%.4f",coefs[1,1])
p1[i] <- round(coefs[1,4],4)
t1[i] <- round(coefs[1,3],4)
}
} else if(method=="Fama"){
alpha1 <- numeric(length(panel_list))
p1 <- numeric(length(panel_list))
t1 <- numeric(length(panel_list))
for (i in 1:length(panel_list)){
rpmat <- panel_list[[i]]
if(to_monthly){
out <- period_return(rpmat,leading = leading)
index(out) <- as.yearmon(index(out))
rpmat <- na.omit(out)
} else{
rpmat <- PerformanceAnalytics::Return.calculate(rpmat,"discrete")
rpmat <- na.omit(rpmat)
}
tm <- index(rpmat)
rpmat <- data.frame(cbind(date=1,rp=rowMeans(rpmat,na.rm = FALSE)))
rpmat$date <- tm
rpmat <- merge(rpmat,factor_mat)
reg_model <- lm(fmla,data=rpmat)
coefs <- as.matrix(coeftest(reg_model, vcov.=vcov_fama))
alpha1[i] <- sprintf("%.4f",coefs[1,1])
p1[i] <- round(coefs[1,4],4)
t1[i] <- round(coefs[1,3],4)
}
}
alpha1[p1<0.05] <- paste0(alpha1[p1<0.05],"*")
alpha1[p1<0.01] <- paste0(alpha1[p1<0.01],"*")
alpha1[p1<0.001] <- paste0(alpha1[p1<0.001],"*")
p1 <- sprintf("%.4f",p1)
t1 <- sprintf("%.4f",t1)
stat1 <- switch(stat,pval=paste0("(",p1,")"),tval=paste0("(",t1,")"),
both=paste0(p1," (",t1,")"))
statname <- switch(stat,pval="(p-val)",tval="(t-stat)",
both="p-val (t-stat)")
capm_table1 <- data.frame(rbind(alpha1,stat1),stringsAsFactors = FALSE)
rownames(capm_table1) <- c("Intercept", statname)
names(capm_table1) <- names(panel_list)
return(capm_table1)
}
#' Function for holding period event response from overlapping return matrix.
#' @description Limited function for event plot from overlapping return matrix.
#' Default shows average holding period cumulative return by month. I.e.
#' average of all holding period equity (normalised) stacked on top of
#' one another.
#' If returns=TRUE, function averages holding period return each month - the
#' result then shows the average holding period return if all portfolio
#' holding periods were being held at the same time and equal-weighted
#' rebalanced each month (although the stocks within each holding period) are
#' buy-hold, or whatever the strategy implemented.
#' Made to be used directly from strategy output.
event_from_rmat <- function(rmat,K_m,verbose=FALSE,returns=FALSE,
index_at="endof",leading=FALSE){
res_list <- vector('list',ncol(rmat))
if(returns){
for (i in 1:ncol(rmat)){
x <- na.omit(rmat[,i])
#x <- to.monthly(x,indexAt = "endof")
#x <- x$x.Close
x <- na.omit(quantmod::monthlyReturn(x,leading = leading))
res_mat <- data.frame(matrix(nrow=ceiling(nrow(x)/K_m),ncol=(K_m+2)))
names(res_mat) <- c("date",paste0("m",0:K_m))
res_mat$m0 <- 0
for (j in 1:ceiling(nrow(x)/K_m)){
rpsub <- x[(K_m*j-K_m+1):min((K_m*j),nrow(x))]
res_mat$date[j] <- as.Date(index(rpsub)[1])
res_mat[j,paste0("m",1:nrow(rpsub))] <- as.numeric(as.numeric(rpsub))
}
res_list[[i]] <- res_mat
}
res_list <- do.call(rbind,res_list)
if(verbose){
return(res_list)
}
return(colMeans(res_list[,-1],na.rm = TRUE))
} else{
for (i in 1:ncol(rmat)){
x <- rmat[,i]
x <- na.omit(x)
x <- xts::to.monthly(x,indexAt = index_at)
x <- x$x.Close
res_mat <- data.frame(matrix(nrow=ceiling(nrow(x)/K_m),ncol =(K_m+2)))
names(res_mat) <- c("date",paste0("m",0:K_m))
for (j in 1:ceiling(nrow(x)/K_m)){
rpsub <- x[(K_m*j+1-K_m):min(((K_m*j+1)),nrow(x))]
rpsub <- rpsub/as.numeric(rpsub[1,1])
res_mat$date[j] <- as.Date(index(rpsub)[1])
res_mat[j,paste0("m",0:(nrow(rpsub)-1))] <- as.numeric(as.numeric(rpsub))
}
res_list[[i]] <- res_mat
}
res_list <- do.call(rbind,res_list)
if(verbose){
return(res_list)
}
return(colMeans(res_list[,-1],na.rm = TRUE)-1)
}
}
#' Thompson (2011) Estimator for two-way clustering plus spatial correlation.
vcovThompson <- function(x, maxlag = NULL, ...) {
w1 <- function(j, maxlag) 1
VsccL.1 <- vcovSCC(x, maxlag = maxlag, wj = w1, ...)
Vcx <- vcovHC(x, cluster = "group", method = "arellano", ...)
VnwL.1 <- vcovSCC(x, maxlag = maxlag, inner = "white", wj = w1, ...)
return(VsccL.1 + Vcx - VnwL.1)
}
#' High-level sandwhich::kweights wrapper to pass to plm.
kern_weights <- function(j, maxlag=NULL,
kern=c("Bartlett","Truncated","Parzen",
"Tukey-Hanning","Quadratic Spectral")){
kern=match.arg(kern)
if(kern=="Bartlett"){
return(max((1-j/(maxlag+1)),0))
}
b = j/maxlag
return(sandwich::kweights(b,kern))
}
#' High-level plm::vcovSCC wrapper for Driscoll and Kraay's SCC estimator.
#' Allows easier specification of kernel smoother.
vcovDK <- function(x,maxlag=NULL,kern){
maxlag <- ifelse(is.null(maxlag),ceiling((max(pdim(x)$Tint$Ti))^(1/4)),maxlag)
plm::vcovSCC(x, maxlag=maxlag,
wj=function(j,maxlag){kern_weights(j,maxlag,kern)})
}
#' Function to lead xts objects.
#' Example for calculating forward return:
#' fwd_6 <- lead_xts(x,6)/x -1
lead_xts <- function(df,n){
df <- as.data.frame(df)
df <- xts(apply(df,2,dplyr::lead,n=n),as.POSIXct(rownames(df)))
}
# Function for calculating forward or backward return over n periods for specified
# periodicity. Handles rather than removes NAs, unlike quantmod::periodReturn.
# Uses last available observation each period.
n_period_return <- function(df,n=1,on="months",forward=FALSE,ind=NULL,
leading=FALSE,type=c("discrete","continuous")){
if(is.null(ind)){
ind <- index(df)
}
eps <- endpoints(df,on=on)[-1]
sps <- c(1, eps[-length(eps)]+1)
out <- vector('list',ncol(df))
for(i in 1:ncol(df)){
x <- df[,i]
k <- numeric(length(eps))
for(j in 1:length(eps)){
p <- c(NA,na.omit(x[sps[j]:eps[j]]))
k[j] <- p[length(p)]
}
out[[i]] <- k
}
out <- xts(Reduce(cbind,out),ind[endpoints(ind,on=on)[-1]])
if(leading){
# Get first non-na value to calulate first return period
out <- cbind(out,ind[1])
for (i in 1:ncol(out)){
nai <- which(!is.na(out[,i]))[1]
out[nai-1,i] <- as.numeric(na.omit(df[,i])[1])
}
}
names(out) <- names(df)
type <- match.arg(type)
out <- switch(type, discrete = out/lag(out,n)-1,
continuous = diff(log(out),differences=n))
if(forward){
out <- lead_xts(out, n)
}
# if(leading){
# out <- out[-1,]
# }
return(out)
}
#' Function for applying alpha function to subperiods and tabulating.
#' @param rps List of prices (portfolios) to analyse.
#' @param periods List of periods in YYYYmmdd string format,
#' e.g. "20030128::20040101". Use '-' to exclude periods, e.g.
#' "-20030128::20040101".
#' @param func Function to get mean return or alpha, e.g. alpha_table(...)
#' @param str_fun Function to apply to each table. Useful for keeping or removing
#' t-stats or p-vals.
#' @details Example:
#' p1 <- "20030101::20080914"
#' p2 <- "-20080915::20090930"
#' p3 <- "20091001::20171231"
#' periods <- list(p1,p2,p3)
#' rps <- mom_deciles$portfolios[c(1,10,11)]
#' names(rps) <- c("High Mom","Low Mom","HML Mom")
#' func <- function(x) {t(alpha_table(x,model = "TS",method="Panel", factor_mat=ff3,
#' vcov_panel=vcov_panel,leading=TRUE))}
#' str_func1 <- function(x) {gsub(" .*", "", x)} # remove text after space
#' str_func2 <- function(x) {gsub(".* ", "", x)} # remove text before space
#' spts <- sub_period_alphas(lapply(rps,na.omit),periods,func= function(x)
#' {t(mean_return_table(x,leading = TRUE))},str_func = str_func1)
#' spts <- cbind(rep(c("Pre-GFC","Ex-GFC","Post-GFC"),each=2),spts)
sub_period_alphas <- function(rps,periods,func,str_func=NULL){
if(is.null(names(rps))){
nms <- paste0("P",1:length(rps))
} else {
nms <- names(rps)
}
rp_list <- vector('list',length(rps))
# Create dfs for each sub-period
for (j in 1:length(rps)){
rmat <- rps[[j]]
sub_list <- vector('list',length(periods))
for(i in 1:length(periods)){
if(substr(periods[i],1,1)=="-"){
pd <- lapply(strsplit(gsub("-","",periods[i]),"::"),lubridate::ymd)
rp_na <- rmat
rp_na[index(rp_na)>=pd[[1]][1] & index(rp_na)<=pd[[1]][2],] <- NA
sub_list[[i]] <- rp_na
} else {
sub_list[[i]] <- rmat[periods[[i]]]
}
}
rp_list[[j]] <- sub_list
}
# Perform regressions/get alpha
mrt <- lapply(rp_list,func)
mrt <- do.call(cbind,mrt)
if(!is.null(str_func)){
mrt <- apply(mrt,2,str_func)
}
resmat <- matrix("",nrow=length(periods)*2,ncol=length(rps))
resmat[which(1:nrow(resmat)%%2==1),] <- mrt[,which(1:ncol(mrt)%%2==1)]
resmat[which(1:nrow(resmat)%%2==0),] <- mrt[,which(1:ncol(mrt)%%2==0)]
colnames(resmat) <- nms
#rn <- lapply(periods,
# function(x) paste0(ymd(unlist(strsplit(x,"::"))),collapse="/\n"))
#resmat <-cbind(rep(unlist(rn),each=2),resmat)
return(resmat)
}
#' Wrapper for quantmod::periodReturn that handles NAs appropriately.
#' quantmod::periodReturn will use last non-NA price, regardless of how long
#' the price occurred. Thus, following long periods of missing data, the
#' magnitude of period return will be extremely large.
#' Note: This function will behave the same as quantmod::periodReturn if
#' index is missing. I.e. missing periods should be populated with NAs.
#' Warning!: Should only be used if there is a single continuous period of
#' NAs!
period_return <- function(x,period="monthly",type="arithmetic",leading=FALSE){
r_list <- vector('list',ncol(x))
for (i in 1:ncol(x)){
y <- zoo::na.trim(x[,i])
if(sum(is.na(y))>0){
na_first <- min(which(is.na(y)))-1
na_last <- max(which(is.na(y)))+1
y1 <- quantmod::periodReturn(y[1:na_first,],
period=period,type=type,leading=leading)
y2 <- quantmod::periodReturn(y[na_last:nrow(y),],
period=period,type=type,leading=leading)
r_list[[i]] <- rbind(y1,y2)
} else{
r_list[[i]] <- quantmod::periodReturn(y,period=period,type=type,
leading=leading)
}
}
return(do.call(cbind,r_list))
}
#' Helper function to add significance marks to coefficients, given a
#' vector of coefficients, a vector p values, and vector significance cut-offs.
signif_stars <- function(a,p,sigs = c(0.05,0.01,0.001)){
for (i in 1:length(sigs)){
a[p < sigs[i]] <- paste0(a[p <sigs[i]],"*")
}
return(a)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.