#' @title Fit a fundamental factor model using cross-sectional regression
#'
#' @description Fit a fundamental (cross-sectional) factor model using ordinary
#' least squares or robust regression. Fundamental factor models use observable
#' asset specific characteristics (or) fundamentals, like industry
#' classification, market capitalization, style classification (value, growth)
#' etc. to calculate the common risk factors. An object of class \code{"ffm"}
#' is returned.
#'
#' @details
#' Estimation method "LS" corresponds to ordinary least squares using
#' \code{\link[stats]{lm}} and "Rob" is robust regression using
#' \code{\link[robust]{lmRob}}. "WLS" is weighted least squares using estimates
#' of the residual variances from LS regression as weights (feasible GLS).
#' Similarly, "W-Rob" is weighted robust regression.
#'
#' Standardizing style factor exposures: The exposures can be standardized into
#' z-scores using regular or robust (see \code{rob.stats}) measures of location
#' and scale. Further, \code{weight.var}, a variable such as market-cap, can be
#' used to compute the weighted mean exposure, and an equal-weighted standard
#' deviation of the exposures about the weighted mean. This may help avoid an
#' ill-conditioned covariance matrix. Default option equally weights exposures
#' of different assets each period.
#'
#' If \code{rob.stats=TRUE}, \code{\link[robust]{covRob}} is used to compute a
#' robust estimate of the factor covariance/correlation matrix, and,
#' \code{\link[robustbase]{scaleTau2}} is used to compute robust tau-estimates
#' of univariate scale for residuals during "WLS" or "W-Rob" regressions. When
#' standardizing style exposures, the \code{\link[stats]{median}} and
#' \code{\link[stats]{mad}} are used for location and scale respectively.
#'
#'
#' The original function was designed by Doug Martin and initially implemented
#' in S-PLUS by a number of University of Washington Ph.D. students:
#' Christopher Green, Eric Aldrich, and Yindeng Jiang. Guy Yollin ported the
#' function to R and Yi-An Chen modified that code. Sangeetha Srinivasan
#' re-factored, tested, corrected and expanded the functionalities and S3
#' methods. Chindhanai Uthaisaad expanded the functionalities for the fundamental
#' law of active management
#'
#'
#' @importFrom stats lm as.formula coef contr.treatment fitted mad median model.matrix
#' na.exclude na.fail na.omit var cov
#' @importFrom robustbase scaleTau2 covOGK
#' @importFrom PerformanceAnalytics checkData kurtosis skewness
#' @importFrom robust covRob covClassic lmRob
#'
#' @param data data.frame of the balanced panel data containing the variables
#' \code{asset.var}, \code{ret.var}, \code{exposure.vars}, \code{date.var} and
#' optionally, \code{weight.var}.
#' @param asset.var character; name of the variable for asset names.
#' @param ret.var character; name of the variable for asset returns.
#' @param date.var character; name of the variable containing the dates
#' coercible to class \code{Date}.
#' @param exposure.vars vector; names of the variables containing the
#' fundamental factor exposures.
#' @param weight.var character; name of the variable containing the weights
#' used when standarizing style factor exposures. Default is \code{NULL}. See
#' Details.
#' @param fit.method method for estimating factor returns; one of "LS", "WLS"
#' "Rob" or "W-Rob". See details. Default is "LS".
#' @param rob.stats logical; If \code{TRUE}, robust estimates of covariance,
#' correlation, location and univariate scale are computed as appropriate (see
#' Details). Default is \code{FALSE}.
#' @param full.resid.cov logical; If \code{TRUE}, a full residual covariance
#' matrix is estimated. Otherwise, a diagonal residual covariance matrix is
#' estimated. Default is \code{FALSE}.
#' @param z.score method for exposure standardization; one of "none", "crossSection", or "timeSeries".
#' Default is \code{"none"}.
#' @param addIntercept logical; If \code{TRUE}, intercept is added in the exposure matrix. Default is \code{FALSE}.
#' @param lagExposures logical; If \code{TRUE}, the style exposures in the exposure matrix are lagged by one time period. Default is \code{FALSE}.
#' @param resid.EWMA logical; If \code{TRUE}, the residual variances are computed using EWMA and these would be used as weights for "WLS" or "W-Rob". Deafault is \code{FALSE}.
#' @param lambda lambda value to be used for the EWMA estimation of residual variances. Default is 0.9
#' @param analysis method used in the analysis of fundamental law of active management; one of "none", "ISM",
#' or "NEW". Default is "none".
#' @param stdReturn logical; If \code{TRUE}, the returns will be standardized using GARCH(1,1) volatilities. Default is \code{FALSE}
#' @param fullPeriod logical; If \code{TRUE}, the fundamental law of active management will apply to all but the last
#' time period. Default is \code{FALSE}
#' @param windowLength integer; the number of months used as a window length in the FLAM analysis. Default is 60 (5 years).
#' @param targetedVol numeric; the targeted portfolio volatility in the analysis. Default is 0.06.
#' @param ... potentially further arguments passed.
#'
#'
#' @return \code{fitFfm} returns an object of class \code{"ffm"} for which
#' \code{print}, \code{plot}, \code{predict} and \code{summary} methods exist.
#'
#'
#' The generic accessor functions \code{coef}, \code{fitted} and
#' \code{residuals} extract various useful features of the fit object.
#' Additionally, \code{fmCov} computes the covariance matrix for asset returns
#' based on the fitted factor model.
#'
#'
#' An object of class \code{"ffm"} is a list containing the following
#' components:
#' \item{factor.fit}{list of fitted objects that estimate factor returns in each
#' time period. Each fitted object is of class \code{lm} if
#' \code{fit.method="LS" or "WLS"}, or, class \code{lmRob} if
#' \code{fit.method="Rob" or "W-Rob"}.}
#' \item{beta}{N x K matrix of factor exposures for the last time period.}
#' \item{factor.returns}{xts object of K-factor returns (including intercept).}
#' \item{residuals}{xts object of residuals for N-assets.}
#' \item{r2}{length-T vector of R-squared values.}
#' \item{factor.cov}{K x K covariance matrix of the factor returns.}
#' \item{g.cov}{ covariance matrix of the g coefficients for a Sector plus market and Sector plus Country plus global market models.}
#' \item{resid.cov}{N x N covariance matrix of residuals.}
#' \item{return.cov}{N x N return covariance estimated by the factor model,
#' using the factor exposures from the last time period.}
#' \item{restriction.mat}{The restriction matrix used in the computation of f=Rg.}
#' \item{resid.var}{length-N vector of residual variances.}
#' \item{call}{the matched function call.}
#' \item{data}{data frame object as input.}
#' \item{date.var}{date.var as input}
#' \item{ret.var}{ret.var as input}
#' \item{asset.var}{asset.var as input.}
#' \item{exposure.vars}{exposure.vars as input.}
#' \item{weight.var}{weight.var as input.}
#' \item{fit.method}{fit.method as input.}
#' \item{asset.names}{length-N vector of asset names.}
#' \item{factor.names}{length-K vector of factor.names.}
#' \item{time.periods}{length-T vector of dates.}
#' \item{condAlpha}{length-windowLength the conditional mean of the portfolio returns in each moving window.}
#' \item{condOmega}{length-windowLength list of the conditional covariance matrices of the portfolio returns in each moving window.}
#' \item{IR}{the vector of in-sample IR, out-f-sample IR, and the standard error of the out-of-sample IR.}
#' Where N is the number of assets, K is the number of factors (including the
#' intercept or dummy variables) and T is the number of unique time periods.
#'
#' @author Sangeetha Srinivasan, Guy Yollin, Yi-An Chen, Avinash Acharya and Chindhanai Uthaisaad
#'
#' @references
#' Menchero, J. (2010). The Characteristics of Factor Portfolios. Journal of
#' Performance Measurement, 15(1), 52-62.
#'
#' Grinold, R. C., & Kahn, R. N. (2000). Active portfolio management (Second
#' Ed.). New York: McGraw-Hill.
#'
#'
#' @examples
#'
#'
#' # Load fundamental and return data
#' data("factorDataSetDjia5Yrs")
#'
#' # fit a fundamental factor model
#' exposure.vars <- c("P2B", "MKTCAP")
#' fit <- fitFfm(data=factorDataSetDjia5Yrs, asset.var="TICKER", ret.var="RETURN",
#' date.var="DATE", exposure.vars=exposure.vars)
#' names(fit)
#'
#' # fit a Industry Factor Model with Intercept
#' exposure.vars <- c("SECTOR","P2B")
#' fit1 <- fitFfm(data=factorDataSetDjia5Yrs, asset.var="TICKER", ret.var="RETURN",
#' date.var="DATE", exposure.vars=exposure.vars, addIntercept=TRUE)
#'
#' # Fit a SECTOR+COUNTRY+Style model with Intercept
#' # Create a COUNTRY column with just 3 countries
#'
#' factorDataSetDjia5Yrs$COUNTRY = rep(rep(c(rep("US", 1 ),rep("GERMANY", 1 )), 11), 60)
#' exposure.vars= c("SECTOR", "COUNTRY","P2B", "MKTCAP")
#'
#' fit.MICM <- fitFfm(data=factorDataSetDjia5Yrs, asset.var="TICKER", ret.var="RETURN",
#' date.var="DATE", exposure.vars=exposure.vars, addIntercept=TRUE)
#'
#' data("mktSP")
#' data("factorDataSetDjia")
#' factorDataSetDjia <- factorDataSetDjia[order(factorDataSetDjia[, "DATE"]), ]
#' # Extract asset names from data
#' asset.names <- unique(factorDataSetDjia[["TICKER"]])
#' N_stocks <- length(asset.names)
#' time.periods <- unique(factorDataSetDjia[["DATE"]])
#' N_TP <- length(time.periods)
#' bmkReturn <- mktSP[index(mktSP) %in% time.periods, ]
#'
#' totReturns = matrix(factorDataSetDjia[["RETURN"]], nrow = N_stocks)[1:N_stocks, ]
#' rownames(totReturns) = asset.names
#'
#' # Compute residual returns from CAPM----
#' residReturns <- totReturns
#' beta_i <- c()
#' for (i in 1:N_stocks) {
#' beta_i[i] <- c(cov(totReturns[i, ] , bmkReturn) / var(bmkReturn))
#' residReturns[i, ] <- totReturns[i, ] - beta_i[i] * bmkReturn
#' }
#'
#' modData <- cbind(factorDataSetDjia, "RESIDRETURN" = as.vector(residReturns))
#' sizeFfm <- fitFfm(modData, asset.var = "TICKER", ret.var = "RESIDRETURN",
#' exposure.vars = c("SIZE"), addIntercept = TRUE, stdReturn = FALSE,
#' z.score = "crossSection", date.var = "DATE", lagExposures = TRUE,
#' analysis = "ISM")
#'
#' p2bFfm <- fitFfm(modData, asset.var = "TICKER", ret.var = "RESIDRETURN",
#' exposure.vars = c("P2B"), addIntercept = TRUE, stdReturn = TRUE,
#' z.score = "timeSeries", date.var = "DATE", lagExposures = TRUE,
#' analysis = "NEW")
#'
#'
#' @export
fitFfm <- function(data, asset.var, ret.var, date.var, exposure.vars,
weight.var = NULL, fit.method=c("LS","WLS","Rob","W-Rob"),
rob.stats = FALSE, full.resid.cov=FALSE, z.score = c("none", "crossSection", "timeSeries"),
addIntercept = FALSE, lagExposures = FALSE, resid.EWMA = FALSE,
lambda = 0.9, stdReturn = FALSE, fullPeriod = FALSE, windowLength = 60,
analysis = c("none", "ISM", "NEW"), targetedVol = 0.06, ...) {
# record the call as an element to be returned
this.call <- match.call()
# set defaults and check input validity
if (missing(data) || !is.data.frame(data)) {
stop("Invalid args: data must be a data.frame")
}
fit.method = fit.method[1]
if (!(fit.method %in% c("LS","WLS","Rob","W-Rob"))) {
stop("Invalid args: fit.method must be 'LS', 'WLS', 'Rob' or 'W-Rob'")
}
if (missing(asset.var) || !is.character(asset.var)) {
stop("Invalid args: asset.var must be a character string")
}
if (missing(date.var) || !is.character(date.var)) {
stop("Invalid args: date.var must be a character string")
}
if (missing(ret.var) || !is.character(ret.var)) {
stop("Invalid args: ret.var must be a character string")
}
if (missing(exposure.vars) || !is.character(exposure.vars)) {
stop("Invalid args: exposure.vars must be a character vector")
}
if (ret.var %in% exposure.vars) {
stop("Invalid args: ret.var can not also be an exposure")
}
if (!is.null(weight.var) && !is.character(weight.var)) {
stop("Invalid args: weight.var must be a character string")
}
if (!is.logical(rob.stats) || length(rob.stats) != 1) {
stop("Invalid args: control parameter 'rob.stats' must be logical")
}
if (!is.logical(full.resid.cov) || length(full.resid.cov) != 1) {
stop("Invalid args: control parameter 'full.resid.cov' must be logical")
}
z.score = z.score[1]
if (!(z.score %in% c("none", "crossSection", "timeSeries")) || length(z.score) != 1) {
stop("Invalid args: control parameter 'z.score' must be either csScore or tsScore")
}
analysis = analysis[1]
if (!(analysis %in% c("none", "ISM", "NEW")) || length(z.score) != 1) {
stop("Invalid args: control parameter 'analysis' must be either ISM or NEW")
}
# initialize to avoid R CMD check's NOTE: no visible binding for global var
DATE=NULL
W=NULL
model.MSCI=FALSE
model.styleOnly = FALSE
restriction.mat = NULL
g.cov = NULL
# ensure dates are in required format
data[[date.var]] <- as.Date(data[[date.var]])
# extract unique time periods from data
time.periods <- unique(data[[date.var]])
TP <- length(time.periods)
if (TP < 2) {
stop("Invalid args: at least 2 unique time periods are required to fit the
factor model")
}
# order data.frame by date.var
data <- data[order(data[,date.var]),]
# extract asset names from data
asset.names <- unique(data[[asset.var]])
N <- length(asset.names)
rawReturns <- matrix(data[[ret.var]], nrow = N)
# Standardize the returns if stdReturn = TRUE
if (stdReturn) {
sdReturns <- apply(rawReturns, 2, sd)
sigmaGarch <- rawReturns
for (i in 1:N) {
ts <- rawReturns[i, ] ^ 2
var_past_2 <- 0
sigmaGarch[i, ] <- sapply(ts, function(x) var_past_2 <<- (1 - 0.10 - 0.81) * sdReturns[i] ^ 2 + 0.10 * x + 0.81 * var_past_2)
}
sigmaGarch <- sqrt(sigmaGarch)
data[[ret.var]] <- as.vector(rawReturns / sigmaGarch)
}
# check number & type of exposure; convert character exposures to dummy vars
which.numeric <- sapply(data[, exposure.vars, drop=FALSE], is.numeric)
exposures.num <- exposure.vars[which.numeric]
exposures.char <- exposure.vars[!which.numeric]
if ((length(exposures.char) >1) && !addIntercept) {
stop("Invalid args: Sector + Country model without Market(Interecept) is currenlty not handled")
}
if (length(exposures.char) > 1)
{ #Model has both Sector and Country along wit Intercept
model.MSCI = TRUE
}
if (length(exposures.char) == 0)
{
model.styleOnly = TRUE
}
# Convert numeric exposures to z-scores
if (!grepl(z.score, "none")) {
if (!is.null(weight.var)) {
# Weight exposures within each period using weight.var
w <- unlist(by(data=data, INDICES=data[[date.var]],
function(x) x[[weight.var]]/sum(x[[weight.var]])))
} else {
w <- rep(1, nrow(data))
}
# Calculate z-scores looping through all numeric exposures
if (grepl(z.score, "csScore")) {
for (i in exposures.num) {
std.expo.num <- by(data = data, INDICES = data[[date.var]], FUN = zScore,
i = i, w = w, rob.stats = rob.stats, z.score = z.score,
asset.names = asset.names)
data[[i]] <- unlist(std.expo.num)
}
} else {
for (i in exposures.num) {
data[[i]] <- zScore(x = data, i = i, w = w, rob.stats = rob.stats,
z.score = z.score, asset.names = asset.names)
}
}
}
if(lagExposures)
{
data <- data[order(data[,date.var]),]
#Get the style exposures except for the last time period
dataExpoLagged <- data[1:((TP-1)*N), exposures.num]
#Remove data corresponding to the first time period
data.lagged <- data[-(1:N),]
#Replace style expo with lagged expo
data.lagged[,exposures.num] <- dataExpoLagged
data <- data.lagged
#Update the time period length
time.periods <- unique(data[[date.var]])
TP <- length(time.periods)
}
if(!model.MSCI)
{
# determine factor model formula to be passed to lm or lmRob
fm.formula <- paste(ret.var, "~", paste(exposure.vars, collapse="+"))
if (length(exposures.char))
{
#Remove Intercept as it introduces rank deficiency in the exposure matrix.
#Implemetation with Intercept is handled later, using a Restriction matrix to remove the rank deficiency.
fm.formula <- paste(fm.formula, "- 1")
data[, exposures.char] <- as.factor(data[,exposures.char])
contrasts.list <- lapply(seq(length(exposures.char)), function(i)
function(n) contr.treatment(n, contrasts=FALSE))
names(contrasts.list) <- exposures.char
}
else
{
if (!addIntercept && model.styleOnly) {
fm.formula <- paste(fm.formula, "- 1")}
contrasts.list <- NULL
}
# convert the pasted expression into a formula object
fm.formula <- as.formula(fm.formula)
}
if (addIntercept == TRUE && model.MSCI == FALSE && model.styleOnly ==FALSE)
{
#formula to extract beta of Sec or Country
formula.expochar = as.formula(paste(ret.var, "~", exposures.char, "-1"))
factor.names <- c("Market",
paste(levels(data[,exposures.char]),sep=" "), exposures.num)
beta.expochar <- model.matrix(formula.expochar, data=data)
rownames(beta.expochar) <- rep(asset.names, length(time.periods))
#Beta for the whole model (generally without intercept)
beta <- model.matrix(fm.formula, data=data)
rownames(beta) <- rep(asset.names, length(time.periods))
#Define beta.star as Beta of the whole model with Intercept/Market represtend by col of ones
beta.star <- cbind("Market" = rep(1, nrow(beta.expochar)), beta.expochar)
if(length(exposures.num) > 0){
beta.style<- matrix(beta[,exposures.num], ncol = length(exposures.num))
colnames(beta.style) = exposures.num
#Define Beta for Style factors
B.style =beta.style[((TP-1)*N+1) : (TP*N), ]}
#Number of factors
K <- dim(beta.star)[2]
#Define Restriction matrix
R_matrix = rbind(diag(K-1), c(0,rep(-1,K-2)))
#Define B.Mod = X*R
B.mod = (beta.star[1:N, ]) %*% R_matrix
#Formula for Markt+Sec/Country Model
fmSI.formula = as.formula(paste(ret.var, "~", "B.mod+", paste(exposures.num, collapse="+"),"-1" ))
}
if(model.MSCI == FALSE)
{
#Perform regression using fm.formula without any restriction matrix, if WLS/WRob is the
#fit.method when addIntercept =TRUE. Else use fmSI.formula.
if (!(grepl("W",fit.method)) && addIntercept==TRUE && !model.styleOnly){
fm.formula = fmSI.formula
contrasts.list = NULL}
# estimate factor returns using LS or Robust regression
# returns a list of the fitted lm or lmRob objects for each time period
if (grepl("LS",fit.method))
{
reg.list <- by(data=data, INDICES=data[[date.var]], FUN=lm,
formula=fm.formula, contrasts=contrasts.list,
na.action=na.fail)
}
else if (grepl("Rob",fit.method))
{
reg.list <- by(data=data, INDICES=data[[date.var]], FUN=lmRob,
formula=fm.formula, contrasts=contrasts.list,
mxr=200, mxf=200, mxs=200, na.action=na.fail)
}
# compute residual variance for all assets for weighted regression
if (grepl("W",fit.method)) {
if (rob.stats) {
resid.var <- apply(sapply(reg.list, residuals), 1, scaleTau2)^2
} else {
resid.var <- apply(sapply(reg.list, residuals), 1, var)
}
if(resid.EWMA)
{ res = sapply(reg.list, residuals)
w <- matrix(0,N,TP)
for(i in 1:N)
{
var_tminus1 = as.numeric(resid.var[i])
for(j in 2:TP)
{
w[i,j] = var_tminus1 + ((1-lambda)*(res[i,j]^2-var_tminus1))
var_tminus1 = w[i,j]
}
}
w[,1] = resid.var
data<- cbind(data, W = 1/as.numeric(w))
}
else
{
data <- cbind(data, W=1/resid.var)
}
}
# estimate factor returns using WLS or weighted-Robust regression
# returns a list of the fitted lm or lmRob objects for each time period
if (fit.method=="WLS") {
if(addIntercept && !model.styleOnly){
fm.formula = fmSI.formula
contrasts.list = NULL}
reg.list <- by(data=data, INDICES=data[[date.var]],
FUN=function(x) {
lm(data=x, formula=fm.formula, contrasts=contrasts.list,
na.action=na.fail, weights=W)
})
} else if (fit.method=="W-Rob") {
reg.list <- by(data=data, INDICES=data[[date.var]],
FUN=function(x) {
lmRob(data=x, formula=fm.formula, contrasts=contrasts.list,
na.action=na.fail, weights=W,
mxr=200, mxf=200, mxs=200)
})
}
}
## Compute or Extract objects to be returned
if ((addIntercept == FALSE || model.styleOnly ==TRUE) && model.MSCI == FALSE)
{
# number of factors including Market and dummy variables
if (length(exposures.char)) {
factor.names <- c(exposures.num,
paste(levels(data[,exposures.char]),sep=""))
} else {
if(addIntercept) factor.names <- c("Alpha", exposures.num)
else factor.names <- exposures.num
}
K <- length(factor.names)
# exposure matrix B or beta for the last time period - N x K
beta <- model.matrix(fm.formula, data=subset(data, DATE==time.periods[TP]))
rownames(beta) <- asset.names
#Shorten the Sector/Country names
colnames(beta) = gsub("COUNTRY|SECTOR|GICS.", "", colnames(beta))
#colnames(beta) = gsub(paste(exposures.char), "", colnames(beta))
#Remove SECTOR/COUNTRY to shorten the coef names.
if (length(exposures.char) >0 )
{
reg.list= lapply(seq(1:TP), function(x){ names(reg.list[[x]]$coefficients) = gsub("COUNTRY|SECTOR|GICS.", "",names(reg.list[[x]]$coefficients) ) ;reg.list[[x]]})
names(reg.list) = as.character(unique(data[[date.var]]))
}else if(model.styleOnly && addIntercept)
{
reg.list= lapply(seq(1:TP), function(x){ names(reg.list[[x]]$coefficients)[1] = "Alpha";reg.list[[x]]})
names(reg.list) = as.character(unique(data[[date.var]]))
}
# time series of factor returns = estimated coefficients in each period
factor.returns <- sapply(reg.list, function(x) {
temp <- coef(x)
temp[match(factor.names, names(temp))]})
# simplify factor.names for dummy variables
if (length (exposures.char)) {
factor.names <- c(exposures.num, levels(data[,exposures.char]))
}
if (length(factor.names) == 1) {
factor.returns <- as.matrix(factor.returns)
names(factor.returns) <- factor.names
factor.returns <- t(factor.returns)
} else {
rownames(factor.returns) <- factor.names
}
factor.returns <- checkData(t(factor.returns)) # T x K
# time series of residuals
residuals <- sapply(reg.list, residuals) # NxT
row.names(residuals) <- asset.names
residuals<- checkData(t(residuals)) #TxN
# r-squared values for each time period
r2 <- sapply(reg.list, function(x) summary(x)$r.squared)
# factor and residual covariances
if (rob.stats) {
if (kappa(na.exclude(coredata(factor.returns))) < 1e+10) {
factor.cov <- covRob(coredata(factor.returns), estim="pairwiseGK",
distance=FALSE, na.action=na.omit)$cov
} else {
cat("Covariance matrix of factor returns is singular.\n")
factor.cov <- covRob(coredata(factor.returns), distance=FALSE,
na.action=na.omit)$cov
}
resid.var <- apply(coredata(residuals), 2, scaleTau2, na.rm=T)^2
if (full.resid.cov) {
resid.cov <- covOGK(coredata(residuals), sigmamu=scaleTau2, n.iter=1)$cov
} else {
resid.cov <- diag(resid.var)
}
} else {
factor.cov <- covClassic(coredata(factor.returns), distance=FALSE,
na.action=na.omit)$cov
resid.var <- apply(coredata(residuals), 2, var, na.rm=T)
if (full.resid.cov) {
resid.cov <- covClassic(coredata(residuals), distance=FALSE,
na.action=na.omit)$cov
} else {
resid.cov <- diag(resid.var)
}
}
# return covariance estimated by the factor model
#(here beta corresponds to the exposure of last time period,TP)
return.cov <- beta %*% factor.cov %*% t(beta) + resid.cov
if(addIntercept) colnames(beta)[1] = "Alpha"
beta = beta[, colnames(factor.returns)]
}
#If Market+Sector/Country is required
else if (addIntercept == TRUE && model.MSCI == FALSE && model.styleOnly ==FALSE)
{
#Rename regression coefs
reg.list= lapply(seq(1:TP), function(x){ names(reg.list[[x]]$coefficients) = paste("g", seq(1:length(reg.list[[x]]$coefficients)), sep = "");reg.list[[x]]})
names(reg.list) = as.character(unique(data[[date.var]]))
#Extract g coef
g = sapply(reg.list, function(x) coef(x))
#factor returns = restriction matrix * g coefficients
factor.returns = R_matrix %*% g[1:(K-1), ]
if(length(exposures.num) > 0)
factor.returns = rbind(factor.returns, g[K:nrow(g), ])
rownames(factor.returns) = factor.names
colnames(factor.returns) = as.character(unique(data[[date.var]]))
#Extract resid
residuals = sapply(reg.list, residuals)
colnames(residuals) = as.character(unique(data[[date.var]]))
row.names(residuals) = asset.names
# Create a T x N xts object of residuals
residuals <- checkData(t(residuals))
r2<- sapply(reg.list, function(x) summary(x)$r.squared)
names(r2) = as.character(unique(data[[date.var]]))
factor.returns <- checkData(t(factor.returns)) # T x K
#Rearrange g,factor return to Mkt- Style Factor - Sec/Country order
if(length(exposures.num) > 0) {g = g[c(1,K:nrow(g),2:(K-1)),]}
factor.names<- c("Market", exposures.num,
paste(levels(data[,exposures.char]),sep=" "))
factor.returns = factor.returns[, factor.names]
#Fac Covarinace
factor.cov <-covClassic(coredata(factor.returns), distance=FALSE,
na.action=na.omit)$cov
g.cov <- cov(t(g))
#Residual Variance
resid.var <- apply(coredata(residuals), 2, var, na.rm=T)
names(resid.var) <- asset.names
resid.cov <- diag(resid.var)
#Returns covariance
if(length(exposures.num) > 0){
beta.combine = cbind(beta.star, beta.style)
beta.stms = cbind(B.mod[,1], B.style, B.mod[,-1])
}else
{
beta.combine = beta.star
beta.stms = B.mod
}
colnames(beta.combine) = gsub("COUNTRY|SECTOR|GICS.", "", colnames(beta.combine))
beta.combine = beta.combine[, factor.names]
return.cov <- beta.stms %*% g.cov %*% t(beta.stms) + resid.cov
#Exposure matrix for the last time period
beta = beta.combine[((TP-1)*N+1):(TP*N), 1:ncol(beta.combine)]
#Restriction matrix
restriction.mat = R_matrix
}
else if(model.MSCI)
{
# determine factor model formula to be passed to lm
fm.formula <- paste(ret.var, "~", paste(exposure.vars, collapse="+"))
if (length(exposures.char)) {
fm.formula <- paste(fm.formula, "- 1")
for(i in exposures.char)
{
data[, i] <- as.factor(data[,i])
if (grepl("SECTOR",i))
formula.ind = as.formula(paste(ret.var, "~", i, "-1"))
else formula.cty = as.formula(paste(ret.var, "~", i, "-1"))
}
}
# convert the pasted expression into a formula object
fm.formula <- as.formula(fm.formula)
#Extract model beta, expo.char beta and expo.num betas
beta <- model.matrix(fm.formula, data=data)
beta.ind <- model.matrix(formula.ind, data=data)
beta.cty <- model.matrix(formula.cty, data=data)
beta.mic <- cbind("Market" = rep(1, nrow(beta.ind)), beta.ind, beta.cty)
if(length(exposures.num) > 0)
beta.style<- beta[,exposures.num]
fac.names.indcty = lapply(seq(exposures.char), function(x)
paste(levels(data[,exposures.char[x]]),sep=""))
if(grepl("SECTOR", exposures.char[1])){
factor.names <- c("Market",unlist(fac.names.indcty),
exposures.num)
}else{
factor.names <- c("Market", unlist((fac.names.indcty)[2]),unlist((fac.names.indcty)[1]),
exposures.num)
}
rownames(beta.mic) <- rep(asset.names, TP)
asset.names <- unique(data[[asset.var]])
N <- length(asset.names)
#Define Retrun matrix
returns = matrix(data[[ret.var]],nrow = N)
K <- length(factor.names)
K1<- dim(beta.ind)[2]
K2<- dim(beta.cty)[2]
#Define Restriction matrix
rMic<- rbind( cbind(diag(K1), matrix(0, nrow = K1, ncol = K2-1)),
c(c(0,rep(-1, K1-1)), rep(0, K2-1)),
cbind(matrix(0, ncol = K1, nrow = K2-1), diag(K2-1)),
c(rep(0, K1), rep(-1, K2-1)))
row.names(returns) = asset.names
colnames(returns) = as.character(time.periods)
reg.list<- list()
B.mod = (beta.mic[1:N, ]) %*% rMic #Y = X*R
if(length(exposures.num) > 0){
B.style = beta.style[((TP-1)*N+1) : (TP*N), ]}
fmMSCI.formula = as.formula(paste(ret.var, "~", "B.mod+", paste(exposures.num, collapse="+"),"-1" ))
reg.list <- by(data=data, INDICES=data[[date.var]],
FUN=function(x) {lm(data=x, formula=fmMSCI.formula,
na.action=na.fail)})
reg.list= lapply(seq(1:TP), function(x){ names(reg.list[[x]]$coefficients) = paste("g", seq(1:length(reg.list[[x]]$coefficients)), sep = "");reg.list[[x]]})
names(reg.list) = as.character(unique(data[[date.var]]))
g = sapply(reg.list, function(x) coef(x))
factor.returns = rMic %*% g[1:(K1+K2-1), ]
if(length(exposures.num) > 0)
factor.returns = rbind(factor.returns, g[(K1+K2):nrow(g), ])
rownames(factor.returns) <- factor.names
colnames(factor.returns) = as.character(unique(data[[date.var]]))
if(length(exposures.num) > 0){
residuals = returns - B.mod %*% g[1:(K1+K2-1), ] - B.style %*% g[(K1+K2):nrow(g), ] #NxT
}else residuals = returns - B.mod %*% g[1:(K1+K2-1), ]
colnames(residuals) = as.character(unique(data[[date.var]]))
# Create a T x N xts object of residuals
residuals <- checkData(t(residuals))
#all.equal(x,residuals)
r2<- sapply(reg.list, function(x) summary(x)$r.squared)
#r2 <- as.numeric(sapply(X = summary(reg.list), FUN = "[","r.squared"))
names(r2) = as.character(unique(data[[date.var]]))
factor.returns <- checkData(t(factor.returns)) # T x K
#Re-order the columns in the order mkt-style-sector-country
if(length(exposures.num)>0)
factor.returns <- factor.returns[,c(1,(K1+2+K2):K, 2:(K1+1), (K1+2):(K1+K2+1))]
factor.names <- colnames(factor.returns)
#Fac Covarinace
factor.cov <- covClassic(coredata(factor.returns), distance=FALSE,
na.action=na.omit)$cov
#Residual Variance
resid.var <- apply(coredata(residuals), 2, var, na.rm=T)
names(resid.var) <- asset.names
resid.cov <- diag(resid.var)
#Returns covariance
if(length(exposures.num) > 0){
beta.combine = cbind(beta.mic, beta.style)
}else
beta.combine = beta.mic
colnames(beta.combine) = gsub("COUNTRY|SECTOR|GICS.", "", colnames(beta.combine))
beta.combine = beta.combine[, factor.names]
return.cov <- beta.combine[((TP-1)*N+1):(TP*N), 1:K] %*% factor.cov %*% t( beta.combine[((TP-1)*N+1):(TP*N), 1:K]) + resid.cov
#Exposure matrix
beta = beta.combine[((TP-1)*N+1):(TP*N), 1:K]
#colnames(beta) = gsub("COUNTRY|SECTOR", "", colnames(beta))
#factor.cov = factor.cov[factor.names, factor.names]
#Restriction matrix
restriction.mat = rMic
}
# Initialization
EX <- length(exposures.num)
if (lagExposures) {
TP <- TP + 1
}
if (fullPeriod) {
windowPeriods <- 2 # Train all but the last time period
} else {
windowPeriods <- TP - (windowLength - 1)
}
# FLAM
if (EX == 1) {
# Single factor model
stdExposures <- matrix(data[[exposures.num]], nrow = N)[1:N, ]
if (grepl(analysis, "ISM")) {
# ISM model
condAlpha <- matrix(0, ncol = windowPeriods, nrow = N)
condOmega <- vector(length = windowPeriods, "list")
for (t in 1:windowPeriods) {
sigmaIC <- sd(factor.returns[t:(t + 58), 2])
condAlpha[, t] <- apply(rawReturns[, t:(t + 59)], 1, mean)
resid.var <- apply(coredata(residuals[t:(t + 58), ]), 2, var, na.rm=T)
resid.cov <- diag(resid.var)
condOmega[[t]] <- sigmaIC ^ 2 * (stdExposures[, t + 58] %*% t(stdExposures[, t + 58])) + resid.cov
}
# Optimal active weights and in-sample IR
activeWeights <- matrix(0, ncol = windowPeriods, nrow = N)
sigma_A <- targetedVol
for (t in 1:windowPeriods) {
kappa <- (t(condAlpha[, t]) %*% solve(condOmega[[t]]) %*% rep(1, N)) / (rep(1, N) %*% solve(condOmega[[t]]) %*% rep(1, N))
activeWeights[, t] <- sigma_A * (solve(condOmega[[t]]) %*% as.matrix(condAlpha[, t] - kappa * rep(1, N))) / c(sqrt(t(as.matrix(condAlpha[, t])) %*% solve(condOmega[[t]]) %*% (condAlpha[, t] - kappa * rep(1, N))))
}
condMean <- apply(activeWeights * condAlpha, 2, sum)
portIR <- condMean / sigma_A
IR_In <- mean(portIR)
# Out-of-sample IR
activeReturns <- apply(activeWeights[, 1:(windowPeriods-1)] * rawReturns[, 61:(windowPeriods + 59)], 2, sum)
IR_Out <- sqrt(12) * mean(activeReturns) / sd(activeReturns)
SE_N <- 1 / sqrt(length(activeReturns)) * sqrt(1 + 0.25 * (kurtosis(activeReturns) + 2) * IR_Out ^ 2 - skewness(activeReturns) * IR_Out)
} else if (grepl(analysis, "NEW")) {
# NEW model
# Set the factor returns in each period
IC <- c()
for (t in 1:windowPeriods) {
IC[t] <- mean(factor.returns[t:(t + 58), 2])
}
sigmaIC <- sd(IC)
condAlpha <- matrix(0, ncol = windowPeriods, nrow = N)
condOmega <- vector(length = windowPeriods, "list")
for (t in 1:windowPeriods) {
condAlpha[, t] <- mean(IC) * (diag(sigmaGarch[, t + 59]) %*% stdExposures[, t + 58])
sigma_eps <- sqrt(1 - mean(IC) ^ 2 - sigmaIC ^ 2)
condOmega[[t]] <- diag(sigmaGarch[, t + 59]) %*% (sigmaIC ^ 2 * (stdExposures[, t + 58] %*% t(stdExposures[, t + 58])) + sigma_eps^2 * diag(nrow = N)) %*% diag(sigmaGarch[, t + 59])
}
# Optimal active weights and in-sample IR
activeWeights <- matrix(0, ncol = windowPeriods, nrow = N)
sigma_A <- targetedVol
for (t in 1:windowPeriods) {
kappa <- (t(condAlpha[, t]) %*% solve(condOmega[[t]]) %*% rep(1, N)) / (rep(1, N) %*% solve(condOmega[[t]]) %*% rep(1, N))
activeWeights[, t] <- sigma_A * (solve(condOmega[[t]]) %*% as.matrix(condAlpha[, t] - kappa * rep(1, N))) / c(sqrt(t(as.matrix(condAlpha[, t])) %*% solve(condOmega[[t]]) %*% (condAlpha[, t] - kappa * rep(1, N))))
}
condMean <- apply(activeWeights * condAlpha, 2, sum)
portIR <- condMean / sigma_A
IR_In <- mean(portIR)
# Out-of-sample IR
activeReturns <- apply(activeWeights[, 1:(windowPeriods-1)] * rawReturns[, 61:(windowPeriods + 59)], 2, sum)
IR_Out <- sqrt(12) * mean(activeReturns) / sd(activeReturns)
SE_N <- 1 / sqrt(length(activeReturns)) * sqrt(1 + 0.25 * (kurtosis(activeReturns) + 2) * IR_Out ^ 2 - skewness(activeReturns) * IR_Out)
} else {
condAlpha <- condOmega <- IR_In <- IR_Out <- SE_N <- NULL
}
} else {
# Multi-factor model (To be implemented)
# Set the standardized exposures from above
condAlpha <- condOmega <- IR_In <- IR_Out <- SE_N <- NULL
}
# Create list of return values.
result <- list(factor.fit=reg.list, beta=beta, factor.returns=factor.returns,
residuals=residuals, r2=r2, factor.cov=factor.cov, g.cov = g.cov,
resid.cov=resid.cov, return.cov=return.cov, restriction.mat=restriction.mat,
resid.var=resid.var, call=this.call,
data=data, date.var=date.var, ret.var=ret.var,
asset.var=asset.var, exposure.vars=exposure.vars,
weight.var=weight.var, fit.method=fit.method,
asset.names=asset.names, factor.names=factor.names, activeReturns = activeReturns,
time.periods=time.periods, condAlpha = condAlpha,
condOmega = condOmega, IR = c(IR_In, IR_Out, SE_N))
class(result) <- "ffm"
return(result)
}
### function to calculate z-scores for numeric exposure i using weights w
## x is a data.frame object, i is a character string and w has same length as x
# rob.stats is a logical argument to compute robust location and scale
zScore <- function(x, i, w, rob.stats, z.score, asset.names) {
if (grepl(z.score, "csScore")) {
if (rob.stats) {
x_bar <- median(w * x[[i]])
(x[[i]] - x_bar)/mad(x[[i]], center = x_bar)
} else {
x_bar <- mean(w * x[[i]])
n <- length(x[[i]])
# use equal weighted squared deviation about the weighted mean
(x[[i]] - x_bar)/sqrt(sum((x[[i]] - x_bar) ^ 2)/(n - 1))
}
} else {
N <- length(asset.names)
exposures <- matrix(w * x[[i]], nrow = N)
sigmaEWMA <- stdExpo <- exposures
meanExp <- apply(exposures, 1, mean)
sigmaExp <- apply(exposures, 1, sd)
for (j in 1:N) {
ts <- (exposures[j, ] - meanExp[j]) ^ 2
var_past_2 <- sigmaExp[j] ^ 2
sigmaEWMA[j, ] <- sapply(ts, function(x) var_past_2 <<- 0.10 * x + 0.90 * var_past_2)
}
as.vector((exposures - meanExp) / sqrt(sigmaEWMA))
}
}
#' @param object a fit object of class \code{ffm} which is returned by
#' \code{fitFfm}
#' @rdname fitFfm
#' @method coef ffm
#' @export
coef.ffm <- function(object, ...) {
# these are the last period factor exposures
# already computed through fitFfm
return(object$beta)
}
#' @rdname fitFfm
#' @method fitted ffm
#' @export
fitted.ffm <- function(object, ...) {
# get fitted values for all assets in each time period
# transpose and convert into xts/zoo objects
fitted.xts <- checkData(t(sapply(object$factor.fit, fitted)))
names(fitted.xts) <- object$asset.names
return(fitted.xts)
}
#' @rdname fitFfm
#' @method residuals ffm
#' @export
residuals.ffm <- function(object, ...) {
return(object$residuals)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.