#' Causal Model Selection Tests for outcomes given driver and covariates
#'
#' Run CMST tests the causal direction between the first outcome \code{(y1)} and all other outcomes \code{(y2, ...)},
#' adjusting for the driver and covariates. The models tested (ignoring covariates and noise) are:
#' \itemize{
#' \item{\code{M1.y2.y1.z}} : \code{y2 <- y1 <- z}
#' \item{\code{M2.y1.y2.z}} : \code{y1 <- y2 <- z}
#' \item{\code{M3.y1.z.y2}} : \code{y1 <- z -> y2}
#' \item{\code{M4.y12.z}} : \code{y1 <- z -> y2} and \code{y1 <-> y2}
#' }
#' in which the arrows indicate direction of causality. For \code{M4}, the directionality between \code{y1} and \code{y2} is ambiguous.
#' The noise on outcomes \code{yi} is assumed to be normal.
#' The driver \code{z} is assumed to be causal for one or more outcomes \code{(y1, y2, ...)}, and may be categorical or continuous.
#' The covariates \code{X}, qualitative or quantitative, may act additively or be interactive with the driver.
#' The \code{driver} is the driver for outcomes, with covariates acting on outcomess.
#' The \code{resp_names}, \code{addcov} and \code{intcov} names must all be valid names in the \code{outcomes} data frame.
#'
#' @param outcomes data frame with outcomes
#' @param driver data frame with drivers
#' @param resp_names response names
#' @param addcov,intcov additive and interactive covariate names for responses
#' @param method method for CMST test (parametric, non-parametric, joint or all three); can provide more than one value.
#' @param penalty type of information criteria penalty (for BIC or AIC)
#' @param verbose verbose output if \code{TRUE}
#' @param fitFunction log likelihood calculation function; see \code{\link{fitDefault}}
#' @param ... possible additional arguments
#'
#' @export
#' @importFrom dplyr tbl_df
#' @importFrom mnormt pmnorm
#' @importFrom corpcor is.positive.definite make.positive.definite
#'
cmst <- function(driver, outcomes, covariates=NULL, addcov=NULL, intcov=NULL,
method = c("par", "non.par", "joint", "all"),
penalty = c("bic", "aic", "both"),
verbose = FALSE,
fitFunction = fitDefault,
...) {
if(any(is.na(c(driver))) |
any(is.na(c(outcomes))))
stop("missing values not allowed")
if(!is.null(covariates)) {
if(any(is.na(c(covariates))))
stop("missing values not allowed")
}
resp_names <- names(outcomes)
if(length(resp_names) < 2)
stop("need at least 2 response names")
n_ind <- nrow(outcomes)
n_drv <- nrow(driver)
n_cov <- nrow(covariates)
if(n_ind != n_drv)
stop("driver, outcomes and covariates must have same number of rows")
if(!is.null(covariates)) {
if(n_ind != n_cov | n_drv != n_cov)
stop("driver, outcomes and covariates must have same number of rows")
if(!is.list(addcov)) {
addcov <- list(addcov)
if(length(addcov) == 1)
addcov[[2]] <- NULL
}
if(!is.list(intcov)) {
intcov <- list(intcov)
if(length(intcov) == 1)
intcov[[2]] <- NULL
}
for(i in 1:2) {
if(!is.null(addcov[[i]])) {
# Make sure addcov has all intcov.
uname <- unique(c(addcov[[i]], intcov[[i]]))
if(!all(uname %in% names(covariates)))
stop("some of covariate names not in covariates data frame")
addcov[[i]] <- covariates[, uname, drop = FALSE]
}
if(!is.null(intcov[[i]]))
intcov[[i]] <- covariates[, intcov[[i]], drop = FALSE]
}
}
method <- match.arg(method, several.ok = TRUE)
if("all" %in% method)
method <- c("par", "non.par", "joint")
penalty <- match.arg(penalty, several.ok = TRUE)
if("both" %in% penalty)
penalty <- c("bic", "aic")
ntests <- length(resp_names) - 1
aux <- vector(mode = "list", length = ntests)
nms <- names(aux) <- paste(resp_names[1], resp_names[-1], sep = "_")
for(k in 1 : ntests) {
if(verbose)
cat("outcomes2 = ", k, "\n")
aux[[k]] <- cmst1(driver, outcomes[, c(1, 1 + k)], addcov, intcov,
method, penalty, fitFunction, ...)
}
models <- model_setup()
# Rearrange results.
out <- vector(mode = "list", length = 6)
names(out) <- c("pvals", "AIC", "BIC", "AIC.Z", "BIC.Z", "R2")
# Now populate the list
out$pvals <- vector(mode = "list", length = length(nms))
names(out$pvals) <- nms
pval_names <-
paste(rep(c("BIC","AIC"), 3),
rep(c("par","nonpar","joint"), each = 2),
sep = ".")
names(pval_names) <-
paste(rep("pvals", 6),
rep(c("p","np","j"), each = 2),
rep(c("BIC","AIC"), 3),
sep = ".")
out$pvals <- lapply(aux, function(x, pval_names) {
pvals <- grep("pvals", names(x))
out <- t(as.data.frame(x[pvals]))
rownames(out) <- pval_names[rownames(out)]
out
}, pval_names)
for(stat in c("AIC","BIC","AIC.Z","BIC.Z")) {
out[[stat]] <- t(sapply(aux, function(x, stat) x[[stat]], stat))
rownames(out[[stat]]) <- nms
}
out$R2 <- t(sapply(aux, function(x) x$R2))
rownames(out$R2) <- nms
class(out) <- c("cmst", class(out))
out
}
#' @export
print.cmst <- function(x, ...) {
cat("\nCausal Model Select Test p-values by outcome pair\n")
for(outcome_pair in names(x$pvals)) {
cat(outcome_pair, "\n")
print(x$pvals[[outcome_pair]])
}
for(IC in c("AIC","BIC")) {
if(!is.null(x$AIC)) {
cat("\n", IC, "statistics\n")
print(x[[IC]])
print(x[[paste0(IC, ".Z")]])
}
}
cat("\nExplained variation (R2)")
print(x$R2)
invisible()
}
model_setup <- function() {
models <- dplyr::tbl_df(matrix(c(
"M1.y2.y1.z", "y1.z", "y2.y1",
"M2.y1.y2.z", "y2.z", "y1.y2",
"M3.y1.z.y2", "y2.z", "y1.z",
"M4.y12.z", "y1.z", "y2.y1z"),
4, 3, byrow = TRUE))
names(models) <- c("model","first","second")
models
}
cmst1 <- function(driver, outcomes, addcov=NULL, intcov=NULL,
method = c("par", "non.par", "joint", "all"),
penalty = c("bic", "aic", "both"),
fitFunction, ...) {
# Want to roll more of this into using cmst_default.
# May need to rethink comparison of models a bit.
resp_names <- names(outcomes)
if(length(resp_names) != 2)
stop("only two outcomes for cmst1 call")
outcomes <- outcomes[resp_names]
# log.lik.1 #
dfcol <- function(x, y, i) as.data.frame(cbind(x, as.matrix(y[,i, drop = FALSE])))
ll_list <- list()
ll_list[["y1.z"]] <-
fitFunction(driver, outcomes[[1]],
addcov[[1]], intcov[[1]])
ll_list[["y2.z"]] <-
fitFunction(driver, outcomes[[2]],
addcov[[2]], intcov[[2]])
ll_list[["y1.y2"]] <-
fitFunction(as.matrix(outcomes[[2]]), outcomes[[1]],
addcov[[1]], intcov[[1]])
ll_list[["y2.y1"]] <-
fitFunction(as.matrix(outcomes[[1]]), outcomes[[2]],
addcov[[2]], intcov[[2]])
ll_list[["y2.y1z"]] <-
fitFunction(as.matrix(cbind(driver, outcomes[[1]])), outcomes[[2]],
addcov[[2]], intcov[[2]])
models <- model_setup()
R2 <- rep(0,2)
names(R2) <- models$first[1:2]
for(i in 1:2) {
TSS <- sum((outcomes[[resp_names[i]]] -
mean(outcomes[[resp_names[i]]]))^2)
R2[models$first[i]] <- 1 -
(ll_list[[models$first[i]]]$RSS / TSS)
}
LR <- model.dim <- rep(0,4)
indLR <- matrix(0, n_ind, 4)
names(LR) <- names(model.dim) <- colnames(indLR) <- models$model
for(i in 1:4) {
fit1 <- ll_list[[models$first[i]]]
fit2 <- ll_list[[models$second[i]]]
LR[models$model[i]] <- fit1$LR + fit2$LR
model.dim[models$model[i]] <- 2 + fit1$df + fit2$df
indLR[, models$model[i]] <- fit1$indLR + fit2$indLR
}
models <- list(
LR = LR,
indLR = as.data.frame(indLR),
df = model.dim
)
out <- list(resp = resp_names,
addcov = addcov,
intcov = intcov,
n.ind = n_ind,
LR = LR,
model.dim = model.dim,
R2 = R2,
S.hat = calcShat(indLR))
# Set up information criteria.
if("bic" %in% penalty & ("par" %in% method | "joint" %in% method)) {
out$BIC <- calcICs(models, "B")
BIC.Z <- calcZ(models, out$S.hat, out$BIC, "B")
out$BIC.Z <- BIC.Z
}
if("aic" %in% penalty & ("par" %in% method | "joint" %in% method)) {
out$AIC <- calcICs(models, "A")
AIC.Z <- calcZ(models, out$S.hat, out$AIC, "A")
out$AIC.Z <- AIC.Z
}
if("par" %in% method) {
if("bic" %in% penalty)
out$pvals.p.BIC <- normIUCMST(models, BIC.Z)
if("aic" %in% penalty)
out$pvals.p.AIC <- normIUCMST(models, AIC.Z)
}
if("non.par" %in% method) {
if("bic" %in% penalty)
out$pvals.np.BIC <- binomIUCMST(models, "B")
if("aic" %in% penalty)
out$pvals.np.AIC <- binomIUCMST(models, "A")
}
if("joint" %in% method) {
if("bic" %in% penalty)
out$pvals.j.BIC <- normJointIUCMST(models, BIC.Z)
if("aic" %in% penalty)
out$pvals.j.AIC <- normJointIUCMST(models, AIC.Z)
}
pvals <- grep("pvals", names(out))
out[pvals] <- lapply(out[pvals], function(x,m) {
names(x) <- m
x
}, models$model)
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.