#' Return Indices of Model Performance
#'
#' Generic function. Generally returns things like
#' fit indices, absolute error metrics, tests of
#' overall model significance.
#'
#' For \code{lm} class objects, return number of observations,
#' AIC, BIC, log likelihood, R2, overall model F test, and p-value.
#'
#' @param object A fitted model object. The class of the model
#' determines which specific method is called.
#' @param ... Additional arguments passed to specific methods.
#' @return A \code{data.table} with results.
#' @export
#' @rdname modelPerformance
modelPerformance <- function(object, ...) {
UseMethod("modelPerformance", object)
}
#' @param x A object (e.g., list or a modelPerformance object) to
#' test or attempt coercing to a modelPerformance object.
#' @importFrom data.table is.data.table as.data.table
#' @rdname modelPerformance
#' @export
as.modelPerformance <- function(x) {
if (!is.modelPerformance(x)) {
if (!is.list(x)) {
stop("Input must be a list or a modelPerformance object")
}
augmentClass <- attr(x, "augmentClass")
x <- list(
Performance = x[[1]])
if (is.null(augmentClass)) {
class(x) <- "modelPerformance"
} else {
class(x) <- c(paste0("modelPerformance.", augmentClass),
"modelPerformance")
}
}
## checks that the object is not malformed
stopifnot("Model" %in% names(x[[1]]))
stopifnot(is.data.table(x[[1]]))
return(x)
}
#' @rdname modelPerformance
#' @export
is.modelPerformance <- function(x) {
inherits(x, "modelPerformance")
}
#' @rdname modelPerformance
#' @export
#' @method modelPerformance lm
#' @return A list with a \code{data.table} with the following elements:
#' \describe{
#' \item{Model}{A character string indicating the model type, here lm}
#' \item{N_Obs}{The number of observations}
#' \item{AIC}{Akaike Information Criterion}
#' \item{BIC}{Bayesian Information Criterion}
#' \item{LL}{log likelihood}
#' \item{LLDF}{log likelihood degrees of freedom}
#' \item{Sigma}{Residual variability}
#' \item{R2}{in sample variance explained}
#' \item{F2}{Cohen's F2 effect size R2 / (1 - R2)}
#' \item{AdjR2}{adjusted variance explained}
#' \item{F}{F value for overall model significance test}
#' \item{FNumDF}{numerator degrees of freedom for F test}
#' \item{FDenDF}{denominator degrees of freedom for F test}
#' \item{P}{p-value for overall model F test}
#' }
#' @examples
#' modelPerformance(lm(mpg ~ qsec * hp, data = mtcars))
#'
#' modelPerformance(lm(mpg ~ hp, data = mtcars))
#'
#' \dontrun{
#' modelPerformance(lm(mpg ~ 0 + hp, data = mtcars))
#' modelPerformance(lm(mpg ~ 1, data = mtcars))
#' modelPerformance(lm(mpg ~ 0, data = mtcars))
#' }
modelPerformance.lm <- function(object, ...) {
LL <- logLik(object)
LLdf <- attr(LL, "df")
msum <- summary(object)
## empty model or intercept only model
if (object$rank == 0 || (object$rank == 1 &&
attr(terms(object), "intercept") == 1)) {
msum$fstatistic <- c(value = NA_real_, numdf = NA_real_, dendf = NA_real_)
}
P <- pf(msum$fstatistic[["value"]],
msum$fstatistic[["numdf"]],
msum$fstatistic[["dendf"]],
lower.tail = FALSE)
out <- data.table(
Model = as.character("lm"),
N_Obs = as.numeric(nrow(model.matrix(object))),
AIC = as.numeric(AIC(object)),
BIC = as.numeric(BIC(object)),
LL = as.numeric(LL),
LLDF = as.numeric(LLdf),
Sigma = as.numeric(msum$sigma),
R2 = as.numeric(msum$r.squared),
F2 = as.numeric(msum$r.squared / (1 - msum$r.squared)),
AdjR2 = as.numeric(msum$adj.r.squared),
F = as.numeric(msum$fstatistic[["value"]]),
FNumDF = as.numeric(msum$fstatistic[["numdf"]]),
FDenDF = as.numeric(msum$fstatistic[["dendf"]]),
P = as.numeric(P))
out <- list(out)
attr(out, "augmentClass") <- "lm"
as.modelPerformance(out)
}
#' Calculate R2 Values
#'
#' Generic function to return variance explained (R2)
#' estimates from various models. In some cases these will be true
#' R2 values, in other cases they may be pseudo-R2 values if
#' R2 is not strictly defined for a model.
#'
#' @param object A fitted model object.
#' @param ... Additional arguments passed to specific methods.
#' @return Depends on the method dispatch.
#' @export
#' @rdname R2
R2 <- function(object, ...) {
UseMethod("R2", object)
}
#' @return The raw and adjusted r-squared value.
#' @method R2 lm
#' @export
#' @rdname R2
#' @examples
#' R2(lm(mpg ~ qsec * hp, data = mtcars))
R2.lm <- function(object, ...) {
unlist(modelPerformance(object)$Performance[, c("R2", "AdjR2"), with = FALSE])
}
#' Compare Two Models
#'
#' Generic function.
#'
#' @param model1 A fitted model object.
#' @param model2 A fitted model object to compare to \code{model1}
#' @param ... Additional arguments passed to specific methods.
#' @return Depends on the method dispatch.
#' @rdname modelCompare
#' @export
modelCompare <- function(model1, model2, ...) {
UseMethod("modelCompare", model1)
}
#' @param x An object (e.g., list or a modelCompare object) to
#' test or attempt coercing to a modelCompare object.
#' @importFrom data.table is.data.table as.data.table
#' @rdname modelCompare
#' @export
as.modelCompare <- function(x) {
if (!is.modelCompare(x)) {
if(!is.list(x)) {
stop("Input must be a list or a modelCompare object")
}
augmentClass <- attr(x, "augmentClass")
x <- list(
Comparison = x[[1]])
if(is.null(augmentClass)) {
class(x) <- "modelCompare"
} else {
class(x) <- c(paste0("modelCompare.", augmentClass), "modelCompare")
}
}
## checks that the object is not malformed
stopifnot("Model" %in% names(x[[1]]))
stopifnot(is.data.table(x[[1]]))
stopifnot(nrow(x[[1]]) > 2)
return(x)
}
#' @rdname modelCompare
#' @export
is.modelCompare <- function(x) {
inherits(x, "modelCompare")
}
## clear R CMD CHECK notes
if (getRversion() >= "2.15.1") utils::globalVariables(c("F2", "FNumDF", "FDenDF"))
#' @rdname modelCompare
#' @importFrom data.table data.table
#' @importFrom stats logLik anova AIC BIC
#' @method modelCompare lm
#' @export
#' @examples
#' m1 <- lm(mpg ~ qsec * hp, data = mtcars)
#'
#' m2 <- lm(mpg ~ am, data = mtcars)
#'
#' modelCompare(m1, m2)
#'
#' ## cleanup
#' rm(m1, m2)
#'
#' \dontrun{
#' m3 <- lm(mpg ~ 1, data = mtcars)
#' m4 <- lm(mpg ~ 0, data = mtcars)
#' modelCompare(m3, m4)
#'
#' ## cleanup
#' rm(m3, m4)
#' }
modelCompare.lm <- function(model1, model2, ...) {
stopifnot(identical(class(model1), class(model2)))
i1 <- modelPerformance(model1)$Performance
i2 <- modelPerformance(model2)$Performance
if (identical(i1$LLDF, i2$LLDF)) {
stop("One model must be nested within the other")
}
if (i1$LLDF > i2$LLDF) {
i3 <- i1
model3 <- model1
i1 <- i2
model1 <- model2
i2 <- i3
model2 <- model3
rm(i3, model3)
}
test <- anova(model1, model2, test = "F")
i1$Model <- "Reduced"
i2$Model <- "Full"
out <- rbind(
i1, i2,
cbind(Model = "Difference", i2[,-1] - i1[,-1]))
out[3, F2 := R2 / (1 - out[2, R2])]
out[3, F := test$F[2]]
out[3, FNumDF := test$Df[2]]
out[3, FDenDF := test$Res.Df[2]]
out[3, P := test[["Pr(>F)"]][2]]
out <- list(out)
attr(out, "augmentClass") <- "lm"
as.modelCompare(out)
}
#' Detailed Tests on Models
#'
#' TODO: make me!
#'
#' @param object A fitted model object.
#' @param ... Additional arguments passed to specific methods.
#' @return Depends on the method dispatch.
#' @rdname modelTest
#' @export
modelTest <- function(object, ...) {
UseMethod("modelTest", object)
}
#' @export
#' @rdname modelTest
is.modelTest <- function(x) {
inherits(x, "modelTest")
}
#' @param x A object (e.g., list or a modelTest object) to
#' test or attempt coercing to a modelTest object.
#' @importFrom data.table is.data.table as.data.table
#' @export
#' @rdname modelTest
as.modelTest <- function(x) {
if (!is.modelTest(x)) {
if(!is.list(x)) {
stop("Input must be a list or a modelTest object")
}
augmentClass <- attr(x, "augmentClass")
x <- list(
FixedEffects = x[[1]],
RandomEffects = x[[2]],
EffectSizes = x[[3]],
OverallModel = x[[4]])
if(is.null(augmentClass)) {
class(x) <- "modelTest"
} else {
class(x) <- c(paste0("modelTest.", augmentClass), "modelTest")
}
}
stopifnot(is.modelTest(x))
stopifnot(identical(length(x), 4L))
stopifnot(identical(names(x),
c("FixedEffects",
"RandomEffects",
"EffectSizes",
"OverallModel")))
return(x)
}
## clear R CMD CHECK notes
if (getRversion() >= "2.15.1") utils::globalVariables(c("K"))
#' At the moment, \code{modelTest.vglm} method only handles the multinomial
#' family, although this may get expanded in the future.
#'
#' @return A list with two elements.
#' \code{Results} contains a data table of the actual estimates.
#' \code{Table} contains a nicely formatted character matrix.
#' @method modelTest vglm
#' @rdname modelTest
#' @export
#' @importFrom stats model.matrix vcov formula terms update anova
#' @importFrom VGAM vglm multinomial summary anova.vglm vlm
#' @importMethodsFrom VGAM vcov lrtest
#' @importFrom data.table data.table
#' @examples
#' mtcars$cyl <- factor(mtcars$cyl)
#' m <- VGAM::vglm(cyl ~ qsec,
#' family = VGAM::multinomial(), data = mtcars)
#' modelTest(m)
#'
#' ## clean up
#' rm(m, mtcars)
#'
#' \dontrun{
#' mtcars$cyl <- factor(mtcars$cyl)
#' mtcars$am <- factor(mtcars$am)
#' m <- VGAM::vglm(cyl ~ qsec,
#' family = VGAM::multinomial(), data = mtcars)
#' modelTest(m)
#'
#' m <- VGAM::vglm(cyl ~ scale(qsec),
#' family = VGAM::multinomial(), data = mtcars)
#' modelTest(m)
#'
#' m2 <- VGAM::vglm(cyl ~ factor(vs) * scale(qsec),
#' family = VGAM::multinomial(), data = mtcars)
#' modelTest(m2)
#'
#' m <- VGAM::vglm(Species ~ Sepal.Length,
#' family = VGAM::multinomial(), data = iris)
#' modelTest(m)
#'
#' set.seed(1234)
#' sampdata <- data.frame(
#' Outcome = factor(sample(letters[1:3], 20 * 9, TRUE)),
#' C1 = rnorm(20 * 9),
#' D3 = sample(paste0("L", 1:3), 20 * 9, TRUE))
#'
#' m <- VGAM::vglm(Outcome ~ factor(D3),
#' family = VGAM::multinomial(), data = sampdata)
#' modelTest(m)
#'
#' m <- VGAM::vglm(Outcome ~ factor(D3) + C1,
#' family = VGAM::multinomial(), data = sampdata)
#' modelTest(m)
#' }
modelTest.vglm <- function(object, ...) {
if ("multinomial" %in% object@family@vfamily) {
"do something"
} else {
stop("can only deal with multinomial family vglm models right now")
}
k <- seq_len(ncol(object@y))
if (length(k) < 3) stop("DV must have at least 3 levels, after omitting missing data")
## k - 1 comparisons
nk <- seq_along(k)[-length(k)]
## what are the predictors
ivterms <- attr(terms(formula(object)), "term.labels")
## first get position of each term
ivterms.pos <- attr(VGAM::model.matrix(object), "assign")
coefNames <- names(VGAM::coef(object))
coefNum <- as.integer(gsub("^(.*)\\:([1-9]*)$", "\\2", coefNames))
coefTerm <- gsub("^(.*)\\:([1-9]*)$", "\\1", names(VGAM::coef(object)))
coefInfo <- data.table(
Num = coefNum,
Names = coefTerm)
for (v in seq_along(ivterms.pos)) {
coefInfo[ivterms.pos[[v]], Term := names(ivterms.pos)[v]]
}
## models with different contrasts
m <- lapply(nk, function(i) {
update(object, family = VGAM::multinomial(refLevel = i))
})
m.res <- lapply(nk, function(i) {
tab <- VGAM::coef(VGAM::summary(m[[i]]))
ci <- VGAM::confint(m[[i]])
out <- copy(coefInfo)
out[, Ref := k[i]]
out[, Est := tab[, "Estimate"]]
out[, SE := tab[, "Std. Error"]]
out[, Pval := tab[, "Pr(>|z|)"]]
out[, LL := ci[, 1]]
out[, UL := ci[, 2]]
out[, K := length(k)]
out[, Comp := k[-i][Num]]
return(out)
})
## omnibus p-values
terms.res <- lapply(ivterms, function(v) {
i <- ivterms.pos[[v]]
if (identical(length(ivterms), 1L)) {
out <- VGAM::lrtest(object, update(object, as.formula(sprintf(". ~ . - %s", v))))
out <- data.table(
Term = v,
Chisq = out@Body$Chisq[2],
DF = out@Body$Df[2],
Pval = out@Body[["Pr(>Chisq)"]][2],
Type = "Fixed")
} else {
out <- anova(object, type = "III")
out <- data.table(
Term = v,
Chisq = out[v, "Deviance"],
DF = out[v, "Df"],
Pval = out[v, "Pr(>Chi)"],
Type = "Fixed")
}
est <- do.call(rbind, lapply(nk, function(j) m.res[[j]][i][Num >= j]))
est[, Labels := sprintf("%s vs. %s", Comp, Ref)]
return(list(Coefs = est, ES = out))
})
out <- list(
do.call(rbind, lapply(terms.res, `[[`, "Coefs")),
NA,
do.call(rbind, lapply(terms.res, `[[`, "ES")),
NA)
attr(out, "augmentClass") <- "vglm"
as.modelTest(out)
}
## clear R CMD CHECK notes
if (getRversion() >= "2.15.1") utils::globalVariables(c("Model", "Type"))
#' Modified lm() to use a specified design matrix
#'
#' This function is a minor modification of the lm() function
#' to allow the use of a pre-specified design matrix. It is not intended for
#' public use but only to support \code{modelTest.lm}.
#'
#' @param formula An object of class "formula" although it is only minimally used
#' @param data the dataset
#' @param subset subset
#' @param weights any weights
#' @param na.action Defaults to \code{na.omit}
#' @param model defaults to \code{TRUE}
#' @param x defaults to \code{FALSE}
#' @param y defaults to \code{FALSE}
#' @param qr defaults to \code{TRUE}
#' @param singular.ok defaults to \code{TRUE}
#' @param contrasts defaults to \code{NULL}
#' @param offset missing by default
#' @param designMatrix a model matrix / design matrix (all numeric, pre coded if applicable for discrete variables)
#' @param yObserved the observed y values
#' @param ... additional arguments
#' @return an lm class object
#' @importFrom stats .getXlevels is.empty.model lm.fit lm.wfit model.offset model.weights
#' @seealso \code{lm}
#' @examples
#' mtcars$cyl <- factor(mtcars$cyl)
#' m <- lm(mpg ~ hp * cyl, data = mtcars)
#'
#' x <- model.matrix(m)
#' y <- mtcars$mpg
#' m2 <- JWileymisc:::lm2(mpg ~ 1 + cyl + hp:cyl, data = mtcars,
#' designMatrix = x[, -2, drop = FALSE],
#' yObserved = y)
#'
#' anova(m, m2)
#'
#' rm(m, m2, x, y)
lm2 <- function (formula, data, subset, weights, na.action,## method = "qr",
model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE,
contrasts = NULL, offset, designMatrix, yObserved, ...) {
ret.x <- x
ret.y <- y
cl <- match.call()
mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action",
"offset"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- quote(stats::model.frame)
mf <- eval(mf, parent.frame())
## ## currently removed as not applicable for current use
## ## but may be needed if modelTest() ever expands / changes
## if (method == "model.frame")
## return(mf)
## else if (method != "qr")
## warning(gettextf("method = '%s' is not supported. Using 'qr'",
## method), domain = NA)
mt <- attr(mf, "terms")
y <- yObserved
w <- as.vector(model.weights(mf))
if (!is.null(w) && !is.numeric(w))
stop("'weights' must be a numeric vector")
offset <- model.offset(mf)
mlm <- is.matrix(y)
ny <- if (mlm)
nrow(y)
else length(y)
if (!is.null(offset)) {
if (!mlm)
offset <- as.vector(offset)
if (NROW(offset) != ny)
stop(gettextf(
"number of offsets is %d, should equal %d (number of observations)",
NROW(offset), ny), domain = NA)
}
if (is.empty.model(mt)) {
x <- NULL
z <- list(coefficients = if (mlm) matrix(NA_real_, 0,
ncol(y)) else numeric(), residuals = y, fitted.values = 0 *
y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w !=
0) else ny)
if (!is.null(offset)) {
z$fitted.values <- offset
z$residuals <- y - offset
}
} else {
x <- designMatrix
z <- if (is.null(w))
lm.fit(x, y, offset = offset, singular.ok = singular.ok,
...)
else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
...)
}
class(z) <- c(if (mlm) "mlm", "lm")
z$na.action <- attr(mf, "na.action")
z$offset <- offset
z$contrasts <- attr(x, "contrasts")
z$xlevels <- .getXlevels(mt, mf)
z$call <- cl
z$terms <- mt
if (model) {
z$model <- mf
}
if (ret.x) {
z$x <- x
}
if (ret.y) {
z$y <- y
}
if (!qr) {
z$qr <- NULL
}
return(z)
}
#' @return A list with two elements.
#' \code{Results} contains a data table of the actual estimates.
#' \code{Table} contains a nicely formatted character matrix.
#' @method modelTest lm
#' @rdname modelTest
#' @export
#' @importFrom stats model.matrix vcov formula terms update anova
#' @importFrom data.table data.table
#' @importFrom extraoperators %s!in% %?!in%
#' @examples
#' m1 <- lm(mpg ~ qsec * hp, data = mtcars)
#' modelTest(m1)
#'
#' mtcars$cyl <- factor(mtcars$cyl)
#' m2 <- lm(mpg ~ cyl, data = mtcars)
#' modelTest(m2)
#'
#' m3 <- lm(mpg ~ hp * cyl, data = mtcars)
#' modelTest(m3)
#'
#' m4 <- lm(sqrt(mpg) ~ hp * cyl, data = mtcars)
#' modelTest(m4)
#'
#' m5 <- lm(mpg ~ sqrt(hp) * cyl, data = mtcars)
#' modelTest(m5)
#'
#' ## cleanup
#' rm(m1, m2, m3, m4, m5, mtcars)
modelTest.lm <- function(object, ...) {
## get formula
f <- formula(object)
fe.terms <- terms(f)
fe.labs <- labels(fe.terms)
fe.intercept <- if (identical(attr(fe.terms, "intercept"), 1L)) "1" else "0"
mf <- model.frame(object)
vnames <- all.vars(formula(object))
if (identical(ncol(mf), length(vnames))) {
names(mf) <- vnames
} else {
stop(sprintf(paste0(
"There are %d columns in the model frame [%s],\n ",
"but %d variable names in the formula [%s].\n ",
"If an on-the-fly transformation was applied,\n ",
"try creating the variable / transformation as a new ",
"variable in the dataset and re-running."),
ncol(mf),
paste(names(mf), collapse = ", "),
length(vnames),
paste(vnames, collapse = ", " )))
}
x <- model.matrix(object)
y <- object$residuals + object$fitted.values
cis <- confint(object)
msum <- summary(object)
fes <- data.table(
Term = rownames(cis),
Est = as.numeric(coef(object)),
LL = cis[, 1],
UL = cis[, 2],
Pval = coef(msum)[, "Pr(>|t|)"])
out.tests <- do.call(rbind, lapply(seq_along(fe.labs), function(i) {
out <- data.table(
Model = NA_character_, N_Obs = NA_real_, AIC = NA_real_,
BIC = NA_real_, LL = NA_real_, LLDF = NA_real_, Sigma = NA_real_,
R2 = NA_real_, F2 = NA_real_, AdjR2 = NA_real_, F = NA_real_,
FNumDF = NA_real_, FDenDF = NA_real_, P = NA_real_)
if (!is.na(fe.labs[[i]])) {
use.fe.labs <- fe.labs %s!in% fe.labs[[i]]
out.f <- sprintf("%s ~ %s%s%s",
as.character(f)[2],
fe.intercept,
if (length(use.fe.labs)) " + " else "",
paste(use.fe.labs, collapse = " + "))
use <- attr(x, "assign") %?!in% i
reduced <- lm2(as.formula(out.f),
data = mf,
designMatrix = x[, use, drop = FALSE], yObserved = y)
out <- modelCompare(object, reduced)$Comparison[Model == "Difference"]
}
setnames(out, old = "Model", new = "Term")
out$Term <- fe.labs[[i]]
return(out)
}))
out.tests[, Type := "Fixed"]
out <- list(fes, NA, out.tests,
modelPerformance(object))
attr(out, "augmentClass") <- "lm"
as.modelTest(out)
}
## clear R CMD CHECK notes
if (getRversion() >= "2.15.1") {
utils::globalVariables(c(
"Comp", "Num", "Labels", "Ref", "Term",
"B", "P", "LL", "UL", "SE"))
}
## clear R CMD CHECK notes
if (getRversion() >= "2.15.1") utils::globalVariables(c("V2", "Index", ".N"))
##' Function to find significant regions from an interaction
##'
##' This function uses the \code{contrast} function from \pkg{rms} to
##' find the threshold for significance from interactions.
##'
##'
##' @param object A fitted rms object
##' @param l1 the first set of values to fix for the contrast function
##' @param l2 the second set of values to fix for the contrast function
##' @param name.vary the name of the model parameter to vary values for
##' to find the threshold. Note that this should not be included in
##' \code{l1} or \code{l2} arguments.
##' @param lower The lower bound to search for values for the varying value
##' @param upper The upper bound to search for values for the varying value
##' @param alpha The significance threshold, defaults to \code{.05}
##' @param starts Number of starting values to try between the
##' lower and upper bounds.
##' @return A data table with notes if no convergence or significance
##' thresholds (if any).
##' @export
##' @importFrom data.table as.data.table data.table
##' @importFrom rms contrast
##' @importFrom stats optim
##' @examples
##' ## make me
findSigRegions <- function(object, l1, l2, name.vary, lower, upper, alpha = .05, starts = 50) {
foo <- function(x) {
tmp1 <- l1
tmp2 <- l2
tmp1[name.vary] <- x
tmp2[name.vary] <- x
out <- abs(alpha - as.data.table(
as.data.frame(
contrast(object, tmp1, tmp2)[c("Pvalue")]))[, Pvalue])
if (is.na(out) || is.nan(out) || !is.finite(out)) {
out <- 9
}
return(out)
}
fstarts <- function(startval) {
res <- optim(par = startval, fn = foo,
lower = lower, upper = upper,
method = "L-BFGS-B",
control = list(factr = 1e11, maxit = 500))
conv <- valid <- sig <- FALSE
out <- data.table(A = as.numeric(startval), Contrast = NA_real_, Pvalue = NA_real_, Notes = NA_character_)
setnames(out, names(out), c(name.vary, "Contrast", "Pvalue", "Notes"))
if (isTRUE(all.equal(res$convergence, 0))) {
conv <- TRUE
out$Notes <- "Converged"
if (res$value > (-.Machine$double.eps) & res$value < (.95 + .Machine$double.eps)) {
valid <- TRUE
out$Notes <- paste(out$Notes, "Valid", sep = "; ")
if (res$value < .0001) {
sig <- TRUE
tmp1 <- l1
tmp1[name.vary] <- res$par
tmp2 <- l2
tmp2[name.vary] <- res$par
out <- cbind(
as.data.table(as.data.frame(contrast(object, tmp1, tmp2)[c(name.vary, "Contrast", "Pvalue")])),
Notes = out$Notes)
}
} else {
out$Notes <- paste(out$Notes, "Invalid result", sep = "; ")
}
} else {
out$Notes <- paste("Did not converge", res$convergence, res$message, sep = "; ")
}
return(out)
}
out <- do.call(rbind, lapply(seq(from = lower, to = upper, length.out = starts), fstarts))
if (any(!is.na(out$Pvalue))) {
out <- out[!is.na(Pvalue)]
}
out[, V2 := round(get(name.vary) / abs(lower - upper), 3)]
out <- out[order(V2, -abs(alpha - Pvalue))]
out[, Index := 1:.N, by = V2]
out[Index == 1][, Index := NULL][, V2 := NULL]
}
# clear R CMD CHECK notes
if (getRversion() >= "2.15.1") {
utils::globalVariables(c(
"Pvalue", "xz", "yz", "yhat", "yllz",
"lower", "yulz", "upper", "reglab", "Contrast",
"Lower", "Upper", "x", "ContrastAngle",
"ContrastAngleZ", "Yhat"))
}
##' Function to find significant regions from an interaction
##'
##' This function uses the \code{contrast} function from \pkg{rms} to
##' find the threshold for significance from interactions.
##'
##' @param object A fitted rms object
##' @param predList TODO
##' @param contrastList TODO
##' @param xvar TODO
##' @param varyvar TODO
##' @param varyvar.levels TODO
##' @param xlab optional
##' @param ylab TODO
##' @param ratio TODO
##' @param xlim TODO
##' @param ylim TODO
##' @param xbreaks TODO
##' @param xlabels optional
##' @param scale.x optional
##' @param scale.y optional
##' @param starts Number of starting values to try between the
##' lower and upper bounds.
##' @return A data table with notes if no convergence or significance
##' thresholds (if any).
##' @export
##' @importFrom data.table as.data.table
##' @importFrom rms Predict
##' @importFrom ggplot2 ggplot geom_text aes scale_x_continuous theme xlab ylab coord_fixed
##' @importFrom ggplot2 element_blank unit geom_line geom_ribbon aes_string geom_segment
##' @importFrom grid arrow
##' @importFrom ggpubr theme_pubr
##' @examples
##' ## make me
intSigRegGraph <- function(object, predList, contrastList, xvar, varyvar,
varyvar.levels,
xlab = xvar, ylab = "Predicted Values", ratio = 1,
xlim, ylim,
xbreaks, xlabels = xbreaks,
scale.x = c(m = 0, s = 1), scale.y = c(m = 0, s = 1),
starts = 50) {
preds <- as.data.table(do.call(Predict, list(x = object, factors = predList, conf.type = "mean")))
preds[, xz := (get(xvar) - scale.x["m"])/scale.x["s"]]
preds[, yz := (yhat - scale.y["m"])/scale.y["s"]]
preds[, yllz := (lower - scale.y["m"])/scale.y["s"]]
preds[, yulz := (upper - scale.y["m"])/scale.y["s"]]
simpleSlopes <- do.call(rbind, lapply(contrastList, function(x) {
out <- as.data.table(as.data.frame(
contrast(object,
c(x[[-1]], x[[1]][1]), c(x[[-1]], x[[1]][2]), type = "average")[
c("Contrast", "SE", "Lower", "Upper", "Pvalue")]))
out[, reglab := sprintf(
"b = %0.2f [%0.2f, %0.2f], %s",
Contrast, Lower, Upper,
formatPval(Pvalue, 3, 3, includeP = TRUE))]
return(out)
}))
yvals <- preds[, .(y = yhat[which.min(abs(get(xvar) - pmax(min(get(xvar), na.rm = TRUE), min(xlim))))],
yz = yz[which.min(abs(xz - pmax(min(xz, na.rm = TRUE), min(xlim))))]),
by = varyvar]
yvals2 <- preds[,
.(y = yhat[which.min(abs(get(xvar) - pmin(max(get(xvar), na.rm = TRUE), max(xlim))))],
yz = yz[which.min(abs(xz - pmin(max(xz, na.rm = TRUE), max(xlim))))]),
by = varyvar]
if ((abs(diff(yvals$yz))/abs(diff(yvals2$yz)) < .9)) {
use.xmax <- TRUE
simpleSlopes[, x := pmin(max(preds[[xvar]], na.rm = TRUE), max(xlim))]
simpleSlopes[, xz := pmin(max(preds[["xz"]], na.rm = TRUE), max(xlim))]
simpleSlopes <- cbind(simpleSlopes, yvals2)
} else {
use.xmax <- FALSE
simpleSlopes[, x := pmax(min(preds[[xvar]], na.rm = TRUE), min(xlim))]
simpleSlopes[, xz := pmax(min(preds[["xz"]], na.rm = TRUE), min(xlim))]
simpleSlopes <- cbind(simpleSlopes, yvals)
}
simpleSlopes[, ContrastAngle := atan(Contrast * ratio)/(pi/180)]
simpleSlopes[, ContrastAngleZ := atan(Contrast * ratio * scale.x["s"])/(pi/180)]
sigThresh <- findSigRegions(object, contrastList[[1]][[2]], contrastList[[2]][[2]],
name.vary = xvar,
lower = min(preds[[xvar]], na.rm = TRUE),
upper = max(preds[[xvar]], na.rm = TRUE),
starts = starts)
sigThresh[, xz := (get(xvar) - scale.x["m"])/scale.x["s"]]
anysig <- any(!is.na(sigThresh$Pvalue))
if (anysig) {
sigThreshArrows <- lapply(1:nrow(sigThresh), function(i) {
z <- sigThresh[i, xz]
merge(cbind(preds[, .(Yhat = yhat[which.min(abs(xz - z))]), by = varyvar],
xz = z), sigThresh[i], by = "xz", all = TRUE)
})
} else {
sigThreshArrows <- list(NA)
}
if (missing(varyvar.levels)) {
varyvar.levels <- list(levels = unique(preds[[varyvar]]), labels = unique(preds[[varyvar]]))
}
preds[, (varyvar) := factor(get(varyvar), levels = varyvar.levels$levels, labels = varyvar.levels$labels)]
finalOut <- list(
Predictions = preds,
simpleSlopes = simpleSlopes,
significantThresholds = sigThresh,
significantThresholdArrows = sigThreshArrows)
if (anysig) {
arrow.geoms <- lapply(sigThreshArrows, function(arr) {
geom_segment(aes(x = xz[1], y = Yhat[1], xend = xz[2], yend = Yhat[2]),
data = arr,
size=.6, arrow = arrow(length = unit(.03, "npc"), ends = "both"))
})
} else {
arrow.geoms <- list(NA)
}
base.code <- paste0('
ggplot(Predictions, aes(xz, y = yhat)) +
geom_ribbon(aes_string(ymin = "lower", ymax = "upper", group = \"', varyvar, '\"), alpha = .1) +
geom_line(aes_string(linetype = \"', varyvar, '\"), size = 2) +
geom_text(aes(x = xz, y = yz + ', (.05 * diff(ylim)), ', label = reglab, angle = ContrastAngleZ),
data = simpleSlopes, hjust = ', use.xmax, ') +
scale_x_continuous(breaks = ', deparse(xbreaks), ', labels = ', deparse(xlabels), ') +
theme_ggpubr() +
theme(
legend.key.width = unit(2, "cm"),
legend.title = element_blank(),
legend.position = "bottom") +
xlab(', deparse(xlab), ') +
ylab(', deparse(ylab), ') +
coord_fixed(ratio = ', ratio, ',
xlim = ', deparse(xlim), ',
ylim = ', deparse(ylim), ',
expand = FALSE)')
if (anysig) {
p.code <- paste(c(base.code, sapply(1:length(sigThreshArrows), function(i) {
paste0('geom_segment(aes(x = xz[1], y = Yhat[1], xend = xz[2], yend = Yhat[2]),
data = significantThresholdArrows[[', i, ']],
size=.6, arrow = arrow(length = unit(.03, "npc"), ends = "both"))')})),
collapse = " + \n")
} else {
p.code <- base.code
}
p <- with(finalOut, eval(parse(text = p.code)))
finalOut$Graph <- p
finalOut$GraphCode <- p.code
return(finalOut)
}
##' Internal function to run a model using gam()
##'
##' This function is not intended to be called by users.
##'
##' @param formula A character string containing a formula style object.
##' @param type A character string indicating the type of dependent variable.
##' Currently \dQuote{normal}, \dQuote{binary}, or \dQuote{count}.
##' @param data A data frame to be used for analysis.
##' @param \dots Additional arguments passed to \code{gam}.
##' @return A summary of the gam model.
##' @importFrom stats as.formula gaussian binomial poisson
##' @importFrom mgcv gam summary.gam
##' @keywords internal
internalrunIt <- function(formula, type, data, ...) {
summary(gam(formula = as.formula(formula), data = data,
family = switch(type,
normal = gaussian(),
binary = binomial(),
count = poisson()),
...))
}
##' Internal function to create a formula
##'
##' This function is not intended to be called by users.
##' It creates a formula style character string from its argument.
##' But note that it does not actually create a formula class object.
##' If you do not want an argument, use the empty string.
##'
##' @param dv A character string of the dependent variable.
##' @param iv A character string or vector of the independent variables
##' @param covariates A character string or vector of the dependent variables
##' @return A character string
##' @keywords internal
##' @examples
##' JWileymisc:::internalformulaIt("mpg", "hp", "am")
##' JWileymisc:::internalformulaIt("mpg", "hp", "")
##' JWileymisc:::internalformulaIt("mpg", "", "am")
internalformulaIt <- function(dv, iv, covariates) {
type <- paste0(
c("", "iv")[(length(iv) & any(nzchar(iv))) + 1],
c("", "cov")[(length(covariates) & any(nzchar(covariates))) + 1])
switch(type,
ivcov = sprintf("%s ~ %s + %s", dv,
paste(iv, collapse = " + "),
paste(covariates, collapse = " + ")),
iv = sprintf("%s ~ %s", dv, paste(iv, collapse = " + ")),
cov = sprintf("%s ~ %s", dv, paste(covariates, collapse = " + ")))
}
##' Compares the effects of various independent variables
##'
##' This is an internal function designed to run many models to compare the
##' unique predictive effect of different IVs with and without covariates on an
##' outcome.
##'
##' @param dv A character string of the depentent variable
##' @param type A character string indicating the type of dependent variable
##' @param iv A character string or vector giving the IV(s)
##' @param covariates A character string or vector giving the covariate(s)
##' @param data The data to be used for analysis
##' @param multivariate A logical value whether to have models with
##' all IVs simultaneously.
##' @param \ldots Additional arguments passed on to the internal
##' function, \code{.runIt}.
##' @return A list with all the model results.
##' @keywords internal
##' @importFrom stats na.omit get_all_vars
##' @importFrom data.table data.table
##' @examples
##' test1 <- JWileymisc:::internalcompareIV(
##' dv = "mpg", type = "normal",
##' iv = "hp",
##' covariates = "am",
##' data = mtcars, multivariate = FALSE)
##' test1$Summary
##' rm(test1)
internalcompareIV <- function(dv, type = c("normal", "binary", "count"),
iv, covariates = character(), data,
multivariate = FALSE, ...) {
if (length(iv) <= 1 & multivariate) {
stop("Cannot use multivariate = TRUE when only a single IV")
}
nIV <- length(iv)
if (length(covariates)) {
f.cov <- internalformulaIt(dv, "", covariates)
}
data <- data
if (multivariate) {
message("Multivariate uses complete cases for all IVs and covariates")
f.all <- internalformulaIt(dv, iv, covariates)
data <- na.omit(get_all_vars(as.formula(f.all), data = data))
m.all <- internalrunIt(f.all, type, data = data, ...)
if (length(covariates)) {
m.all.cov <- internalrunIt(f.cov, type, data = data, ...)
}
}
i <- NULL; rm(i) ## make Rcmd check happy
if (requireNamespace("foreach", quietly = TRUE)) {
`%dopar%` <- foreach::`%dopar%`
results <- foreach::foreach(i = 1:nIV, .combine = list) %dopar% {
if (multivariate) {
out <- list(
data = data,
Unadjusted = internalrunIt(internalformulaIt(dv, iv[i], ""),
type = type, data = data,
...),
Covariate = m.all.cov,
Adjusted = if (!length(covariates)) {
NA
} else {
internalrunIt(internalformulaIt(dv, iv[i], covariates),
type = type, data = data,
...)
},
Reduced = internalrunIt(internalformulaIt(dv, iv[-i], covariates),
type = type, data = data,
...),
Full = m.all)
out$Summary <- data.table(
Type = c("Unadjusted", "Adjusted", "MultiUnique"),
R2 = c(out$Unadjusted$r.sq,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$r.sq - out$Covariate$r.sq),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$r.sq - out$Reduced$r.sq)),
D = c(out$Unadjusted$dev.expl,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$dev.expl - out$Covariate$dev.expl),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$dev.expl - out$Reduced$dev.expl)))
out
} else {
tmpdata <- na.omit(get_all_vars(as.formula(
internalformulaIt(dv, iv[i], covariates)), data = data))
out <- list(
data = tmpdata,
Unadjusted = internalrunIt(internalformulaIt(dv, iv[i], ""),
type = type, data = tmpdata,
...),
Covariate = if (!length(covariates)) {
NA
} else {
internalrunIt(internalformulaIt(dv, "", covariates),
type = type, data = tmpdata,
...)
},
Adjusted = if (!length(covariates)) {
NA
} else {
internalrunIt(internalformulaIt(dv, iv[i], covariates),
type = type, data = tmpdata,
...)
},
Reduced = NA,
Full = NA)
out$Summary <- data.table(
Type = c("Unadjusted", "Adjusted", "MultiUnique"),
R2 = c(out$Unadjusted$r.sq,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$r.sq - out$Covariate$r.sq),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$r.sq - out$Reduced$r.sq)),
Deviance = c(out$Unadjusted$dev.expl,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$dev.expl - out$Covariate$dev.expl),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$dev.expl - out$Reduced$dev.expl)))
out
}
}
} else {
results <- list()
results <- for (i in 1:nIV) {
if (multivariate) {
out <- list(
data = data,
Unadjusted = internalrunIt(internalformulaIt(dv, iv[i], ""),
type = type, data = data,
...),
Covariate = m.all.cov,
Adjusted = if (!length(covariates)) {
NA
} else {
internalrunIt(internalformulaIt(dv, iv[i], covariates),
type = type, data = data,
...)
},
Reduced = internalrunIt(internalformulaIt(dv, iv[-i], covariates),
type = type, data = data,
...),
Full = m.all)
out$Summary <- data.table(
Type = c("Unadjusted", "Adjusted", "MultiUnique"),
R2 = c(out$Unadjusted$r.sq,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$r.sq - out$Covariate$r.sq),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$r.sq - out$Reduced$r.sq)),
D = c(out$Unadjusted$dev.expl,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$dev.expl - out$Covariate$dev.expl),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$dev.expl - out$Reduced$dev.expl)))
results[[i]] <- out
} else {
tmpdata <- na.omit(get_all_vars(as.formula(
internalformulaIt(dv, iv[i], covariates)), data = data))
out <- list(
data = tmpdata,
Unadjusted = internalrunIt(internalformulaIt(dv, iv[i], ""),
type = type, data = tmpdata,
...),
Covariate = if (!length(covariates)) {
NA
} else {
internalrunIt(internalformulaIt(dv, "", covariates),
type = type, data = tmpdata,
...)
},
Adjusted = if (!length(covariates)) {
NA
} else {
internalrunIt(internalformulaIt(dv, iv[i], covariates),
type = type, data = tmpdata,
...)
},
Reduced = NA,
Full = NA)
out$Summary <- data.table(
Type = c("Unadjusted", "Adjusted", "MultiUnique"),
R2 = c(out$Unadjusted$r.sq,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$r.sq - out$Covariate$r.sq),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$r.sq - out$Reduced$r.sq)),
Deviance = c(out$Unadjusted$dev.expl,
ifelse(is.na(out$Adjusted)[1], NA,
out$Adjusted$dev.expl - out$Covariate$dev.expl),
ifelse(is.na(out$Reduced)[1], NA,
out$Full$dev.expl - out$Reduced$dev.expl)))
results[[i]] <- out
}
}
}
if (length(results) != nIV & nIV == 1) {
results <- list(results)
}
return(results)
}
#' Compares the effects of various independent variables on dependent variables
#'
#' Utility to estimate the unadjusted, covariate adjusted, and
#' multivariate adjusted unique contributions of one or more IVs
#' on one or more DVs
#'
#' @param dv A character string or vector of the depentent variable(s)
#' @param type A character string or vector indicating the type of
#' dependent variable(s)
#' @param iv A character string or vector giving the IV(s)
#' @param covariates A character string or vector giving the covariate(s)
#' @param data The data to be used for analysis
#' @param multivariate A logical value whether to have models with all
#' IVs simultaneously.
#' @param \ldots Additional arguments passed on to the internal function,
#' \code{.runIt}.
#' @return A list with all the model results.
#' @export
#' @examples
#' test1 <- compareIVs(
#' dv = c("mpg", "disp"),
#' type = c("normal", "normal"),
#' iv = c("hp", "qsec"),
#' covariates = "am",
#' data = mtcars, multivariate = TRUE)
#' test1$OverallSummary
#' rm(test1)
compareIVs <- function(dv, type, iv, covariates = character(),
data, multivariate = FALSE, ...) {
stopifnot(identical(length(dv), length(type)))
res <- lapply(seq_along(dv), function(i) {
internalcompareIV(
dv = dv[i], type = type[i],
iv = iv,
covariates = covariates,
data = data,
multivariate = multivariate,
...)
})
res$OverallSummary <- do.call(rbind, lapply(seq_along(dv), function(x) {
do.call(rbind, lapply(seq_along(iv), function(y) {
cbind.data.frame(
dv = dv[x],
iv = iv[y],
res[[x]][[y]]$Summary,
stringsAsFactors = FALSE)
}))
}))
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.