#' Plots spline with confidence intervals
#'
#' @param outcome Outcome variable shown in the y-axis. This should be convertible using \code{AsNumeric}.
#' @param predictor Predictor variable shown in the x-axis. This should be a numeric or date variable.
#' @param type Regression link function used. Currently can be "Binary Logit" or "Linear".
#' @param subset Optional logical vector of the same length as \code{outcome}
#' and \code{predictor} specifying whether the observation should be included
#' in the analysis.
#' @param weights Optional numeric vector used as sampling weights.
#' @param seed Random seed for reproducibility
#' @param confidence Width of confidence interval shown around the spline.
#' @param number.draws Number of possible trend lines to super-impose.
#' @param trim.padding Logical; whether to remove padding around plotly chart.
#' Default is set to false so that output is the same as old charts.
#' @importFrom flipTransformations AsNumeric AdjustDataToReflectWeights DichotomizeFactor
#' @importFrom mgcv gam mroot
#' @importFrom ggplot2 ggplot geom_ribbon geom_path aes theme_set theme_bw labs scale_y_continuous
#' @importFrom plotly ggplotly config
#' @importFrom utils stack
#' @importFrom scales percent
#' @importFrom rlang .data
#' @importFrom stats binomial coef complete.cases family predict quantile rnorm vcov gaussian
#' @importFrom verbs Sum
#' @export
SplineWithSimultaneousConfIntervals <- function(outcome,
predictor,
type = "Binary Logit",
subset = NULL,
weights = NULL,
seed = 42,
number.draws = 30,
confidence = 0.95,
trim.padding = FALSE)
{
if (length(unique(outcome)) == 1)
stop("Could not construct model as outcome variable contains only one value.")
if (type == "Binary Logit")
{
# If outcome variable is too unbalanced then binary logit model
# cannot be fitted anyway so no need to continue
twolevel <- try(DichotomizeFactor(as.factor(outcome)), silent = TRUE)
if (inherits(twolevel, "try-error"))
stop("Outcome variable cannot be dichotimized (e.g. perhaps only has 1 value).")
tidy.outcome <- twolevel == levels(twolevel)[2]
attr(outcome, "label") <- paste(attr(outcome, "label"), levels(twolevel)[2])
} else
tidy.outcome <- AsNumeric(outcome, binary = FALSE)
data <- data.frame(outcome = tidy.outcome,
predictor = predictor,
predictor.numeric = scale(as.numeric(predictor)))
logit <- type == "Binary Logit"
# Filters and weights
if (length(subset) > 1)
{
if (length(subset) != nrow(data))
stop("Filter does not have the same length as the input data. Select a variable from the same data set.")
data <- subset(data, subset)
weights <- subset(weights, subset)
}
if (!is.null(weights))
{
data <- try(AdjustDataToReflectWeights(data, weights))
if (inherits(data, "try-error"))
stop("Could not create dataset of ", ceiling(Sum(weights, remove.missing = FALSE)),
" observations as specified by the weights")
}
data <- data[complete.cases(data), ]
ylab <- attr(outcome, "label")
xlab <- attr(predictor, "label")
# Fitting the model
m <- gam(outcome ~ s(predictor.numeric), data = data,
family = if (logit) binomial(link="logit") else gaussian(link="identity"),
method = "REML")
newd <- with(data,
data.frame(predictor.numeric = seq(min(predictor.numeric, na.rm = TRUE),
max(predictor.numeric, na.rm = TRUE), length = 200),
predictor = seq(min(predictor, na.rm = TRUE),
max(predictor, na.rm = TRUE), length = 200)))
if (inherits(newd$predictor, "POSIXct"))
newd$predictor <- as.POSIXct(newd$predictor, origin = "1970-01-01")
pred <- predict(m, newd, type = "link", se.fit = TRUE)
pred$predictor <- newd$predictor
# Sample from MVN random deviates
rmvn <- function(n, mu, sig)
{
L <- mroot(sig)
m <- ncol(L)
t(mu + L %*% matrix(rnorm(m*n), m, n))
}
set.seed(seed)
N <- 10000
Vb <- vcov(m)
BUdiff <- rmvn(N, mu = rep(0, nrow(Vb)), sig = Vb)
Cg <- predict(m, newd, type = "lpmatrix")
simDev <- Cg %*% t(BUdiff)
absDev <- abs(sweep(simDev, 1, pred$se.fit, FUN = "/"))
masd <- apply(absDev, 2L, max)
crit <- quantile(masd, prob = confidence, type = 8)
pred <- data.frame(data.frame(pred), newd,
uprS = pred$fit + (crit * pred$se.fit),
lwrS = pred$fit - (crit * pred$se.fit))
if (logit)
{
invlink <- family(m)$linkinv
pred$fit <- invlink(pred$fit)
pred$uprS <- invlink(pred$uprS)
pred$lwrS <- invlink(pred$lwrS)
}
sims <- rmvn(N, mu = coef(m), sig = Vb)
fits <- Cg %*% t(sims)
# Create plot
theme_set(theme_bw())
p <- ggplot(pred, aes(x = .data$predictor, y = .data$fit)) +
geom_ribbon(aes(ymin = .data$lwrS, ymax = .data$uprS), alpha = 0.2, fill = "red") +
geom_path(lwd = 2) +
labs(y = ylab, x = xlab,
title = paste0("Simultaneous ", confidence * 100, "% confidence intervals for fitted GAM"),
subtitle = sprintf("Each line is one of %i draws from the Bayesian posterior distribution of the model", number.draws))
if (number.draws > 0)
{
rnd <- sample(N, number.draws)
stackFits <- if (logit) stack(as.data.frame(invlink(fits[, rnd])))
else stack(as.data.frame(fits[, rnd]))
stackFits <- transform(stackFits, predictor.numeric = rep(newd$predictor.numeric, length(rnd)))
stackFits$predictor = newd$predictor
p = p + geom_path(data = stackFits,
mapping = aes(y = .data$values, x = .data$predictor, group = .data$ind),
alpha = 0.4, colour = "grey20")
}
if (logit)
p <- p + scale_y_continuous(labels = percent)
p <- ggplotly(p)
p <- config(p, displayModeBar = FALSE)
p$sizingPolicy$browser$padding <- if (trim.padding) 0 else 40
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.