# Author: Brenden Tervo-Clemmens
# Packager: Will Foran
#' gam_growthrate
#'
#' https://people.maths.bris.ac.uk/~sw15190/mgcv/tampere/mgcv-advanced.pdf
#' https://stats.stackexchange.com/questions/190348/can-i-use-bootstrapping-to-estimate-the-uncertainty-in-a-maximum-value-of-a-gam
#' @description posterior simulation for confidence intervals of local slope/growth rate (deriv) of mgcv gam modeled variable (age) - random effects: intercepts only (not predicted)
#' @param m mgcv gam model object (only)
#' @param agevar variable for growth rate
#' @param idvar random effects/subject id variable, set to NULL if none
#' @param nnumber of iterations to run (quick)
#' @param qntquantiles to use for confidence interval
#' @importFrom MASS mvrnorm
#' @export
#' @examples
#' d <- read.csv("/Volumes/Phillips/R03_BehavioralBTC/data/btc_R03scoredmeasures_20190313.csv") %>%
#' group_by(id) %>%
#' mutate(visit=rank(d8))
#' f <- f1score ~ s(Ageatvisit) + s(visit) + s(id, bs="re")
#' m <- mgcv::gam(f, data=d)
#' ci <- gam_growthrate(m, 'Ageatvisit', 'id')
gam_growthrate <- function(m, agevar, idvar=NULL, n.iterations=10000, qnt=c(.025, .975)) {
simdiff <- sim_diff1_from_gam(m, agevar, idvar, n.iterations=n.iterations)
ci <- ci_from_simdiff1(simdiff$pred, simdiff$ages, qnt=qnt)
ci$fit <- simdiff$fit
return(ci)
}
#find vars in a formula surround by s(...), ignore s(...,bs="re")
# remove any additionaly columns provided as 2nd or more arguments
# probably agevar
#' find_covars_gam
#'
#' @description pull out covariests from mgcv model formula
#' @param fml formula to get covariates from
#' @param ... covariates to ignore (like agevar)
#' @export
#' @examples
#' fml <- y ~ s(x) + x2 + s(x3, bs="re") + x4
#' covars <- find_covars_gam(fml, x4) # x, x2
find_covars_gam <- function(fml, ...) {
ind <- as.character(fml)[3]
# s(x1) + x2 +s(x3,"re") -> x1, x2
vars <- unlist(strsplit(ind, "\\+")) # formula split by +
vars <- gsub(" ", "", vars) # remove spaces
vars <- gsub("\\w+\\((.*?)\\)", "\\1", vars) # remove surrounding s()
# remove random effect
no_re <- grep("re[\"']", vars, value=T, invert=T)
no_re <- gsub(",.*", "", no_re) # nothing after comma
# remove anything else (likely passed in agevar)
if (length(list(...)) != 0L) {
no_re <- no_re[ ! no_re %in% c(...)]
}
return(no_re)
}
sim_diff1_from_gam <- function(m, agevar, idvar=NULL,
n.iterations=10000, interval_inc=.1) {
v <- m$model[, agevar]
cond_list <- list(seq(min(v), max(v), by=interval_inc))
pp <- data.frame(a=cond_list[[1]], b=Inf)
# names should match what went into the model
names(pp) <- c(agevar, idvar)
# what if idvar is factor (Inf wont work)
if (is.null(idvar)) {
# do nothing. no idvar
} else if (is.factor(m$model[, idvar])){
# select idvar with the middle most random effect
# random effects are coefficents like s(idvar).xxxxx
# where xxxx is the index of the specific idvar factor name
idvarpatt <- sprintf("s\\(%s\\)", idvar)
idvarpatt. <- sprintf("s\\(%s\\).", idvar)
randeff <- m$coefficients[ grep(idvarpatt, names(m$coefficients)) ]
medval <- sort(randeff)[floor(length(randeff)/2)]
med_re_name <- names(which(randeff == medval))
median_idx <- gsub(idvarpatt., "", med_re_name)
median_subj <- levels(m$model[, idvar])[as.numeric(median_idx)]
warning("gam w/factor idvar, ",
"setting the middle most random effect subject: ",
median_subj)
pp[, 2] <- median_subj
# alternatively, select the first
# pp[, 2] <- m$model[1, idvar]
} else {
warning("predition with continous (non-factor) idvar will give 'Inf' fit")
# maybe pick middle value instead?
# pp[, 2] <- mean(m$model[, idvar], na.rm=T)
}
# for all covars, pick out the mean
for (cv in find_covars_gam(m$formula, agevar)) {
x <- m$model[, cv]
if (is.character(x) || is.factor(x) ){
warning("gam w/factor covar, setting all sim to the first!")
y <- x[1]
# TODO: maybe pracma::Mode ?
} else {
y <- mean(x, na.rm=T)
}
pp[, cv] <- y
}
Xp <- predict(m, pp, type="lpmatrix")
mu_beta <- coef(m)
sigma_Vb <- vcov(m)
# variance-covariance matrix of the main parameters fitted model
# used as: a positive-definite symmetric matrix specifying
# the covariance matrix of the variables.
# set.seed(10)
mrand <- MASS::mvrnorm(n.iterations, mu_beta, sigma_Vb)
# gamm$gam doesn't support 'family'
# class( mgcv::gamm(data=mtcars, cyl ~ mpg + s(wt) )$gam)
# [1] "gam"
# class( mgcv::gam(data=mtcars, cyl ~ mpg + s(wt) ))
# [1] "gam" "glm" "lm"
if("glm" %in% class(m)){
ilink <- family(m)$linkinv
} else {
ilink <- m$family$linkinv
}
# only want inetercept and agevar
keep_cols <- grep(paste0("Intercept|", agevar), dimnames(Xp)[[2]], value=T)
Xp_agevar <- Xp[, keep_cols]
mrand_agevar <- mrand[, keep_cols]
# generate a whole bunch of plausable values, get the diff
diffs <- lapply(seq_len(n.iterations), function(i) {
fit <- ilink(Xp_agevar %*% mrand_agevar[i, ])
dff <- c(NA, diff(fit))
return(dff)
})
return(list(pred=diffs, ages=pp[, 1], fit=predict(m, pp)))
}
ci_from_simdiff1 <- function(pred, ages, qnt=c(.025, .975)) {
names(pred) <- 1:length(pred)
mm <- t(dplyr::bind_rows(pred))
# this is the ouptut !
mean_dff <- apply(mm, 2, mean)
ci <- apply(mm, 2, quantile, qnt, na.rm=T)
colnames(ci) <- ages
out <- data.frame(mean_dff=mean_dff, ages=ages)
ci_out <- t(ci)
dimnames(ci_out)[[2]] <- c("ci_low", "ci_high")
return(cbind(out, ci_out))
# NEVER REACHED -- left as bad documentation
# old: return just ci and mean_dff
return(list(ci=ci, mean_dff=mean_dff))
# this is for fun
ages[which.min(ci[1, ])]
ages[which.min(ci[2, ])]
plot(ages, mean_dff)
for (i in 1:10) lines(ages, pred[[i]])
}
too_small <- function(x) abs(x) < 10^-15
clip_on_sig <- function(ci){
# if confidence interval includes zero
# signs of x and y will be different, -x * +y < 0
# or if both high and low are extremly close to zero
not_sig <- ci$ci_low * ci$ci_high < 0 |
(too_small(ci$ci_low) & too_small(ci$ci_high))
ci$mean_dff_clip <- ci$mean_dff
ci$mean_dff_clip[not_sig] <- 0
return(ci)
}
#' gam_maturation_point
#'
#' @description get maturation point from confidence interval dataframe
#' @param ci growthrate_gam output (confidence interval and derivitive)
#' @export
gam_maturation_point <- function(ci) {
# when ci bounds include 0 (different sign), no longer signficant
# clip out insignificant derivitive
if (is.na(ci$ci_low[1])) ci <- ci[-1, ]
# get mean_df_clip column
if (! "mean_dff_clip" %in% names(ci)) ci <- clip_on_sig(ci)
# find maturation point after the first signficant age
onset_sig <- ci$ages[ci$mean_dff_clip != 0]
maturation_pnt <- NA
if (length(onset_sig)>0L && !all(is.na(onset_sig))) {
mat_points_idx <- ci$mean_dff_clip==0 & ci$ages > onset_sig[1]
if (length(mat_points_idx) > 0L && any(mat_points_idx))
maturation_pnt <- min(ci$ages[mat_points_idx], na.rm=T)
}
return(maturation_pnt)
}
#' plot gam factor with deriv
#'
#' @description plot output of growthrate_gam and list for gam_growthrate_plot_combine
#' @param d dataframe model was built on (for actual points)
#' @param model gam model (for predicted line)
#' @param ci growthrate_gam output (confidence interval and derivitive)
#' @param agevar column name of age var e.g. 'Ageatvisit'
#' @param idvar line grouping var e.g., 'lunaid', set to NULL if no random effect in model
#' @param yvar model yvar e.g. 'f1score', default pulled from model formula
#' @param plotsavename PDF output name e.g. 'growth.pdf', not saved when NA, not ploted when NULL
#' @param xplotname 'Age'
#' @param yplotname 'f1score', default is yvar (model yvar)
#' @param draw_maturation T|F, show dotted line on first maturation point
#' @param draw_points T|F, show individual points as scatter plot over gam fit line
#' @param show_all_fill T|F, should we clip the raster fill to only significant ages?
#' @param ci_plot T|F, plot 95 percent confidence interval with geom_ribbon?
#' @export
#' @importFrom itsadug get_predictions
#' @examples
#' # no random effects
#' m <- mgcv::gam(f1score ~ s(Ageatvisit), data=d)
#' ci <- gam_growthrate(m, 'Ageatvisit')
#' gam_growthrate_plot(d, m, ci, 'Ageatvisit')
#'
#' # w/random effects 'id'
#' m <- mgcv::gam(f1score ~ s(Ageatvisit) + s(visit) + s(id, bs="re"), data=d)
#' ci <- gam_growthrate(m, 'Ageatvisit')
#' gam_growthrate_plot(d, m, ci, 'Ageatvisit', 'id')
#'
#' # replot example, see gam_growthrate_plot_combine
#' mod <- mgcv::gam(conc~s(uptake), data=CO2)
#' ci <- LNCDR::gam_growthrate(mod, 'uptake', n = 10000, qnt = c(0.025, 0.975))
#' plist <- gam_growthrate_plot(cars, mod, ci, 'uptake', xplotname='uptake')
#' plist$ageplot <- plist$ageplot + xlab('foobar')
#' gam_growthrate_plot_combine(plist$ageplot, plist$tile)
gam_growthrate_plot <-
function(d, model, ci, agevar, idvar=NULL,
yvar=as.character(model$formula[2]),
plotsavename=NA, xplotname="Age", yplotname=yvar,
draw_maturation=T, draw_points=T, show_all_fill=F,
ci_plot=T){
require(ggplot2)
require(itsadug)
# TODO:
# remove or replace first row mean_dff
# NA draws weird first color on spectrum
# make sure we have what we say we want
if (! "gam" %in% class(model) ) stop("model given must be a gam model!")
if (! "data.frame" %in% class(d) ) stop("d given must be a data.frame!")
if (! "data.frame" %in% class(ci) ) stop("ci is not growthrate_gam() output")
if (! yvar %in% names(model$model) ) stop(yvar, "not in model dataframe!")
ci$mean_dff_clip <- ci$mean_dff
# when ci bounds include 0 (different sign), no longer signficant
ci <- clip_on_sig(ci)
maturation_pnt <- gam_maturation_point(ci)
# warn about no matruation point
if (is.na(maturation_pnt) && draw_maturation) {
warning("No maturation point!")
draw_maturation <- F
}
# show even unsignficant change in raster if show_all_fill
fill_column <- ifelse(show_all_fill, "mean_dff", "mean_dff_clip")
## setup derivitive raster plot
deriv_range <- range(ci$mean_dff, na.rm=T)
tile <-
ggplot(ci[-1, ]) + # don't plot first row (is NA)
aes(x=ages, y=1, fill=!!sym(fill_column)) +
geom_raster(interpolate=TRUE) +
scale_fill_gradient2(
low = "blue", mid = "white", high = "red",
midpoint = 0,
space = "Lab",
breaks=sort(c(0, deriv_range)), # assumes range covers 0
limits=deriv_range
) +
xlab(sprintf("\n%s", xplotname))
# draw dotted line where maturation point is
if (draw_maturation)
tile <- tile +
geom_segment(
linetype=2, colour="black",
aes(x=maturation_pnt, xend=maturation_pnt, y=.5, yend=1.5))
# lunaize the figure
tile_luna <- lunaize_geomraster(tile) +
theme(text = element_text(size=36))
# predictions
modeldata<-data.frame(ydata=model$y, agevar=model$model[, agevar])
condlist <- list(a=ci$ages)
names(condlist) <- agevar
# 20190826 BTC - remove random effects (bug fix)
agepred <- itsadug::get_predictions(model, cond = condlist, rm.ranef=TRUE)
ageplot<-
ggplot(agepred) +
aes(x=!!sym(agevar), y=fit) +
# solid bold line for fitted model
geom_line(colour="black", linewidth=2) +
# label plot
ylab(yplotname) +
xlab(xplotname)
if (ci_plot) {
ageplot <- ageplot +
geom_ribbon(aes(ymin=fit - CI, ymax=fit + CI), alpha=.3)
}
# individual points for actual data
if (draw_points) ageplot <- ageplot +
geom_point(data=modeldata, aes(y=ydata, x=agevar), alpha=.2)
# add connecting lines if we have an idvar
if (!is.null(idvar) && draw_points)
ageplot <- ageplot +
geom_line(data=d, aes(y=!!sym(yvar), group=!!sym(idvar)), alpha=.2)
# lunaize main plot
ageplot_luna<-LNCDR::lunaize(ageplot)+
theme(text = element_text(size=36),
axis.title.x=element_blank(),
axis.text.x=element_blank())
# save to file if we have plotsavename
g <- gam_growthrate_plot_combine(ageplot_luna, tile_luna, plotsavename)
list_of_plots <- list(tile=tile_luna, ageplot=ageplot_luna, both=g)
# give back everything we created
return(list_of_plots)
}
#' combine age plot and tile slop heatmap into one figure (w/grob and grid)
#'
#' @description save two figures (only use if you need to mess with titles)
#' @export
#' @param ageplot_luna ggplot plot of subject coef by age (top part of figure)
#' @param tile_luna tile heatmap of slope (bottom part of figure)
#' @param PDFout PDF name to save output into, NA no saved, NULL not plotted
#' @examples
#' data <- data.frame(age=1:100,fd_mean=1:100,subj=as.factor(letters[1:25]), conn_ahpc_vmpfc=randu[1:100,1])
#' mod<-mgcv::gam(conn_ahpc_vmpfc~s(age)+s(fd_mean)+s(subj, bs="re"), data=data)
#' ci<-LNCDR::gam_growthrate(mod, 'age', n = 10000, qnt = c(0.025, 0.975), idvar='subj')
#' plist <- gam_growthrate_plot(data, mod, ci, 'age', idvar='subj')
#' plist$tile <- plist$tile + xlab('AGE')
#' g <- gam_growthrate_plot_combine(plist$ageplot, plist$tile, 'gammod.pdf')
gam_growthrate_plot_combine <- function(ageplot_luna, tile_luna, PDFout=NA) {
require(grid)
require(gridExtra)
tilegrob<- ggplotGrob(tile_luna)
agegrob <- ggplotGrob(ageplot_luna)
g<-rbind(agegrob, tilegrob, size="first")
panels <- g$layout$t[grep("panel", g$layout$name)]
g$heights[panels] <- unit(c(1, .1), "null")
# NULL is no draw
# NA is draw to screen
# filename is save to pdf
if (is.null(PDFout)){
return(g)
} else if (is.na(PDFout)) {
grid.draw(g)
} else {
# check we are saving pdf
ext <- rev(strsplit(PDFout, "\\.")[[1]])[1]
if (ext != "pdf") stop(PDFout, " must end in .pdf!")
# draw into pdf
pdf(PDFout, height = 9, width = 12)
grid.draw(g)
dev.off()
}
return(g)
}
lunaize_geomraster<-function(x){
x+
theme_bw()+
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
legend.position = "none")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.