library(tidyverse)
#' Predicted probability from svyglm model and new data, along with 95\% CI.
#'
#' @param mod A svyglm model object.
#' @param df A data frame of new data created with, e.g., expand.grid.
#' @return df with three additional columns:
#' \describe{
#' \item{Probability}{The predicted probability at each value in df}
#' \item{LL}{The lower 95\% confidence limit}
#' \item{UL}{The upper 95\% confidence limit}
#' }
#' @examples
#' m = glm(am ~ mpg + hp + cyl, family=binomial, data=mtcars)
#' newdata = expand.grid(mpg=10:35, hp=50:335, cyl=c(4,6,8))
#' df = predicted_prob(m, newdata)
#' @export
predicted_prob = function(mod, df){
df2 = cbind(df, predict(mod, newdata = df, type = "link", se=T))
return(
within(df2, {
Probability = plogis(link)
LL = plogis(link - (1.96 * SE))
UL <- plogis(link + (1.96 * SE))
})
)
}
cohen_d <- function(x, ...) UseMethod("cohen_d")
cohen_d.default <- function(x, y, test=F, sig = 3, ...){
if (!mode(x) %in% c('integer', 'numeric')) stop('x must be a numeric variable')
if (!mode(y) %in% c('integer', 'numeric')) stop('y must be a numeric variable')
x <- na.omit(x)
nx <- length(x)
if (nx < 2) stop("x must have 2 or more non-missing values")
y <- na.omit(y)
ny <- length(y)
if (ny < 2) stop("y must have 2 or more non-missing values")
d <- signif((mean(x) - mean(y))/sqrt(((nx-1)*sd(x)^2 + (ny-1)*sd(y)^2)/(sum(nx, ny)-2)), sig)
if (test){
p.value <- suppressWarnings(wilcox.test(x, y)$p.value)
p.value <- signif(p.value, sig)
return(list(d=d, 'p-value'=p.value))
}
return(d)
}
#' Cohen's d
#'
#' @param f A two-sided formula: y ~ x.
#' @param d A data frame.
#' @param test Whether to include a Wilcoxon (Mann-Whitney) test of location difference (default: F)
#' @param sig The number of significant digits to report (default: 3)
#' @return Cohen's d
#' @examples
#' cohen_d(mpg ~ am, mtcars)
#' @export
cohen_d.formula = function(f, data, ...){
mf <- model.frame(f, data = data)
if (ncol(mf) != 2) stop('Formula must contain only 2 variables: y ~ x')
facs <- unique(mf[[2]])
if (length(facs) != 2) stop('factor must have exactly two values')
if (!mode(mf[[1]]) %in% c('integer', 'numeric')) stop('Left hand side must be a numeric variable')
v1 <- mf[[1]][mf[[2]]==facs[1]]
v2 <- mf[[1]][mf[[2]]==facs[2]]
do.call("cohen_d", list(v1, v2, ...))
}
#' pca_loadings_plot
#'
#' A lollipop plot of PCA variable loadings,
#' as provided by prcomp, e.g., PC1, PC2, ...
#'
#' @param obj Output of prcomp
#' @param components A numeric vector of components to plot
#' @param sortby Sort variables by loadings on this component (index to component vector)
#' @param threshold Remove variables with sum(abs(loadings)) below threshold (default=0)
#' @param reverse Vector of components that will be multiplied by -1
#' @return ggplot object
#' @examples
#' m <- prcomp(mtcars, scale = T)
#' pca_loadings_plot(m, components = c(1,2))
#' @export
pca_loadings_plot <- function(obj, components = 1:3, sortby = 1, threshold = 0, reverse=NULL){
require(dplyr)
if (!all(reverse %in% components)) stop("reverse vector must be a subset of components vector")
pca_summ <- summary(obj)
comp_nms <- colnames(pca_summ$importance)
varprop <- pca_summ$importance[2,components] # Get % variance
nms <- comp_nms[components]
varprop <- paste0(nms, ' (', round(varprop*100, 1), '%)')
names(varprop) <- nms
pcs <- colnames(obj$rotation)[components]
loadings <-
obj$rotation[,components, drop = F] %*% diag(obj$sdev[components]) %>%
data.frame %>%
setNames(pcs)
if(!is.null(reverse)) loadings[reverse] <- -loadings[reverse]
loadings <- loadings[rowSums(abs(loadings)>threshold)>0,]
loadings %>%
dplyr::mutate(Variable = forcats::fct_reorder(rownames(.), loadings[, sortby])) %>%
tidyr::gather(key = PC, value = Loading, -Variable) %>%
dplyr::mutate(PC = varprop[PC]) %>%
ggplot2::ggplot() +
ggplot2::geom_segment(ggplot2::aes(x=0, xend=Loading, y=Variable, yend=Variable, colour = Loading), size=2) +
ggplot2::geom_point(ggplot2::aes(x=Loading, y=Variable, colour = Loading), size=3) +
ggplot2::scale_color_gradient2(low = viridisLite::magma(11)[8], mid = 'white', 'high' = viridisLite::magma(11)[4]) +
ggplot2::geom_vline(xintercept = 0) +
ggplot2::facet_wrap(~PC) +
ggplot2::labs(x = '\nStandardized loadings', y = '') +
ggplot2::theme_bw(15)
}
#' @title pca_biplot
#' @description Create a ggplot biplot of a prcomp obj
#' @param obj prcomp obj
#' @param components The PCs to plot, specified as a numeric vector of length 2, Default: c(1, 2)
#' @param data Optional data frame with the same number of rows as that used to compute pca, Default: NULL
#' @param threshold Only display variables with loadings exceeding this value, Default: 0
#' @param reverse Vector of components that will be multiplied by -1
#' @param label_size text size of the eigenvector labels, Default: 5
#' @param geom_point whether to add geom_point to plot, Default: T
#' @return ggplot obj
#' @details Creates a ggplot obj from a prcomp obj. Use threshold to omit variables with small loadings. Add a data frame to create a more complex ggplot. Possibly set geom_point=F to add a geom_point with custom aesthetics.
#' @examples
#' \dontrun{
#' if(interactive()){
#' m <- prcomp(mtcars, scale. = T)
#' pca_biplot(m)
#' pca_biplot(m, data = mtcars, geom_point=F) + geom_point(aes(size = mpg))
#' }
#' }
#' @rdname pca_biplot
#' @export
pca_biplot <- function(obj, components = c(1,2), data = NULL, threshold = 0, reverse=NULL, label_size = 5, geom_point=T) {
if (class(obj) != 'prcomp') stop('obj class must be prcomp')
if (length(components) != 2 | mode(components) != 'numeric') stop('components is not a numeric vector of length 2')
if (max(components > ncol(obj$x)) | min(components) < 1) stop(paste('components must be between 1 and ', num_pc))
if (!all(reverse %in% components)) stop("reverse vector must be a subset of components vector")
pcs <- colnames(obj$x)
if (!is.null(reverse)){
obj$x[,reverse] <- -obj$x[,reverse]
obj$rotation[,reverse] <- -obj$rotation[,reverse]
}
pcX <- pcs[components[1]]
pcY <- pcs[components[2]]
if (!is.null(data)){
if (!'data.frame' %in% class(data)) stop('data must be a data frame')
if (nrow(data) != nrow(obj$x)) stop('the number of rows of data, ', nrow(data), ', does not equal the number of rows of x, ', nrow(obj$x))
if (pcX %in% names(data) | pcY %in% names(data)) stop(paste('data cannot contain variables', pcX, 'or', pcY))
data[pcX] <- obj$x[,pcX]
data[pcY] <- obj$x[,pcY]
} else {
data = data.frame(obj$x[,components])
}
pct_var <- paste0('(', round(100 * summary(obj)$importance[2,components], 1), '%)')
pct_var <- paste(colnames(obj$x)[components], pct_var)
plot <-
ggplot(data, aes_string(x=pcX, y=pcY)) +
geom_hline(aes(yintercept = 0), size=.2) +
geom_vline(aes(xintercept = 0), size=.2) +
labs(x = pct_var[1], y = pct_var[2])
if (geom_point){
plot <- plot + geom_point()
}
datapc <- data.frame(varnames=rownames(obj$rotation), obj$rotation[,components])
datapc <- datapc[rowSums(abs(datapc[-1])>threshold)>0,]
mult <- min(
(max(data[,pcY]) - min(data[,pcY])/(max(datapc[,pcY])-min(datapc[,pcY]))),
(max(data[,pcX]) - min(data[,pcX])/(max(datapc[,pcX])-min(datapc[,pcX])))
)
datapc <- transform(datapc,
v1 = .7 * mult * (get(pcX)),
v2 = .7 * mult * (get(pcY))
)
plot +
coord_equal() +
ggrepel::geom_text_repel(data=datapc, aes(x=v1, y=v2, label=varnames), size = label_size, color="red", max.overlaps = Inf) +
geom_segment(data=datapc, aes(x=0, y=0, xend=v1, yend=v2), arrow=arrow(length=unit(0.2,"cm")), alpha=0.75, color="red") +
theme_minimal()
}
#' @title tjurD
#' @description Computes Tjur's D
#' @param m A glm object with family = binomial
#' @return A list with two elements, rsq = Tjur's D, and plot = ggplot object.
#' @details Computes Tjur's D and produces a probability plot
#' @examples
#' \dontrun{
#' if(interactive()){
#' m <- glm(am ~ mpg, family = binomial, mtcars)
#' out <- tjurD(m)
#' out$rsq
#' out$plot
#' }
#' }
#' @rdname tjurD
#' @export
#' @importFrom ggplot2 ggplot aes geom_histogram facet_wrap
tjurD <- function(m){
y <- m$model[[1]] # Outcome var
v <- sort(unique(y)) # binary levels of y
probs <- m$fitted.values
rsq <- mean(probs[y == v[2]]) - mean(probs[y == v[1]])
df <- data.frame(
Probability = probs,
y = y
)
list(
rsq = rsq,
plot = ggplot2::ggplot(df, ggplot2::aes(Probability)) +
ggplot2::geom_histogram() +
ggplot2::facet_wrap(~y, ncol = 1)
)
}
#' svysmooth2df
#'
#' Extracts the x and y values from one or more svysmooth objects,
#' from the survey package, and returns them in a data frame.
#'
#' @param ... One or more svysmooth objects with names.
#'
#' @return A data frame (tibble) with 3 columns: x, y, and a
#' character vector indicating which values belong to which smooth
#' @details If multiple smooths are supplied, the x and y variables
#' must be the same, but computed over different design objects (e.g.,
#' different subsets of a survey).
#' @export
#' @examples
#' library(survey)
#' data(api)
#' dclus1<-svydesign(id=~dnum, weights=~pw, data=apiclus1, fpc=~fpc)
#' dclus2<-subset(dclus1, sch.wide=="Yes" & comp.imp=="Yes")
#' m1 <- svysmooth(api00~ell,dclus1)
#' m2 <- svysmooth(api00~ell,dclus2)
#' df <- svysmooth2df(full = m1, subset = m2)
svysmooth2df <- function(...){
smooths <- list(...)
if(is.null(names(smooths))){
names(smooths) <- paste('Smooth', 1:length(smooths))
}
x <- purrr::map(smooths, list(1, 'x'))
y <- purrr::map(smooths, list(1, 'y'))
Smooth <- rep(names(smooths), times = purrr::map_int(x, length))
df <-
tibble::tibble(
purrr::flatten_dbl(x),
purrr::flatten_dbl(y),
Smooth
)
names(df)[1] <- names(smooths[[1]])
names(df)[2] <- attr(smooths[[1]], 'ylab')
return(df)
}
#' @title ggdotchart
#' @description ggplot version of base::dotchart
#' @param v A named vector, e.g., a table
#' @param threshold A numeric value. Values in v that are greater than this will have different colors and shapes
#' @return a ggplot object
#' @details Uses theme_minimal, and no axis labels.
#' @examples
#' \dontrun{
#' if(interactive()){
#' ggdotchart(table(mtcars$cyl))
#' }
#' }
#' @rdname ggdotchart
#' @export
ggdotchart <- function(v, threshold = NULL){
d <- tibble::tibble(
x = c(v), # remove table class
y = forcats::fct_reorder(names(v), v)
)
if (!is.null(threshold) & is.numeric(threshold)){
d$threshold <- d$x > threshold
p <- ggplot2::ggplot(d, ggplot2::aes(x, y, colour = threshold, shape = threshold))
} else {
p <- ggplot2::ggplot(d, ggplot2::aes(x, y))
}
if (min(v) >= 0) p <- p + ggplot2::xlim(0, NA)
p +
ggplot2::geom_point(size = 3) +
ggplot2::labs(x = "", y = "") +
ggplot2::theme_minimal(15)
}
#' @title scale_colour_binary
#' @description Provides exactly two colors from the viridis magma palette (red and purple)
#' @param direction Order of colors, 1: purple-red. -1: red-purple, Default: 1
#' @param ... Additional arguments to be passed to ggplot2::discrete_scale
#' @return a colour scale to be used with ggplot2
#' @details The default top level from the discrete viridis scale is bright yellow, which is too bright. This scale uses red for top level.
#' @examples
#' \dontrun{
#' if(interactive()){
#' ggplot(mtcars, aes(hp, mpg, colour=factor(am))) + geom_point() + scale_colour_binary()
#' }
#' }
#' @seealso
#' \code{\link[ggplot2]{discrete_scale}}
#' \code{\link[viridisLite]{magma}}
#' @rdname scale_colour_binary
#' @export
#' @importFrom ggplot2 discrete_scale
#' @importFrom viridisLite magma
scale_colour_binary<- function(direction=1, ...){
ggplot2::discrete_scale(
"colour", "binary",
function(n){
if (n > 2) stop('custom binary palette provides only 2 colors')
cols <- viridisLite::magma(11)[c(4,8)]
if (direction >= 0) cols else rev(cols)
},
...
)
}
#' @rdname scale_colour_binary
#' @export
scale_color_binary <- scale_colour_binary
#' @title scale_fill_binary
#' @description Provides exactly two colors from the viridis magma palette (red and purple)
#' @param direction Order of colors, 1: purple-red. -1: red-purple, Default: 1
#' @param ... Additional arguments to be passed to ggplot2::discrete_scale
#' @return a fill scale to be used with ggplot2
#' @details The default top level from the discrete viridis scale is bright yellow, which is too bright. This scale uses red for top level.
#' @examples
#' \dontrun{
#' if(interactive()){
#' ggplot(mtcars, aes(mpg, fill=factor(am))) + geom_histogram() + scale_fill_binary()
#' }
#' }
#' @seealso
#' \code{\link[ggplot2]{discrete_scale}}
#' \code{\link[viridisLite]{magma}}
#' @rdname scale_fill_binary
#' @export
#' @importFrom ggplot2 discrete_scale
#' @importFrom viridisLite magma
scale_fill_binary<- function(direction=1, ...){
ggplot2::discrete_scale(
"fill", "binary",
function(n){
if (n > 2) stop('custom binary palette provides only 2 colors')
cols <- viridisLite::magma(11)[c(4,8)]
if (direction >= 0) cols else rev(cols)
},
...
)
}
#' @title ggemmeans
#' @description ggplot of estimated marginal means object, with categorical spec variable sorted by estimate
#' @param em An emmeans object
#' @param by String indicating "by" term (conditioning variable)
#' @param reorder If TRUE (default), reorder levels of a categorical predictor by values of the estimate
#' @return a ggplot object
#' @details This function is similar to the emmeans plot function, except that it sorts the levels of a categorical variable by the estimated mean
#' @examples
#' \dontrun{
#' if(interactive()){
#' data(starwars, package='dplyr')
#' m <- lm(height ~ eye_color, starwars)
#' em <- emmeans(m, specs = 'eye_color')
#' ggemmeans(em)
#' }
#' }
#' @rdname ggemmeans
#' @export
ggemmeans <- function(em, by = NULL, reorder = T){
emsum <- summary(em)
estName <- attr(emsum, 'estName')
clNames <- attr(emsum, 'clNames')
var <- attr(emsum, 'pri.vars')[1]
if (reorder & is.factor(emsum[[var]])){
emsum[var] <- forcats::fct_reorder(emsum[[var]], emsum[[estName]])
}
if (is.null(by)){
p <- ggplot(emsum, aes_string(estName, var, xmin = clNames[1], xmax = clNames[2]))
} else {
p <- ggplot(emsum, aes_string(estName, var, xmin = clNames[1], xmax = clNames[2], colour = by))
}
p +
geom_pointrange(lwd = 2.5, alpha = 0.7, position = position_dodge(width = 0.25), fatten = 1) +
# geom_point(position = position_dodge(width = 0.25)) +
guides(colour = guide_legend(override.aes = list(size = 1))) +
theme_minimal(15)
}
#' @title hagenheat
#' @description Basic heatmap using ggplot, dist, and seriation. No dendrograms are plotted.
#' @param d A data frame, matrix, or dist object. Column 1 of a data frame can be row labels. Remaining columns must be numeric.
#' @param method "seriate" or "hclust". Default: "seriate"
#' @param seriation_method If not specified, 'Spectral' will be used for dist objects, and 'PCA' for matrices and data frames.
#' @param independent Whether to seriate a matrix using matrix methods (F) or separate distance matrices for rows and columns (T). Default: F
#' @param hc_method Agglomeration method from hclust, Default: 'ward.D'
#' @param dist Distance method from dist, Default: 'euclidean'
#' @param scale. Whether to scale rows ('row'), columns ('column'), or neither ('none'), Default: 'none'
#' @param viridis_option One of the viridis color options, 'A', 'B', 'C', 'D', 'E', Default: 'D'
#' @param wrap_labels Logical. Wrap the x-axis labels. Default: T
#' @param rotate_labels Logical. Rotate the x-axis labels by 90 degrees. Default: F
#' @param ann_col A data frame with two variables. The first column must be the names of the columns of 'd'. The second column contains the annotation values.
#' @param rev_col Reverse column order. Default: F
#' @param rev_row Reverse row order. Default: F
#' @return A ggplot object
#' @details Produces a very simple ggplot heatmap using viridis colors.
#' Rows and columns are ordered using the \code{seriation} package.
#' For data frames, first column must be row labels. Remaining columns must be numeric.
#' Scaling, if performed, occurs after seriation, and is therefore only for display purposes.
#' Seriation methods for objects when \code{independent=T}: \code{list_seriation_methods('dist')}.
#' Seriation methods for matrices and data frames when \code{independent=F}: \code{list_seriation_methods('matrix')}.
#' @examples
#' \dontrun{
#' if(interactive()){
#' hagenheat(mtcars, scale. = 'column')
#' }
#' }
#' @seealso
#' \code{\link[seriation]{seriate}}
#' \code{\link[seriation]{seriation_methods}}
#' \code{\link[ggnewscale]{new_scale_fill}}
#' @rdname hagenheat
#' @export
#' @importFrom seriation seriate get_order
#' @importFrom dplyr bind_cols
#' @importFrom tidyr gather
#' @importFrom scales label_wrap
#' @importFrom ggnewscale new_scale_fill
hagenheat <- function(
d,
method='seriate',
seriation_method=NULL,
independent = F,
hc_method = 'ward.D',
dist_method = 'euclidean',
scale. = "none",
viridis_option = 'D',
wrap_labels = T,
rotate_labels = F,
ann_col = NULL,
rev_col = F,
rev_row = F
){
if (inherits(d, 'dist')){
rwnms <- labels(d)
if (is.null(seriation_method)) seriation_method <- 'Spectral'
o <- seriation::seriate(d, method = seriation_method)
row_order <- seriation::get_order(o)
col_order <- row_order
d <- as.matrix(d)
if (is.null(rwnms)) {
warning('Lack of labels on dist object might make plot difficult to interpret')
rwnms <- as.character(1:nrow(d))
}
} else {
# Convert data frame to matrix
rwnms <- character(0)
if (inherits(d, 'data.frame')){
if (is.character(d[[1]]) | is.factor(d[[1]])){
rwnms <- d[[1]]
d <- d[-1]
}
d <- as.matrix(d)
}
# If d is logical, converts to numeric
if (mode(d) == 'logical') d <- d*1
if (mode(d) != 'numeric') stop('d must be convertible to a numeric matrix')
if (length(di <- dim(d)) != 2) stop("'d' must have 2 dimensions")
nr <- di[1L]
nc <- di[2L]
if (nr <= 1 || nc <= 1) stop("'d' must have at least 2 rows and 2 columns")
# Get rownames if haven't gotten them already
if (length(rwnms) == 0){
if(length(rownames(d))>1){
rwnms <- rownames(d)
} else {
rwnms <- as.character(1:nrow(d))
}
}
# Order the rows and columns
if (method == 'seriate'){
if (is.null(seriation_method)) seriation_method <- 'PCA'
if (independent){
o <- seriation::seriate(dist(d, method = dist_method), method=seriation_method)
row_order <- seriation::get_order(o)
o <- seriation::seriate(dist(t(d), method = dist_method), method=seriation_method)
col_order <- seriation::get_order(o)
} else {
o <- seriation::seriate(d, method=seriation_method)
row_order <- seriation::get_order(o, dim = 1)
col_order <- seriation::get_order(o, dim = 2)
}
} else if (method == 'hclust'){
hclustrows <- hclust(dist(d, method = dist_method), method = hc_method)
row_order <- hclustrows$order
hclustcols <- hclust(dist(t(d), method = dist_method), method = hc_method)
col_order <- hclustcols$order
} else {
stop("method must be 'seriate' or 'hclust'")
}
}
if (rev_row) row_order <- rev(row_order)
if (rev_col) col_order <- rev(col_order)
if (scale. == 'row'){
d <- t(scale(t(d))[,1])
} else if (scale. == 'column'){
d <- scale(d)[,1]
}
d <- as.data.frame(d)
rwnms <- factor(rwnms, levels = rwnms[row_order])
d <- dplyr::bind_cols(rowname=rwnms, d)
p <-
d %>%
tidyr::gather(key = key, value = value, -1) %>%
mutate(
key = factor(key, levels = colnames(d[-1])[col_order]),
) %>%
ggplot(aes_string('key', 'rowname', fill = 'value')) +
geom_tile() +
scale_fill_viridis_c(option = viridis_option) +
labs(x = "", y = "") +
theme_minimal()
if (wrap_labels) p <- p + scale_x_discrete(labels = scales::label_wrap(10))
if (rotate_labels) p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
if (!is.null(ann_col)){
nms <- names(ann_col)
ann_col$value = 1
h <- nrow(d)/20
p <- p +
ggnewscale::new_scale_fill() +
geom_tile(data = ann_col, aes_string(x = nms[1], y = nrow(d) + h, fill = nms[2]), height = h, width = 1) +
coord_cartesian(clip = 'off') +
theme(
plot.margin = unit(c(2,1,1,1), "lines")
)
}
return(p)
}
#' @title regressiontable
#' @description Create gt table of regression coefficients
#' @param models Named list of lm and glm models.
#' @param caption Optional caption for table, Default: NULL
#' @param sigfig The number of significant digits to display, Default: 3
#' @return A gt table
#' @details Currently only supports lm and glm models
#' @examples
#' \dontrun{
#' if(interactive()){
#' models <- list(
#' "model 1" = lm(mpg ~ hp, mtcars),
#' "model 2" = glm(am ~ hp, family=binomial, mtcars)
#' )
#' regressiontable(models)
#' }
#' }
#' @seealso
#' \code{\link[broom]{tidy}}
#' \code{\link[gt]{gt}}
#' @rdname regressiontable
#' @export
#' @importFrom purrr map_df map
#' @importFrom broom tidy glance
#' @importFrom stringr str_glue_data
#' @importFrom gt gt cols_label fmt_number fmt_scientific tab_footnote tab_style cells_row_groups cell_text cells_body
regressiontable <- function(models, caption = NULL, sigfig = 3){
for (m in models){
if (!class(m)[1] %in% c('lm', 'glm', 'lmerModLmerTest')) stop('only lm, glm, and lmerModLmerTest models are supported')
}
tidy2 <- function(m){
x <- broom::tidy(m, conf.int = T)
if (class(m)[1] == 'lmerModLmerTest'){
x <- x[x$effect == 'fixed',]
x$effect <- NULL
x$group <- NULL
x$df <- NULL
}
return(x)
}
glance2 <- function(m){
x <- broom::glance(m)
if (class(m)[1] == 'lmerModLmerTest'){
x$nobs <- nobs(m)
}
return(x)
}
model_stats <- purrr::map_df(models, ~tidy2(.), .id = 'Model')
names(model_stats) <- c('Model', 'Variable', 'Estimate', 'Std.Err', 'Statistic', 'P-value', 'Lower 95% CI', 'Upper 95% CI')
glue_dict <- c(
'lm' = 'N={nobs}; Rsq={r.squared}; Adj.Rsq={adj.r.squared}; F({df},{df.residual})={statistic}; p={p.value}',
'glm' = 'N={nobs}; Null deviance={null.deviance} on {df.null} df; Residual deviance={deviance} on {df.residual} df',
'lmerModLmerTest' = 'N={nobs}; Sigma={sigma}; df.residual={df.residual}'
)
model_summaries <-
purrr::map(models, ~stringr::str_glue_data(signif(glance2(.), sigfig), glue_dict[class(.)[1]]))
names(model_summaries) <- names(models)
thetable <- model_stats %>%
gt::gt(groupname_col = 'Model', caption = caption) %>%
gt::cols_label(Variable = '') %>%
gt::fmt(columns = c(3:5, 7, 8), fns = function(x) signif(x, sigfig)) %>%
# gt::fmt_number(columns = c(4:5, 7:8), n_sigfig = sigfig) %>%
gt::fmt_scientific(columns = 6, decimals = 1) %>%
gt::tab_style(
style = gt::cell_text(indent = gt::px(40)),
locations = gt::cells_body(columns = 'Variable')
)
for(i in 1:length(model_summaries)){
thetable <- gt::tab_footnote(thetable, model_summaries[[i]], location=gt::cells_row_groups(groups = names(model_summaries[i])))
}
return(thetable)
}
#' @title ggmediation
#' @description Plot ACME and ADE from objects produced by mediate function in mediation package
#' @param obj An object produced by the mediate function from the mediation package.
#' @return A ggplot
#' @details Produces a ggplot version of the mediation plot from the mediation package.
#' @examples
#' \dontrun{
#' if(interactive()){
#' library(mediation)
#' m_med <- lm(hp ~ wt, mtcars)
#' m_out <- lm(mpg ~ hp + wt, mtcars)
#' m <- mediate(m_med, m_out, treat='wt', mediator='hp')
#' ggmediation(m)
#' }
#' }
#' @rdname ggmediation
#' @export
ggmediation <- function(obj){
d <-
tibble(
ACME.point = obj$d.avg,
ACME.low = obj$d.avg.ci[1],
ACME.high = obj$d.avg.ci[2],
ADE.point = obj$z.avg,
ADE.low = obj$z.avg.ci[1],
ADE.high = obj$z.avg.ci[2],
Total.point = obj$tau.coef,
Total.low = obj$tau.ci[1],
Total.high = obj$tau.ci[2]
) %>%
pivot_longer(everything()) %>%
separate(name, into = c('Stat', 'type'), sep = '\\.') %>%
pivot_wider(names_from = type, values_from = value)
p <-
ggplot(d, aes(point, Stat, xmin=low, xmax=high)) +
geom_pointrange(lwd=1.5, fatten = 2) +
labs(caption = paste0('Proportion mediated: ', signif(100*obj$n.avg, 2), '%'), x='', y='') +
theme_minimal(15)
if (all(c(d$low, d$high) < 0)) p <- p + xlim(c(NA, 0))
if (all(c(d$low, d$high) > 0)) p <- p + xlim(c(0, NA))
return(p)
}
codebib <- function(){
addkey <- function(x){
x$key <- paste0(x$author$family[[1]], x$year[[1]], sample(letters,1), sample(letters,1))
return(x)
}
d <- renv::dependencies() %>% dplyr::filter(Package != 'R')
map(d$Package, ~toBibtex(addkey(citation(.x))))
}
#' @title ggxtabs
#' @description Mosaic ggplot of xtabs object
#' @param xtab xtabs object of exactly two categorical variables
#' @param cell_counts Display cell counts, Default: F
#' @param stats Display Chisq stats in caption, Default: F
#' @param viridis_option Viridis color palette, Default: 'G'
#' @param border_color Color of border between cells, Default: 'white'
#' @return ggplot
#' @details Takes a table of exactly two categorical variables produced by xtabs and produces a ggplot mosaic plot
#' @examples
#' \dontrun{
#' if(interactive()){
#' ggxtabs(xtabs(~color+clarity, diamonds), viridis_option = 'H')
#' }
#' }
#' @import dplyr tidyr tibble ggplot2
#' @rdname ggxtabs
#' @export
ggxtabs <- function(xtab, cell_counts = F, stats = F, viridis_option = 'G', border_color = 'white'){
if (!"xtabs" %in% class(xtab)) stop('xtab must be an xtabs object')
d <- as_tibble(xtab)
if (ncol(d) != 3) stop('Two categorical variables only')
# put levels in order
d[[1]] <- factor(d[[1]], levels = unique(d[[1]]))
d[[2]] <- factor(d[[2]], levels = unique(d[[2]]))
nms <- names(d)
d2 <-
d %>%
group_by(across(2)) %>%
summarise(sum = sum(n)) %>%
mutate(
xmax = cumsum(sum),
xmin = c(0, xmax[1:length(xmax)-1]),
xmid = (xmin + xmax)/2
)
d <-
left_join(d, d2) %>%
group_by(across(2)) %>%
mutate(
ymax = cumsum(n)/sum(n),
ymin = c(0, ymax[1:length(ymax)-1]),
ymid = (ymin + ymax)/2
)
d3 <- unique(d[c(2, 7)])
# Dealing with ggplot weirdness (or my ignorance)
d3$xmin = 0
d3$xmax = 0
d3$ymin = 0
d3$ymax = 0
p <-
ggplot(d, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)) +
geom_rect(aes_string(fill=nms[1]), color=border_color)
if (cell_counts){
p <- p + geom_label(aes(x=xmid, y = ymid, label = n), label.size=0)
}
if (stats){
st <- summary(xtab)
stat_str <- glue::glue("N={st$n.cases}, χ²={signif(st$statistic, 3)}, df={st$parameter}, p={signif(st$p.value, 2)}")
p <- p + labs(caption=stat_str)
}
p +
geom_text(data=d3, aes_string(x='xmid', label=nms[2]), y = 1.05) +
scale_fill_viridis_d(option = viridis_option) +
scale_y_continuous(limits = c(0, 1.05)) +
guides(fill = guide_legend(reverse = T)) +
coord_cartesian(clip = 'off') +
xlab(nms[2]) +
theme_void() +
theme(axis.title.x = element_text())
}
#' @title model_stats
#' @description Return a named list of estimates, standard errors, and p-values for one or more regression models.
#' @param ... Either a single regression model object, or multiple named models, e.g., m1 = m1, m2 = m2, ...
#' @return A named list of model parameters and statistics
#' @details Uses the tidy functions from broom and broom.mixed to return a named list of model stats that can be easily used inline in rmarkdown documents.
#' @examples
#' \dontrun{
#' if(interactive()){
#' m1 <- lm(mpg ~ hp, mtcars)
#' m2 <- lm(mpg ~ wt, mtcars)
#' x <- model_stats(m1 = m1, m2 = m2)
#'
#' x$m1$hp$estimate
#' x$m1$hp$str # formatted beta coefficient with 95% CI.
#' }
#' }
#' @seealso
#' \code{\link[broom.mixed]{reexports}}
#' \code{\link[glue]{glue}}
#' @rdname model_stats
#' @export
#' @importFrom broom.mixed tidy
#' @importFrom glue glue
model_stats <- function(...){
tdy <- function(m){
broom.mixed::tidy(m, conf.int = T) %>%
mutate(
str = glue::glue("$\\beta={signif(estimate, 2)}$ ({signif(conf.low, 2)}, {signif(conf.high, 2)})")
) %>%
split(.$term)
}
models <- list(...)
if (length(models) == 1) return(tdy(models[[1]]))
nms <- names(models)
if (is.null(nms) | "" %in% nms) stop('all models must be named, e.g., m1 = m, ...')
map(models, tdy)
}
#' @title twittersize
#' @description Add borders to an image to force an aspect ratio for twitter post, and write it out in png format. Does not resize original image.
#' @param path Path to image file
#' @param out_prefix Prepend to name of output image, Default = "twitter"
#' @param border_color Border color, Default = "white"
#' @param aspect_ratio Desired aspect ratio, Default = "16:9" (single image in tweet). Use "7:8" if tweeting two images.
#' @return path to output image
#' @export
#' @examples
#' \dontrun{
#' if(interactive()){
#' twittersize('~/Desktop/image.png')
#' }
#' }
twittersize <- function(path, aspect_ratio = "16:9", out_prefix = "twitter ", border_color = 'white'){
ar <- as.numeric(strsplit(aspect_ratio, ':')[[1]])
width <- ar[1]
height <- ar[2]
img <- magick::image_read(path)
info <- magick::image_info(img)
if (info$width > width*info$height/height){
border_height <- round(((height*info$width/width) - info$height)/2)
geometry <- paste0("0x", border_height)
} else {
border_width <- round(((width*info$height/height) - info$width)/2)
geometry <- paste0(border_width, 'x0')
}
img <- magick::image_border(img, geometry = geometry, color = border_color)
fn <- paste0(out_prefix, basename(tools::file_path_sans_ext(path)), '.png')
path_out <- file.path(dirname(path), fn)
magick::image_write(img, path = path_out, format = "png")
}
#' @title create_dict
#' @description Print out a translation dictionary given a char vector of names
#' @param v Character vector of names
#' @return A printout out of a named vector template for a translation dictionary
#' @details A printout out of a named vector template for a translation dictionary
#' @examples
#' \dontrun{
#' if(interactive()){
#' create_dict(names(mtcars))
#' }
#' }
#' @rdname create_dict
#' @export
create_dict <- function(v){
cat("c(\n")
cat(paste0(' "', v, '" = ', '"",\n'))
cat(")")
}
#' @title reorder_levels
#' @description Order factor levels based on a subset of values (e.g., only female values, or only male values)
#' @param f Character vector used as a factor variable
#' @param v Numeric vector used to order the factor levels
#' @param index Vector used to divide f and v into exactly 2 groups (e.g., Males, Females)
#' @param choice The value of index to use to (e.g., Female)
#'
#' @return An character vector to be used in the level argument of factor()
#' @export
reorder_levels <- function(f, v, index, choice){
if (!is.character(f)) stop('f must be a character vector')
if (!is.numeric(v)) stop('v must be numeric')
indices <- unique(index)
if (length(indices) != 2) stop('index must have exactly 2 unique values')
if (!setequal(f[index == indices[1]], f[index == indices[2]])) stop("factor levels for each index value must be equal")
levels(forcats::fct_reorder(f[index == choice], v[index == choice]))
}
#' @title joint_missingness
#' @description Upset plot of missinging by combinations of variables
#' @param d data frame
#' @param ... Arguments passed on to upset function
#' @return Upset plot
#' @details Plot indicates number of missing cases for different combinations of variables
#' @examples
#' \dontrun{
#' if(interactive()){
#' joint_missingness(airquality)
#' }
#' }
#' @seealso
#' \code{\link[UpSetR]{upset}}
#' @rdname joint_missingness
#' @export
#' @importFrom dplyr mutate across
#' @importFrom tidyselect everything
#' @importFrom UpSetR upset
joint_missingness <- function(d, ...){
d |>
data.frame() |>
dplyr::mutate(dplyr::across(tidyselect::everything(), \(x) as.integer(is.na(x)))) |>
UpSetR::upset(...)
}
#' scale2
#'
#' @param x A numeric matrix(like object).
#' @description For numeric vectors, returns a scaled numeric vector (not an array); otherwise returns scale(x)
#' @return A scaled vector or matrix or array
#' @export
#'
#' @examples
#' x <- scale2(mtcars$mpg)
#' class(x)
scale2 <- function(x){
if(class(x) == 'numeric') return(c(scale(x)))
return(scale(x))
}
# https://github.com/tidymodels/broom/issues/1096#issuecomment-1116921504
tidy_svyglm <- function(x, conf.int = FALSE, conf.level = 0.95,
exponentiate = FALSE, ddf = NULL, ...) {
ret <- as_tibble(summary(x, df.resid = ddf)$coefficients, rownames = "term")
colnames(ret) <- c("term", "estimate", "std.error", "statistic", "p.value")
coefs <- tibble::enframe(stats::coef(x), name = "term", value = "estimate")
ret <- left_join(coefs, ret, by = c("term", "estimate"))
if (conf.int) {
ci <- broom_confint_terms(x, level = conf.level, ddf = ddf, ...)
ret <- left_join(ret, ci, by = "term")
}
if (exponentiate) {
ret <- exponentiate(ret)
}
ret
}
environment(tidy_svyglm) <- asNamespace("broom")
assignInNamespace("tidy.svyglm",
value = tidy_svyglm,
ns = "broom")
#' @title dagitty2graph: Graph conversion from dagitty to tbl_graph
#'
#' @description Convert a dagitty object to a tbl_graph object.
#' @param g A dagitty object ("dag" or "pdag").
#'
#' @export
#'
#' @examples
#' graph <- dagitty2graph(g)
#'
#' @return An tbl_graph object.
dagitty2graph<- function(g)
{
# nodes to data.frame
nodes <- data.frame(name = names(g))
# edges to data.frame
edges0 <- dagitty::edges(g)
edges <- data.frame(from = edges0$v, to = edges0$w)
as_tbl_graph(list(nodes = nodes, edges = edges), directed = TRUE)
}
insertInlineAddin <- function() {
rstudioapi::insertText("`{r} `")
}
#' @title signif2
#' @description Convert numeric vector to character vector with specified number of significant digits, retaining trailing zeros
#' @param x Numeric vector
#' @param sigfigs Number of significant digits, Default: 2
#' @param format 'g': use scientific notation when more compact; 'fg': numbers in xx.yy format, Default: 'g'
#' @return Character vector of numbers formatted to the specified number of sigfigs
#' @details Character and logical vectors are returned as is.
#' @examples
#' \dontrun{
#' if(interactive()){
#' signif2(rnorm(10))
#' }
#' }
#' @rdname signif2
#' @export
signif2 <- function(x, sigfigs=2, format="g"){
if (is.numeric(x)) {
out <- formatC(x, digits=sigfigs, format=format, flag="#")
out <- gsub("\\s*NA", "", out)
return(out)
} else {
out <- as.character(x)
out[is.na(out)] <- ""
return(out)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.