#'Simple Generalized Linear Models
#'
#'@description Fit simple GLM.
#'
#'@param data a data frame with the variables.
#'@param response a character value indicating the response variable.
#'@param ... character values indicating confounding variables.
#'@param family a character indicating family distribution. See more \code{\link[stats]{family}}.
#'@param robust.variance a function yielding a covariance matrix or a covariance matrix. See more \code{\link[lmtest]{coeftest}}.
#'@param increment a named list indicating the magnitude of increments to calculate odds ratio for continuous covariates.
#'@param ci.type a character value indicating the procedure to calculate confidence intervals: likelihood ratio (\code{profile}) or wald (\code{Wald})
#'@param conf.level a numerical value indicating the confidence level for parameters of interest.
#'@param exponentiate a logical value indicating whether coefficients should be exponentiated.
#'@param format a logical value indicating whether the output should be formatted.
#'@param labels a list of labels with components given by their variable names.
#'@param digits a numerical value defining of digits to present the results.
#'@param digits.p a numerical value defining number of digits to present the p-values.
#'@param save a logical value indicating whether the output should be saved as a csv file.
#'@param file a character indicating the name of output file in csv format to be saved.
#'@examples
#'library(titanic)
#'library(dplyr)
#'
#'data(titanic_train)
#'titanic_nt <- titanic_train |>
#' mutate(Sex = factor(Sex,
#' levels = c("male", "female"),
#' labels = c("Male", "Female")),
#' Pclass = factor(Pclass,
#' levels = 1:3,
#' labels = c("I", "II", "III")),
#' Embarked = factor(Embarked,
#' levels = c("C", "Q", "S"),
#' labels = c("Cherbourg", "Queenstown", "Southampton")))
#'
#'titanic_nt |> select(Survived, Sex, Age, Pclass, Embarked) |>
#' nt_simple_glm(response = Survived, Age,
#' family = binomial(link = "logit"),
#' exponentiate = TRUE,
#' labels = list(Pclass = "Passenger class"))
#'
#'@import titanic
#'@importFrom purrr pmap map2
#'@importFrom dplyr select
#'@importFrom rlang enquo quos .data
#'@importFrom tibble as_data_frame
#'@importFrom utils write.csv
#'
#'@export
nt_simple_glm <- function(data, response, ...,
family, robust.variance = NULL,
increment = NULL,
ci.type = "Wald", conf.level = 0.95,
exponentiate = FALSE,
format = TRUE, labels = NULL,
digits = 2, digits.p = 3,
save = FALSE, file = "simple_logistic"){
response <- enquo(response)
aux <- quos(...)
vars <- select(.data = data, -!!response)
response <- select(.data = data, !!response)
if (length(aux) > 0){
for (i in 1:length(aux)){
vars <- select(.data = vars, -!!aux[[i]])
}
add <- select(.data = data, !!!aux)
add.name <- names(add)
add.label <- map2(add, add.name, extract_label)
} else {
add <- NULL
add.name <- NULL
add.label <- NULL
}
vars.name <- names(vars)
response.name <- names(response)
if (!is.null(labels)){
vars <- data_labeller(vars, labels)
vars.label <- map2(.x = vars, .y = as.list(vars.name),
.f = extract_label)
response <- data_labeller(response, labels)
response.label <- extract_label(response, response.name)
} else {
vars.label <- map2(.x = vars, .y = as.list(vars.name),
.f = extract_label)
}
temp <- pmap(.l = list(vars, vars.name, vars.label),
.f = aux_simple_glm,
response = response[[1]], response.label = response.label,
add = add, add.name = add.name, add.label = add.label,
family = family, robust.variance = robust.variance,
increment = increment, exponentiate = exponentiate,
conf.level = conf.level, ci.type = ci.type,
format = format)
out <- Reduce(rbind, temp)
if (format){
out <- out |> mutate(null.deviance = round(.data$null.deviance, digits),
logLik = round(.data$logLik, digits),
AIC = round(.data$AIC, digits),
BIC = round(.data$BIC, digits),
deviance = round(.data$deviance, digits)) |>
transmute(Variable = .data$term, Group = .data$group,
`Estimate (95% CI)` = ifelse(.data$estimate == 1 &
is.na(.data$conf.low) &
is.na(.data$conf.high),
"Reference",
paste0(round(.data$estimate, digits), " (",
round(.data$conf.low, digits), " ; ",
round(.data$conf.high, digits), ")")),
`Wald p value` = ifelse(round(.data$p.value, digits.p) == 0, "< 0.001",
as.character(round(.data$p.value, digits.p))),
`LR p value` = ifelse(round(.data$p.value.lr, digits.p) == 0, "< 0.001",
as.character(round(.data$p.value.lr, digits.p))),
n = as.character(.data$n),
null.deviance = as.character(.data$null.deviance),
logLik = as.character(.data$logLik),
AIC = as.character(.data$AIC),
BIC = as.character(.data$BIC),
deviance =as.character(.data$deviance)) |>
mutate(`Estimate (95% CI)` =
recode(.data$`Estimate (95% CI)`,
`NA (NA ; NA)` = "Reference")) |>
replace_na(list(`Wald p value` = "", `LR p value` = "",
n = "", null.deviance = "", df.null = "",
logLik = "", AIC = "", BIC = "",
deviance = "", df.residual = ""))
}
if (save){
write.csv(out, file = paste0(file, "_regression.csv"))
}
return(out)
}
aux_simple_glm <- function(var, var.name, var.label,
response, response.label,
add, add.name, add.label,
family, robust.variance,
increment, exponentiate,
conf.level, ci.type,
format){
if (!is.null(add)){
aux <- data.frame(add, var)
aux.labels <- c(add.label, var = var.label)
aux.names <- c(add.name, var.name)
data.model <- data.frame(response = response, add = add, var = var)
} else {
aux <- data.frame(var)
aux.labels <- c(var = var.label)
aux.names <- var.name
data.model <- data.frame(response = response, var = var)
}
tab.labels <- list()
tab.levels <- list()
for (i in 1:ncol(aux)){
if (is.numeric(aux[, i])){
tab.levels[[names(aux.labels)[i]]] <- ifelse(is.null(increment[[aux.names[[i]]]]),
"every 1 unit of change",
paste0("every ",
increment[[aux.names[i]]],
" unit of change"))
} else {
if (!is.factor(aux[, i]))
aux[, i] <- as.factor(aux[, i])
lv <- levels(aux[, i])
tab.levels[[names(aux.labels)[i]]] <- lv[1:length(lv)] #paste0(lv[2:length(lv)], "/", lv[1])
}
tab.labels[[names(aux.labels)[i]]] <- rep(aux.labels[[i]], length(tab.levels[[names(aux.labels)[i]]]))
}
out <- fit_simple_glm(data.model, family,
tab.labels, tab.levels, var.label,
robust.variance, increment[[var.name]],
exponentiate, conf.level, ci.type)
if (format){
out$p.value.lr = ifelse(duplicated(out$term), NA, out$p.value.lr)
out$term = ifelse(duplicated(out$term), "", out$term)
}
return(out)
}
#'@importFrom broom tidy glance
#'@importFrom stats na.exclude glm anova
#'@importFrom lmtest coeftest
#'@importFrom stringr str_which
fit_simple_glm <- function(data, family,
tab.labels, tab.levels, var.label,
robust.variance, increment,
exponentiate, conf.level, ci.type){
data <- na.exclude(data)
if (!is.null(increment))
data[["var"]] <- data[["var"]]/increment
fit <- glm(response ~ ., data = data, family = family)
if (is.null(robust.variance)){
temp <- tidy(fit, exponentiate = exponentiate,
conf.level = conf.level,
conf.type = ci.type,
conf.int = TRUE)
} else {
step <- coeftest(fit, vcov. = robust.variance)
temp <- tidy(step, exponentiate = exponentiate,
conf.level = conf.level,
conf.int = TRUE)
}
nlv <- lapply(tab.levels, length)
for (i in 1:length(nlv)){
if (nlv[[i]] > 1){
add.row <- str_which(temp$term, names(tab.levels)[i])[1]
temp <- rbind(temp[1:(add.row-1), ], NA,
temp[add.row:nrow(temp), ])
temp[add.row, 1] <- paste0(names(tab.levels)[i], tab.levels[[i]][1])
temp[add.row, 2] <- 1
}
}
temp$term <- c("(Intercept)", unlist(tab.labels))
temp$group <- c("", unlist(tab.levels))
temp <- temp[, c(1, 8, 2, 6, 7, 5)]
if (exponentiate)
temp <- temp[-1, ]
p.value.lr <- rep(NA, length(unique(unlist(tab.labels))))
for (i in 2:ncol(data)){
if (ncol(data) > 2){
fit0 <- glm(response ~ ., data = data[, -i], family = family)
p.value.lr[(i-1)] <- anova(fit0, fit, test = "Chisq")$`Pr(>Chi)`[2]
} else {
fit0 <- glm(response ~ 1, data = data, family = family)
p.value.lr[(i-1)] <- anova(fit0, fit, test = "Chisq")$`Pr(>Chi)`[2]
}
}
aux <- data.frame(p.value.lr = p.value.lr, n = nrow(data), glance(fit))
out <- merge(temp, aux, by = 0, all = TRUE, sort = TRUE)[, -1]
return(out)
}
#'Multivariable Generalized Linear models
#'
#'@description Tabulating results from multivariable GLMs.
#'
#'@param fit a glm object.
#'@param exponentiate a logical value indicating whether coefficients should be exponentiated.
#'@param ci.type a character value indicating the procedure to calculate confidence intervals: likelihood ratio (\code{profile}) or wald (\code{Wald}).
#'@param user.contrast a variable named list of numerical vectors indicating contrast for a covariate.
#'@param user.contrast.interaction a variable named list of numerical vectors indicating a contrast for interaction.
#'@param table.reference a logical value indicating whether the output should be presented with a line indicating the reference category.
#'@param format a logical value indicating whether the output should be formatted.
#'@param labels a list of labels with components given by their variable names.
#'@param digits a numerical value defining of digits to present the results.
#'@param digits.p a numerical value defining number of digits to present the p-values.
#'@param save a logical value indicating whether the output should be saved as a csv file.
#'@param file a character indicating the name of output file in csv format to be saved.
#'
#'@examples
#'library(titanic)
#'library(dplyr)
#'
#'data(titanic_train)
#'dt <- titanic_train |> mutate(Sex = factor(Sex,
#' levels = c("male", "female"),
#' labels = c("Male", "Female")),
#' Pclass = factor(Pclass,
#' levels = 1:3,
#' labels = c("I", "II", "III"))
#' )
#'
#'fit <- glm(Survived ~ Age + Sex + Pclass, data = dt, family = "binomial")
#'
#'nt_multiple_glm(fit, exponentiate = TRUE)
#'
#'@importFrom purrr map
#'@importFrom utils write.csv
#'@export
nt_multiple_glm <- function(fit, exponentiate = FALSE,
ci.type = "Wald",
user.contrast = NULL, user.contrast.interaction = NULL,
table.reference = TRUE,
format = TRUE, labels = NULL,
digits = 2, digits.p = 3,
save = FALSE, file = "nt_multiple_glm"){
out <- aux_multiple_glm(fit = fit,
exponentiate = exponentiate,
ci.type = ci.type,
user.contrast = user.contrast,
user.contrast.interaction = user.contrast.interaction,
format = format, table.reference = table.reference)
ref <- reference_df(fit)$ref
if (format)
out$effect <- out$effect |>
transmute(Variable = .data$variable, Group = .data$group,
`Estimate (95% CI)` = ifelse(is.na(.data$estimate),
"Reference",
paste0(round(.data$estimate, digits), " (",
round(.data$conf.low, digits), " ; ",
round(.data$conf.high, digits), ")")),
`p value` = ifelse(is.na(.data$p.value), "",
ifelse(round(.data$p.value, digits.p) == 0, "< 0.001",
as.character(round(.data$p.value, digits.p))))) |>
replace_na(list(`p value` = ""))
if (!is.null(labels)){
aux_labels <- labels
names(aux_labels) <- paste0("^", names(aux_labels), "$")
out$effect <- out$effect |>
mutate(Variable =
str_replace_all(.data$Variable, unlist(aux_labels)))
}
if (save)
write.csv(out$effect, file = paste0(file, ".csv"))
out <- list(effect = out$effect, coef = out$coef, ref = ref)
return(out)
}
#'@importFrom dplyr mutate group_by ungroup rename
#'@importFrom stringr str_replace_all
#'@importFrom tidyr separate
#'@importFrom broom tidy
aux_multiple_glm <- function(fit, exponentiate, robust.variance,
ci.type, user.contrast, user.contrast.interaction,
format, table.reference){
aux <- extract_data(fit)
effect <- fit_multiple_glm(fit, fit.vars = aux,
exponentiate = exponentiate,
robust.variance = robust.variance,
type = ci.type,
user.contrast = user.contrast,
user.contrast.interaction = user.contrast.interaction,
table.reference = table.reference) |>
separate(.data$term, into = c("variable", "group"), sep = ":")
if (format)
effect <- effect |> group_by(.data$variable) |>
mutate(aux_variable = ifelse(duplicated(.data$variable), "", .data$variable)) |>
ungroup(.data$variable) |> select(-.data$variable) |>
rename(variable = .data$aux_variable)
# temp <- unlist(aux$var.labels)
# labels <- paste0(temp, " ")
# names(labels) <- names(temp)
# coef <- tidy(fit) |>
# mutate(term = str_replace_all(.data$term, labels),
# term = sub(" $", "", x = .data$term))
out <- list(effect = effect)
return(out)
}
#'@importFrom stats model.matrix formula setNames anova vcov glm update.formula
fit_multiple_glm <- function(fit, fit.vars, exponentiate, robust.variance,
type, user.contrast, user.contrast.interaction,
table.reference){
ref <- reference_df(fit)$df
beta <- as.numeric(fit$coefficients)
beta.var <- as.matrix(vcov(fit))
term.labels <- attr(fit$terms, "term.labels")
interaction <- colnames(attr(fit$terms, "factors"))[attr(fit$terms, "order") > 1]
for (i in 1:length(fit.vars$var)){
if (length(interaction) > 0){
cond.interaction <- grepl(fit.vars$var[i], x = interaction, fixed = TRUE)
} else {
cond.interaction <- FALSE
}
if (all(!cond.interaction)){
temp <- contrast_df(data = fit.vars$data,
var = fit.vars$var[i],
ref = ref,
user.contrast = user.contrast,
table.reference = table.reference)
design.matrix <- model.matrix(formula(fit), data = temp$new.data)
drop <- which(grepl(fit.vars$var[i], x = as.character(term.labels), fixed = TRUE))
fit0 <- glm(update.formula(fit$formula,
paste0(" ~ . - ", paste(term.labels[drop],
collapse = " - "))),
data = na.exclude(fit.vars$data), family = fit$family)
contrast <- contrast_calc(fit = fit, fit0 = fit0,
design.matrix = design.matrix,
beta = beta, beta.var = beta.var,
type = type)
if (table.reference)
contrast <- rbind(NA, contrast)
temp <- data.frame(term = temp$label, contrast)
if (type == "lr")
temp[1:2, 5] <- temp[2:1, 5]
if (i > 1)
temp <- rbind(out, temp)
out <- temp
} else {
for (k in which(cond.interaction)){
interaction.vars <- fit.vars$var[sapply(fit.vars$var, grepl,
x = as.character(interaction[k]),
fixed = TRUE)]
others <- interaction.vars[interaction.vars != fit.vars$var[i]]
temp <- contrast_df(data = fit.vars$data, var = fit.vars$var[i],
ref = ref, user.contrast = user.contrast,
interaction = others,
user.contrast.interaction = user.contrast.interaction)
design.matrix <- sapply(temp$new.data, function(x) model.matrix(fit, x), simplify = FALSE)
drop <- which(grepl(fit.vars$var[i], x = as.character(term.labels), fixed = TRUE))
fit0 <- glm(update.formula(fit$formula, paste0(" ~ . - ", paste(term.labels[drop], collapse = " - "))),
data = fit.vars$data, family = fit$family)
contrast <- contrast_calc(fit = fit, fit0 = fit0, design.matrix = design.matrix,
beta = beta, beta.var = beta.var,
type = type)
if (table.reference){
aux.contrast <- list()
index <- 1
for (j in 1:length(temp$label)){
if (j %in% temp$seq_nl){
aux.contrast[[j]] <- matrix(NA, ncol = ncol(contrast))
colnames(aux.contrast[[j]]) <- c("estimate", "lower", "upper", "p.value")
} else {
aux.contrast[[j]] <- contrast[index, ]
index <- index + 1
}
}
contrast <- Reduce(rbind, aux.contrast)
}
contrast <- Reduce(rbind, aux.contrast)
temp <- data.frame(term = temp$label, contrast)
if (i > 1)
temp <- rbind(out, temp)
out <- temp
}
}
}
if (exponentiate)
out[, 2:4] <- exp(out[, 2:4])
rownames(out) <- NULL
colnames(out) <- c("term", "estimate", "conf.low", "conf.high", "p.value")
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.