#' Explore bivariate regression results of a data frame
#'
#' Fits one regression per independent variable (optionally adjusting for
#' control variables) and returns either a coefficient plot or a table of the
#' results. This is helpful when first trying to understand the relationships
#' between a dependent variable and a large number of candidate predictors.
#'
#' @param data A matrix, data frame, or tibble. Anything that is not already a
#'   data frame is converted with `as.data.frame()` before fitting.
#' @param dependent A single string naming the dependent variable.
#' @param independent A character vector naming the independent variables to
#'   test, one model per variable. Entries that are also the dependent or a
#'   control variable are skipped.
#' @param control Either `FALSE` (the default) or `NULL` for no control, or a
#'   character vector naming the control variable(s) added to every model.
#' @param p_value The significance threshold for the table output (defaults to
#'   0.05). The graph is fixed at 95 percent confidence intervals.
#' @param type The type of output requested, either `"graph"` or `"table"`.
#' @param model_type Either `"ols"` (gaussian family) or `"logit"` (binomial
#'   family); matched case-insensitively.
#'
#' @return For `type = "graph"`, a ggplot object showing each coefficient with
#'   its 95 percent confidence interval. For `type = "table"`, a tibble of the
#'   tidied coefficients of the independent variables with an added
#'   `significance` column (1 if `p.value < p_value`, otherwise 0).
#'
#' @importFrom magrittr %>%
#'
#' @export
#'
#' @examples
#' explore_bivariate(
#'   data = mtcars, dependent = "mpg",
#'   independent = c("cyl", "disp", "hp", "drat"),
#'   control = "gear", p_value = 0.05, type = "table"
#' )
#' explore_bivariate(mtcars, names(mtcars)[1], names(mtcars)[-1])
explore_bivariate <- function(data, dependent, independent, control = FALSE,
                              p_value = 0.05, type = "graph",
                              model_type = "ols") {
  # Validate the cheap scalar options first so a typo fails before any model
  # is fitted.
  if (!is.character(type) || length(type) != 1L ||
      !type %in% c("graph", "table")) {
    stop("Please select either 'graph' or 'table' for type", call. = FALSE)
  }

  families <- c(ols = "gaussian", logit = "binomial")
  if (!is.character(model_type) || length(model_type) != 1L ||
      !tolower(model_type) %in% names(families)) {
    stop("Please select either 'ols' or 'logit' for model_type", call. = FALSE)
  }
  family <- families[[tolower(model_type)]]

  if (!is.character(dependent) || length(dependent) != 1L ||
      is.na(dependent)) {
    stop("`dependent` must be a single column name", call. = FALSE)
  }
  if (!is.character(independent) || length(independent) == 0L ||
      anyNA(independent)) {
    stop("`independent` must be a character vector of column names",
         call. = FALSE)
  }

  # `FALSE` is the historical "no control" sentinel; `NULL` is accepted too.
  controls <- if (is.null(control) || identical(control, FALSE)) {
    character(0)
  } else {
    control
  }
  if (!is.character(controls) || anyNA(controls)) {
    stop("`control` must be FALSE or the name(s) of control column(s)",
         call. = FALSE)
  }

  # glm() rejects matrices, so normalise the input to a data frame.
  if (!is.data.frame(data)) {
    data <- as.data.frame(data)
  }
  missing_cols <- setdiff(c(dependent, independent, controls), names(data))
  if (length(missing_cols) > 0L) {
    stop("Columns not found in `data`: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # A variable cannot be tested against itself or against a control that is
  # already in every model, so such entries contribute no rows.
  predictors <- independent[!independent %in% c(dependent, controls)]
  if (length(predictors) == 0L) {
    stop("`independent` must name at least one variable other than the ",
         "dependent and control variables", call. = FALSE)
  }

  estimates <- dplyr::bind_rows(lapply(
    predictors, .fit_bivariate_model,
    data = data, dependent = dependent, controls = controls, family = family
  ))

  if (type == "table") {
    estimates %>%
      dplyr::mutate(significance = ifelse(p.value < p_value, 1, 0))
  } else {
    .plot_bivariate_estimates(estimates)
  }
}

#' Fit one model and keep only the coefficients of the tested predictor
#'
#' @param predictor Name of the independent variable under test.
#' @param data Data frame holding all model variables.
#' @param dependent Name of the dependent variable.
#' @param controls Character vector of control variable names (may be empty).
#' @param family Name of the glm family, e.g. `"gaussian"`.
#'
#' @return A tibble of tidied coefficients belonging to `predictor`.
#' @noRd
.fit_bivariate_model <- function(predictor, data, dependent, controls,
                                 family) {
  # Build the formula from symbols (not parsed text) so non-syntactic column
  # names keep working; the predictor is deliberately the first term.
  rhs <- Reduce(
    function(left, right) call("+", left, right),
    lapply(c(predictor, controls), as.name)
  )
  model_formula <- eval(call("~", as.name(dependent), rhs))
  fit <- stats::glm(model_formula, data = data, family = family)

  # The "assign" attribute maps every design-matrix column to the index of its
  # model term (0 = intercept). Index 1 is the predictor, so this drops the
  # intercept and every control coefficient, including the per-level terms of
  # factor controls that a comparison against the variable name would miss.
  design <- stats::model.matrix(fit)
  predictor_terms <- colnames(design)[attr(design, "assign") == 1L]

  tidied <- broom::tidy(fit)
  tidied[tidied$term %in% predictor_terms, , drop = FALSE]
}

#' Draw the coefficient plot for a set of tidied estimates
#'
#' @param estimates Tibble with at least `term`, `estimate` and `std.error`.
#'
#' @return A ggplot object.
#' @noRd
.plot_bivariate_estimates <- function(estimates) {
  # Normal-approximation multiplier for a 95 percent confidence interval.
  z_95 <- 1.96
  palette <- c("negative" = "red2", "not significant" = "gray",
               "positive" = "#7CAE00")

  plot_data <- estimates %>%
    dplyr::select(term, estimate, std.error) %>%
    dplyr::mutate(
      lower = estimate - std.error * z_95,
      upper = estimate + std.error * z_95,
      term = forcats::fct_reorder(term, estimate),
      # A coefficient is flagged only when its whole interval is on one side
      # of zero.
      significance = ifelse(
        estimate > 0 & lower > 0, "positive",
        ifelse(estimate < 0 & upper < 0, "negative", "not significant")
      )
    )

  ggplot2::ggplot(
    plot_data,
    ggplot2::aes(x = term, y = estimate, ymin = lower, ymax = upper,
                 color = significance)
  ) +
    ggplot2::theme_minimal() +
    ggplot2::geom_hline(yintercept = 0.0, color = "red", lty = 2) +
    ggplot2::geom_point() +
    ggplot2::geom_linerange() +
    ggplot2::labs(
      title = "Results are from bivariate tests, not a single model.",
      caption = "Graph results show a 95% confidence interval",
      x = "", y = "Coefficient"
    ) +
    ggplot2::coord_flip() +
    ggplot2::scale_color_manual(values = palette)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.