dim_warn <- function(tab) {
if(!identical(dim(tab), c(2L, 2L))) {
warning("Input must be a 2x2 table to compute OR and CI", call = FALSE)
}
}
OR <- function(tab) {
dim_warn(tab)
if (identical(dim(tab), c(2L, 2L))) {
out <- tryCatch(
(tab[1, 1] * tab[2, 2]) / (tab[1, 2] * tab[2, 1]),
error = function(e) NA
)
} else {
out <- NA
}
out <- c(OR = out)
return(out)
}
OR_confint <- function(tab) {
dim_warn(tab)
odds_ratio <- suppressWarnings(OR(tab))
log_OR <- log(odds_ratio)
diff <- tryCatch(
1.96*sqrt(1/tab[1,1] + 1/tab[1,2] + 1/tab[2,1] + 1/tab[2,2]),
error = function(e) NA
)
out <- c(odds_ratio,
exp(log_OR - diff),
exp(log_OR + diff))
names(out) <- c("OR", "CI_lower", "CI_upper")
return(out)
}
test_pearson <- function(tab) {
out <-
tryCatch(chisq.test(tab)$p.value,
error = function(e) NA
)
return(c(pearson_p = out))
}
test_fisher <- function(tab) {
fish_test <- tryCatch(
fisher.test(tab),
error = function(e) NA
)
if (!all(is.na(fish_test))) {
p <- fish_test$p.value
if(is.null(p)) p <- NA
ci <- fish_test$conf.int
if(is.null(ci)) ci <- c(NA, NA)
out <- c(ci, p)
} else {
out <- rep(NA, 3)
}
names(out) <- c("fisher_CI_lower", "fisher_CI_upper", "fisher_p")
return(out)
}
test_prop <- function(tab) {
pref_test <-
if(any(unlist(tab) < 5)) {
c(pref_test = "Fisher")
} else {
c(pref_test = "Pearson")
}
df_1 <- c(OR_confint(tab), test_pearson(tab), test_fisher(tab))
df_1_cols <- names(df_1)
df_1 <- matrix(df_1,
nrow = 1,
byrow = TRUE)
df_1 <- as.data.frame(df_1, stringsAsFactors = FALSE)
colnames(df_1) <- df_1_cols
out <- cbind(df_1, pref_test)
return(out)
}
make_tabulatable <- function(x) {
if(!(class(x) %in% c("factor", "character"))) {
return(as.character(x))
} else {
return(x)
}
}
tab_rf <- function(predictor, response, data) {
if (is.factor(data[, predictor])) {
predictor_values <- levels(data[, predictor])
} else {
predictor_values <- sort(unique(data[, predictor]))
}
base_val <- predictor_values[1]
predictor_values <- predictor_values[-1]
data[, predictor] <- make_tabulatable(data[, predictor])
out <- vector(mode = "list", length = length(predictor_values))
names(out) <- predictor_values
for (pred_val in predictor_values) {
row_idx <- which(data[, predictor] %in% c(base_val, pred_val))
# Subset data and compare to baseline
data_tmp <- data[row_idx, ]
# Drop unused levels before tabulation if column is a factor
if (is.factor(data[, predictor])) {
data_tmp[, predictor] <- droplevels(data_tmp[, predictor])
}
out[[pred_val]] <- table(data_tmp[, predictor],
data_tmp[, response])
}
return(out)
}
tab_rn <- function(predictor, response, data) {
p_vec <- as.character(data[, predictor])
r_vec <- as.character(data[, response])
idx <- !is.na(p_vec) & !is.na(r_vec)
p_vec <- p_vec[idx]
r_vec <- r_vec[idx]
pred_vals <- unique(p_vec)
out <- vector(mode = "list", length = length(pred_vals))
names(out) <- pred_vals
for (pr_val in pred_vals) {
p_vec_tmp <- rep(FALSE, length(p_vec))
idx <- p_vec == pr_val
p_vec_tmp[idx] <- TRUE
out[[pr_val]] <- table(p_vec_tmp, r_vec)
}
return(out)
}
tab_a <- function(predictor, response, data) {
return(table(data[, predictor], data[, response]))
}
test_categorical_engine <- function(data, predictor, response) {
int_1 <- lapply(predictor, tab_rf, response, data)
name_col <- lapply(int_1, length)
for (i in 1:length(name_col)) {
name_col[[i]] <- rep(predictor[i], name_col[[i]])
}
name_col <- unlist(name_col)
val_col <- unlist(lapply(int_1, names))
int_1 <- lapply(unlist(int_1, recursive = FALSE), test_prop)
col_names <- c("name", "value", colnames(int_1[[1]]))
out <- do.call(rbind.data.frame, int_1)
out <- cbind(name_col, val_col, out)
colnames(out) <- col_names
return(out)
}
#' @export
test_categorical <- function(data,
predictor,
response,
strata = NULL,
format = TRUE) {
# TODO fix the fact that this outputs a table that is all factors
if (!is.null(strata)) {
strata_levels <- get_new_levels(data, strata)
data[, strata] <- as.character(data[, strata])
tmp_data <- data
tmp_data[, strata] <- "Total"
data <- rbind(tmp_data, data)
by_strata <-
function(strata_val,
data,
predictor,
response,
strata) {
data <- data[which(data[, strata] == strata_val), ]
out <-
test_categorical_engine(data, predictor, response)
colnames(out)[-(1:2)] <- paste0(strata_val, "_",
colnames(out)[-(1:2)])
return(out)
}
out <-
lapply(strata_levels,
by_strata,
data,
predictor,
response,
strata)
out <- do.call(cbind, res)
} else {
out <- test_categorical_engine(data, predictor, response)
}
rownames(out) <- 1:nrow(out)
if (format) {
out <- select_test(out)
out <- format_OR(out)
out <- format_p(out)
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.