# TODO: This is NOT a contingency table. What do we call it instead?
#' Contingency Table
#'
#' Construct a contingency table showing all observed pairs between different states of variables.
#'
#' @param data A `data.frame` of at least 2 columns.
#' @param vars Optional `CharacterVector` of target
#' variables. Default: all variables within `data`.
#' @param p A `NumericVector` of probabilities passed to `quantile`. Default: `0.05`.
#' @param ... Additional options passed to `quantile`
#'
#' @details For each variable in `vars, `ql_matrix` first splits the
#' data for each observed state and then calculates the percentile
#' given by `p` for each of the remaining variables. So yes, this
#' is not a contingency table. Instead, the default value of `p`
#' is such that we're almost calulating the minimum value observed
#' for all variables for each state of a target variable. What
#' does this tell us? I have no idea.
#'
#' @return A `S3` `matrix` of class `ql_mat`.
#'
#' @examples
#' x <- data.frame(a = c(0, 1), b = c(1, 1))
#' ql_matrix(x)
#'
#' @export
ql_matrix <- function(data, vars = colnames(data), p = 0.05, ...) {
if (ncol(data) < 2)
stop("Must supply of data frame with at least 2 columns")
if (any(!vars %in% colnames(data)))
stop("All given variables must appear in the data frame")
ll <- lapply(vars, function(varname) {
g <- as.factor(data[[varname]])
m <- vapply(colnames(data)[colnames(data) != varname], function(s) {
tapply(data[[s]], g, function(v) stats::quantile(v, p, ...) %>% round)
}, numeric(nlevels(g)))
# If no obs make sure we still appear in output table
if (class(m) == "matrix" && nrow(m) == 0)
m <- rbind(m, NA)
if (nlevels(g) == 1)
m <- t(m)
vns <- paste(varname, rownames(m) %||% 0)
as.data.frame(m) %>% cbind(vns, stringsAsFactors = F)
})
f.df <- if (length(ll) > 1)
Reduce(function(m, n) merge(m, n, all = T), ll)
else
ll[[1]]
rownames(f.df) <- f.df$vns
f.df$vns <- NULL
f.m <- as.matrix(f.df)
row_sums <- rowSums(f.m, na.rm = T)
col_sums <- colSums(f.m, na.rm = T)
f.m <- cbind(f.m, sums = row_sums)
f.m <- rbind(f.m, sums = c(col_sums, NA))
f.m <- f.m[order(f.m[, ncol(f.m)], decreasing = T),
order(f.m[nrow(f.m), ], decreasing = F)]
structure(f.m, class = c("ql_mat", class(f.m)))
}
add_empty <- function(x, y) {
fn <- function(missing) as.list(setNames(rep(NA, length(missing)), missing))
empty_cols <- setdiff(colnames(y), colnames(x)) %>% fn
empty_rows <- setdiff(rownames(y), rownames(x)) %>% fn
x2 <- do.call(cbind, c(list(x), empty_cols))
final <- do.call(rbind, c(list(x2), empty_rows))
final[order(rownames(final)), order(colnames(final))]
}
#' @export
combine <- function(x, y) UseMethod("combine")
#' @export
combine.ql_mat <- function(x, y) {
rows <- intersect(rownames(x), rownames(y)) %>% setdiff("sums")
cols <- intersect(colnames(x), colnames(y)) %>% setdiff("sums")
# We only want to combine unique ql matrices
if (length(cols) > 1 & any(!is.na(x[rows, cols]) & !is.na(y[rows, cols])))
stop("Overlapping matrices, unable to combine")
x <- add_empty(x, y)
y <- add_empty(y, x)
out <- matrix(NA, nrow(x), ncol(x), dimnames = list(rownames(x), colnames(x)))
out[] <- vapply(1:length(x), function(i) {
# We're only adding together for the sums cols
ifelse(is.na(x[i]) | is.na(y[i]), x[i] %||% y[i], x[i] + y[i])
}, numeric(1))
rows <- setdiff(rownames(out), "sums")
cols <- setdiff(colnames(out), "sums")
out <- out[c(rows, "sums"), c(cols, "sums")]
out <- out[order(out[, ncol(out)], decreasing = T),
order(out[nrow(out), ], decreasing = F)]
structure(out, class = c("ql_mat", class(out)))
}
#' @export
print.ql_mat <- function(x, ...) {
class(x) <- "matrix"
print(x, ...)
}
#' @export
is.ql_mat <- function(x) {
inherits(x, "ql_mat")
}
#' @export
as.list.ql_mat <- function(x, ...) {
# Let's start by stripping the sums
x <- x[-which(rownames(x) == "sums"), -which(colnames(x) == "sums")]
# TODO: properly split the matrix, or maybe just use data frames
vars <- sub("\\s\\d+$", "", rownames(x)) %>% unique
ll <- lapply(vars, function(s) {
m <- t(x[grepl(s, rownames(x)),, drop = F])
m[-which(rownames(m) == s), order(colnames(m)), drop = F]
})
stats::setNames(ll, vars)
}
#' @export
summary.ql_mat <- function(object, ...) {
# Let's start by stripping the sums
object <- object[-which(rownames(object) == "sums"), -which(colnames(object) == "sums")]
# TODO: properly split the matrix, or maybe just use data frames
vars <- sub("\\s\\d+$", "", rownames(object)) %>% unique
ll <- lapply(vars, function(s) {
m <- t(object[grepl(s, rownames(object)),, drop = F])
m <- m[, order(colnames(m)), drop = F]
nr_states <- ncol(m)
state_sum <- sum(m[, nr_states], na.rm = T)
total_sum <- sum(m, na.rm = T)
c("Num of States" = nr_states, "Num of Dependents" = nrow(m),
"Matrix Column Sum" = state_sum, "Total Sum" = total_sum)
})
out <- do.call(rbind, ll)
rownames(out) <- vars
idx <- as.numeric(out[, ncol(out)]) %>% order(decreasing = T)
out[idx, ]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.