#' Pairwise Hamming/Manhattan Distance
#'
#' Calculates the pairwise Hamming/Manhattan distance (these are equivalent
#' with binary/dichotomous data). The normalized verision is equivalent to
#' the probability of union.
#'
#' @details
#' * Metric: Distance
#' * Symmetrical: Yes
#' * Upper Limit: Inf (1 when normalized)
#' * Lower Limit: 0
#'
#' | | | **B** | |
#' |-------|-------|-------|-------|
#' | | | **1** | **0** |
#' | **A** | **1** | a | b |
#' | | **0** | c | d |
#'
#' *Hamming* = `b+c` \cr{}
#' *Normalized* = `(b+c)/N`
#'
#' @md
#'
#' @param data A data frame.
#' @param cols Columns to analyze.
#' @param case_weights An optional column of case weights.
#' @param norm Whether or not to normalize the distance, default is `FALSE`.
#'
#' @return A matrix.
#'
#' @importFrom dplyr select pull mutate as_tibble
#' @importFrom tidyr pivot_longer
#' @importFrom collapse dapply qtab
#' @importFrom rlang abort
#' @importFrom glue glue
#' @importFrom gdata `lowerTriangle<-` upperTriangle
#'
#' @examples
#' res <- pairwise_hamming(
#' data = FoodSample,
#' cols = Bisque:Turkey
#' )
#'
#' print(res)
#'
#' tidy(res)
#'
#' @export
pairwise_hamming <- function(
data, cols, case_weights, norm = FALSE
) {
# Parse out data ----------------------------------------------------------
# grab the data needed
X <- select(data, {{cols}})
# make sure data passes `is_onezero()`
oz.check <- dapply(
X = X,
FUN = is_onezero,
MARGIN = 2,
drop = TRUE
)
if (any(!oz.check)) {
bad.cols <-
oz.check %>%
names() %>%
paste(collapse = ", ")
abort(glue(
"All columns in `cols` must meet criteria of `is_onezero()`, the following do not:\n{bad.cols}"
))
}
# convert to factors
X <- dapply(X = X, FUN = function(x) factor(x, levels = c(1, 0)))
# deal with weights
if (missing(case_weights)) {
w <- rep(1, times = nrow(data))
} else {
w <- pull(data, {{case_weights}})
if (!is.numeric(w)) {
abort("Input to `case_weight` must be a numeric column.")
}
}
# Initialize --------------------------------------------------------------
items <- colnames(X)
n.items <- length(items)
m <- matrix(
nrow = n.items,
ncol = n.items,
dimnames = list(items, items)
)
# Calculations ------------------------------------------------------------
for (i in seq_along(items)) {
for (j in seq_along(items)) {
if (i >= j) {
next
}
ct <- qtab(
item_i = X[[i]],
item_j = X[[j]],
w = w
)
b <- ct[1, 2]
c <- ct[2, 1]
ss <- sum(ct)
m[i, j] <- b + c
if (norm) {
m[i, j] <- (b+c)/ss
} else {
m[i, j] <- b+c
}
}
}
# Final formatting and return ---------------------------------------------
lowerTriangle(m) <- upperTriangle(m, byrow = TRUE)
dimnames(m) <- list(
"Var A" = rownames(m),
"Var B" = colnames(m)
)
if (norm) {
class(m) <- c(class(m), "pairwise_hamming_norm")
} else {
class(m) <- c(class(m), "pairwise_hamming")
}
m
}
#' @rdname pairwise_hamming
#'
#' @export
pairwise_manhattan <- pairwise_hamming
#' @rdname pairwise_hamming
#'
#' @export
pairwise_discordant <- pairwise_hamming
#' @exportS3Method print pairwise_hamming
print.pairwise_hamming <- function(x, digits = 3, ...) {
cli::cat_line("Hamming Distance")
x <- round(x, digits = digits)
print.default(unclass(x), na.print = "")
}
#' @importFrom generics tidy
#' @export
generics::tidy
#' @exportS3Method tidy pairwise_hamming
tidy.pairwise_hamming <- function(x, ...) {
x %>%
as_tibble(rownames = "var_a") %>%
pivot_longer(
cols = -1,
names_to = "var_b",
values_to = "hamming"
) %>%
mutate(hamming = as.numeric(hamming))
}
#' @exportS3Method print pairwise_hamming_norm
print.pairwise_hamming_norm <- function(x, digits = 3, ...) {
cli::cat_line("Hamming Distance (Normalized)")
x <- round(x, digits = digits)
print.default(unclass(x), na.print = "")
}
#' @importFrom generics tidy
#' @export
generics::tidy
#' @exportS3Method tidy pairwise_hamming_norm
tidy.pairwise_hamming_norm <- function(x, ...) {
x %>%
as_tibble(rownames = "var_a") %>%
pivot_longer(
cols = -1,
names_to = "var_b",
values_to = "hamming_norm"
) %>%
mutate(hamming_norm = as.numeric(hamming_norm))
}
utils::globalVariables(c(
"hamming", "hamming_norm"
))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.