R/pairwise_yules_y.R

Defines functions tidy.pairwise_yules_y print.pairwise_yules_y pairwise_yules_y

Documented in pairwise_yules_y

#' Pairwise Yule's Y
#'
#' Calculates the pairwise Yule's Y (Yule's Sigma, coefficient of colligation).
#'
#' @param data A data frame.
#' @param cols Columns to analyze.
#' @param case_weights An optional column of case weights.
#'
#' @details
#' * Metric: Similarity
#' * Symmetrical: Yes
#' * Upper Limit: 1
#' * Lower Limit: -1
#'
#' |       |       | **B** |       |
#' |-------|-------|-------|-------|
#' |       |       | **1** | **0** |
#' | **A** | **1** |   a   |   b   |
#' |       | **0** |   c   |   d   |
#'
#' *Yule's Y* = `(sqrt(a*d)-sqrt(b*c))/(sqrt(a*d)+sqrt(b*c))`
#'
#' @return A matrix.
#'
#' @importFrom dplyr select pull mutate as_tibble
#' @importFrom tidyr pivot_longer
#' @importFrom collapse dapply qtab
#' @importFrom rlang abort
#' @importFrom glue glue
#' @importFrom gdata `lowerTriangle<-` upperTriangle
#'
#' @examples
#' pairwise_yules_y(
#'     data = FoodSample,
#'     cols = Bisque:Turkey
#' )
#'
#' @export
pairwise_yules_y <- function(
        data, cols, case_weights
) {


    # Parse out data ----------------------------------------------------------

    # grab the data needed
    X <- select(data, {{cols}})

    # make sure data passes `is_onezero()`
    oz.check <- dapply(
        X = X,
        FUN = is_onezero,
        MARGIN = 2,
        drop = TRUE
    )

    if (any(!oz.check)) {
        bad.cols <-
            oz.check %>%
            names() %>%
            paste(collapse = ", ")

        abort(glue(
            "All columns in `cols` must meet criteria of `is_onezero()`, the following do not:\n{bad.cols}"
        ))

    }

    # convert to factors
    X <- dapply(X = X, FUN = function(x) factor(x, levels = c(1, 0)))

    # deal with weights
    if (missing(case_weights)) {
        w <- rep(1, times = nrow(data))
    } else {
        w <- pull(data, {{case_weights}})
        if (!is.numeric(w)) {
            abort("Input to `case_weight` must be a numeric column.")
        }
    }


    # Initialize --------------------------------------------------------------

    items   <- colnames(X)
    n.items <- length(items)

    m <- matrix(
        nrow = n.items,
        ncol = n.items,
        dimnames = list(items, items)
    )


    # Calculations ------------------------------------------------------------

    for (i in seq_along(items)) {
        for (j in seq_along(items)) {


            if (i >= j) {
                next
            }

            ct <- qtab(
                item_i = X[[i]],
                item_j = X[[j]],
                w = w
            )

            a <- ct[1, 1]
            b <- ct[1, 2]
            c <- ct[2, 1]
            d <- ct[2, 2]

            m[i, j] <- (sqrt(a*d)-sqrt(b*c))/(sqrt(a*d)+sqrt(b*c))

        }
    }


    # Final formatting and return ---------------------------------------------

    lowerTriangle(m) <- upperTriangle(m, byrow = TRUE)

    dimnames(m) <- list(
        "Var A" = rownames(m),
        "Var B" = colnames(m)
    )

    class(m) <- c(class(m), "pairwise_yules_y")

    m

}

#' @exportS3Method print pairwise_yules_y
print.pairwise_yules_y <- function(x, digits = 3, ...) {

    cli::cat_line("Yule's Y Similarity")
    x <- round(x, digits = digits)
    print.default(unclass(x), na.print = "")

}

#' @importFrom generics tidy
#' @export
generics::tidy

#' @exportS3Method tidy pairwise_yules_y
tidy.pairwise_yules_y <- function(x, ...) {

    x %>%
        as_tibble(rownames = "var_a") %>%
        pivot_longer(
            cols = -1,
            names_to = "var_b",
            values_to = "yules_y"
        ) %>%
        mutate(yules_y = as.numeric(yules_y))
}

utils::globalVariables(c(
    "yules_y"
))
ttrodrigz/onezero documentation built on May 9, 2023, 2:59 p.m.