R/pair_count.R

Defines functions pair_count_

#' Clone implementation of now deprecated tidytext::pair_count for backward compatibility in Exploratory.
#' @export
pair_count <- function (df,
                        group,
                        value,
                        distinct = TRUE,
                        diag = FALSE,
                        sort = TRUE,
                        unite = FALSE,
                        group_by = NULL){

  group_col <- col_name(substitute(group))
  value_col <- col_name(substitute(value))
  group_by_col <- col_name(substitute(group_by))

  pair_count_(df,
              group_col,
              value_col,
              distinct = distinct,
              diag = diag,
              sort = sort,
              unite = unite,
              group_by_col = group_by_col)
}
#' Clone implementation of now deprecated tidytext::pair_count_ for backward compatibility in Exploratory.
pair_count_ <- function(df,
                         group_col,
                         value_col,
                         distinct = FALSE,
                         diag = FALSE,
                         sort = FALSE,
                         unite = FALSE,
                         group_by_col = NULL) {
  validate_empty_data(df)
  # if group_by_col is provided, ungroup the data frame then do group_by.
  if(!is.null(group_by_col)) {
    df <- df %>% dplyr::ungroup() %>% dplyr::group_by(!!rlang::sym(group_by_col))
  }
  # If the data frame is grouped, perform the pair_count for each group.
  # NOTE: dplyr::group_modify works even if the data frame is not grouped.
  dplyr::group_modify(df, .f = function(.x, ...){
    #' Clone implementation of now deprecated tidytext::pair_count_ for backward compatibility in Exploratory.
    loadNamespace("Matrix")
    # sparse matrix by group rows and value columns
    sparse_tab <- xtabs(data = .x[, c(group_col, value_col)], sparse = TRUE, exclude = NULL, na.action=na.pass)

    # this is cross product, so this returns cross table of values count
    count_mat <- Matrix::t(sparse_tab > 0) %*% (sparse_tab > 0)

    # gather the matrix
    cnames <- c(stringr::str_c(value_col, c(".x", ".y")), "value")
    cnames_wihtout_value <- c(stringr::str_c(value_col, c(".x", ".y")))
    ret <- upper_gather(count_mat, diag = diag, cnames = cnames)
    ret <- ret[ret[[3]] > 0, ]

    if(!distinct){
      # bind rows by swapping the first and second columns
      # remove self to avoid duplicate
      copy <- upper_gather(count_mat, diag = FALSE, cnames = cnames[c(2,1,3)])
      copy <- copy[copy[[3]] > 0 & !is.na(copy[[3]]), ]
      ret <- dplyr::bind_rows(ret, copy)
    }

    if (sort) {
      # sort based on the last column
      ret <- ret[sort.list(ret[, ncol(ret)], decreasing=TRUE),]
    }
    if (unite) {
      ret <- ret %>% tidyr::unite(col = !!rlang::sym(value_col), cnames_wihtout_value)
    }
    # reset row names because binding and sorting mess up them
    rownames(ret) <- NULL
    ret
  }, .keep = TRUE) %>% dplyr::ungroup()
}
exploratory-io/exploratory_func documentation built on April 23, 2024, 9:15 p.m.