R/pair_methods.R
In bullseye: Visualising Multiple Pairwise Variable Correlations and Other Scores

Documented in ace_cor pair_ace pair_cancor pair_chi pair_cor pair_dcor pair_gkGamma pair_gkTau pair_mine pair_nmi pair_polychor pair_polyserial pair_tauA pair_tauB pair_tauC pair_tauW pair_uncertainty pairwise_by

#' Pearson, Spearman or Kendall correlation
#'
#' Calculates one of either pearson, spearman or kendall correlation for every numeric variable pair in a dataset.
#'
#' @param d A dataframe
#' @param method A character string for the correlation coefficient to be calculated. Either "pearson" (default),
#'               "spearman", or "kendall". If the value is "all", then all three correlations are calculated.
#' @param handle.na If TRUE uses pairwise complete observations to calculate correlation coefficient, otherwise NAs not handled.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with calculated association value for every numeric variable pair, 
#' or NULL if there are not at least two numeric variables
#' @seealso See \code{\link{pair_methods}}  for other score options.
#' @export
#'
#' @examples
#' pair_cor(iris)
#' pair_cor(iris, method="kendall")
#' pair_cor(iris, method="spearman")
#' pair_cor(iris, method="all")


pair_cor <- function(d, method="pearson", handle.na=TRUE,...){
  check_df(d)
  d <- d[, sapply(d, is.numeric), drop=FALSE]
  if (ncol(d) > 1){
    if (method == "all"){
      pearson <- pair_cor(d,method="pearson", handle.na = handle.na)
      spearman <- pair_cor(d,method="spearman", handle.na = handle.na)
      kendall <- pair_cor(d,method="kendall", handle.na = handle.na)
      dplyr::bind_rows(pearson ,spearman, kendall)
    }
    else {if (handle.na)
      dcor <- cor(d,method=method,use="pairwise.complete.obs")
    else dcor <- cor(d,method=method,...)
    pairwise(dcor, score=method, pair_type = "nn")
    }
  }
}




ccor <- function(x,y, handle.na=TRUE){
  if(handle.na){
    pick <- complete.cases(x, y)
    x <- x[pick]
    y <- y[pick]
  }
  
  if (length(x) <= 2) {
    return (NA)
  }
  if (!is.numeric(x))
    x <- sapply(unique(x), function(u) as.numeric(x ==u))[,-1]
  if (!is.numeric(y))
    y <- sapply(unique(y), function(u) as.numeric(y ==u))[,-1]
  tryCatch(cancor(x,y)$cor[1], error = function(e) {
    # message("Cannot calculate cancor, returning NA")
    NA
  }
  )
}
  
  
#' Canonical correlation
#'
#' Calculates canonical correlation for every variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na If TRUE uses pairwise complete observations to calculate correlation coefficient,, otherwise NAs not handled.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with canonical correlation for every numeric or factor or mixed variable pair
#' @export
#'
#' @examples
#' pair_cancor(iris)

pair_cancor <- function(d,handle.na=TRUE,...){
  check_df(d)
  
  d <- d[, sapply(d, function(x) is.numeric(x) | is.factor(x)), drop=FALSE]
  
  if (ncol(d) > 1){
    a <- pairwise(d, score="cancor")
    fn <- function(x,y){
      x <- d[[x]]
      y <- d[[y]]
      ccor(x,y,handle.na=handle.na)
    }
    
    a$value <- mapply(fn, a$x,a$y, USE.NAMES = FALSE)
    a
  }
}

#' Distance correlation
#'
#' Calculates distance correlation for every numeric variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na If TRUE uses pairwise complete observations to calculate distance correlation, otherwise NAs not handled.
#' @param ... other arguments
#' @details The distance correlation is calculated using \code{\link[energy]{dcor2d}} from \code{energy} package
#' @return A tibble of class `pairwise` with distance correlation for every numeric variable pair,
#' or NULL if there are not at least two numeric variables
#' @export
#'
#' @examples
#' pair_dcor(iris)

pair_dcor <- function(d, handle.na=TRUE,...){
  if (!requireNamespace("energy", quietly = TRUE))
    stop("Please install package 'energy' to use pair_dcor", call.=FALSE)
  check_df(d)
  
  d <- d[, sapply(d, is.numeric), drop=FALSE]
  if(ncol(d)>1){
    dcor <- pairwise(d, score="dcor", pair_type = "nn")
    fn <- function(x,y){
      x <- d[[x]]
      y <- d[[y]]
      if(handle.na){
        pick <- complete.cases(x, y)
        x <- x[pick]
        y <- y[pick]
      }
      sqrt(energy::dcor2d(x,y,...))
    }
    dcor$value <-  mapply(fn, dcor$x,dcor$y, USE.NAMES = FALSE)
    dcor
  }

}




# pair_mine <- function(d, method="mic",handle.na=TRUE,...){
#   if (!requireNamespace("minerva", quietly = TRUE))
#     stop("Please install package 'minerva' to use pair_mine", call.=FALSE)
#   check_df(d)
#   
#   d <- d[, sapply(d, is.numeric), drop=FALSE]
#   if(ncol(d)>1){
#     if (handle.na)
#       dcor <- minerva::mine(d,use="pairwise.complete.obs",...)
#     else dcor <- minerva::mine(d,...)
# 
#     dcor <- dcor[[toupper(method)]]
#     pairwise(dcor, score=method, pair_type = "nn")
#   }
# 
# }


#' MINE family values
#'
#' Calculates MINE family values for every numeric variable pair in a dataset.
#'
#' @param d A dataframe
#' @param method character vector for the MINE value to be calculated. Subset of "MIC","MAS","MEV","MCN","MICR2", "GMIC",  "TIC"  
#' @param handle.na If TRUE uses pairwise complete observations to calculate score, otherwise NAs not handled.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with scores for numeric variable pairs,
#' or NULL if there are not at least two numeric variables
#' @export
#' @details The values are calculated using \code{\link[minerva]{mine}} from \code{minerva}
#' @examples
#'  pair_mine(iris)
#'  pair_mine(iris, method="MAS")

#' @references Reshef, David N., et al. "Detecting novel associations in large data sets."
#' science 334.6062 (2011): 1518-1524
#' 
pair_mine <- function(d, method="MIC",handle.na=TRUE,...){
  if (!requireNamespace("minerva", quietly = TRUE))
    stop("Please install package 'minerva' to use pair_mine", call.=FALSE)
  check_df(d)
  
  d <- d[, sapply(d, is.numeric), drop=FALSE]
  if(ncol(d)>1){
    if (handle.na)
      dcor <- minerva::mine(d,use="pairwise.complete.obs",normalization=TRUE,...)
    else dcor <- minerva::mine(d,normalization=TRUE,...)
    mine_choices <- names(dcor)
    sel_mine <- match.arg(toupper(method), mine_choices, several.ok=TRUE)
    p <- pairwise(dcor[[sel_mine[1]]], score=sel_mine[1], pair_type = "nn")
    for (m in sel_mine[-1]){
      pm <- pairwise(dcor[[m]], score=m, pair_type = "nn")
      p <- rbind(p, pm)
    }
    p |> 
      dplyr::arrange(.data$x, .data$y)
  }
  
}


#' Normalized mutual information
#'
#' Calculates normalized mutual information for every numeric or factor or mixed variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na If TRUE uses pairwise complete observations to calculate normalized mutual information, otherwise NAs not handled.
#' @param ... other arguments
#'
#' @details The normalized mutual information is calculated using \code{\link[linkspotter]{maxNMI}} from linkpotter package
#' @return A tibble of class `pairwise`
#' @export
#'
#' @examples
#' pair_nmi(iris)


pair_nmi <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("linkspotter", quietly = TRUE))
    stop("Please install package 'linkspotter' to use pair_nmi",call.=FALSE)
  check_df(d)
  d <- dplyr::select(d, dplyr::where(is.numeric) | dplyr::where(is.factor) )
  if (ncol(d) > 1){
    nmi <- pairwise(d, score="nmi")
    fn <- function(x,y){
      x <- d[[x]]
      y <- d[[y]]
      if(handle.na){
        pick <- complete.cases(x, y)
        x <- x[pick]
        y <- y[pick]
      }
      mi <- suppressMessages(linkspotter::maxNMI(x,y))
    }
     nmi$value <-  mapply(fn, nmi$x,nmi$y, USE.NAMES = FALSE)
    nmi
    
  }
}


#' Polychoric correlation
#'
#' Calculates Polychoric correlation using  for every factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with polychoric correlation for factor pairs, 
#' or NULL if there are not at least two factor variables
#'
#' @details The polychoric correlation is calculated using the \code{\link[polycor]{polychor}} function from the
#' \code{polycor} package, and assumes factor levels are in the given order. NAs are automatically handled by pairwise omit.
#' @export
#'
#' @examples
#' pair_polychor(iris)


pair_polychor <- function(d,handle.na=TRUE,...){
  # if (!requireNamespace("polychor", quietly = TRUE))
  #   stop("Please install package 'polychor' to use pair_polychor", call.=FALSE)
  check_df(d)
  
  # polychor automatically does pairwise omit
  d <- d[, sapply(d, is.factor), drop=FALSE]
  if(ncol(d)>1){
    pcor <- pairwise(d, score="polychor", pair_type = "ff")
    pcor$value <- mapply(function(x,y) polycor::polychor(d[[x]],d[[y]],...), pcor$x,pcor$y, USE.NAMES = FALSE)
    pcor
  }

}


#' Polyserial correlation
#'
#' Calculates Polyserial correlation using  for every factor-numeric variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with polyserial correlation for factor-numeric pairs, 
#' or NULL if there are not at least one such pair.
#'
#' @details The polyserial correlation is calculated using the \code{\link[polycor]{polyserial}} function from the
#' \code{polycor} package, and assumes factor levels are in the given order. NAs are automatically handled by pairwise omit.
#' @export
#'
#' @examples
#' pair_polyserial(iris)


pair_polyserial <- function(d,handle.na=TRUE,...){
  # if (!requireNamespace("polychor", quietly = TRUE))
  #   stop("Please install package 'polychor' to use pair_polyserial", call.=FALSE)
  check_df(d)
  
  # polychor automatically does pairwise omit
  d <- dplyr::select(d, dplyr::where(is.numeric) | dplyr::where(is.factor) )
  if(ncol(d)>1){
    pser <- pairwise(d, score="polyserial")
    
    fn <- function(x,y){
      if (is.factor(d[[x]]))
        polycor::polyserial(d[[y]],d[[x]])
      else polycor::polyserial(d[[x]],d[[y]])
    }
     pser <- pser[pser$pair_type == "fn",]
    # if (nrow(pser) == 1) return(NULL)
    suppressWarnings(pser$value <- mapply(fn, pser$x,pser$y, USE.NAMES = FALSE))
    pser
  }
  
}






pair_tau <- function(d,method="B",handle.na=TRUE,...){
  # automatically does pairwise omit for A,B,C
  d <- dplyr::select(d, dplyr::where(is.factor))
  if(ncol(d)>1){
      a <- pairwise(d, score=paste0("tau", method), pair_type = "ff")
      fns <- c("A"= DescTools::KendallTauA, "B"=DescTools::KendallTauB, "C" = DescTools::StuartTauC, "W"=
                 DescTools::KendallW)
      fn <- fns[[method]]
      fnlocal <- function(x,y){
        if (length(unique(d[[x]])) <= 1) return(NA)
        if (length(unique(d[[y]])) <= 1) return(NA)
        if (method =="W")
          fn(d[c(x,y)], correct=TRUE, na.rm=handle.na,...)
        else fn(d[[x]],d[[y]],...)
      }
      a$value <- mapply(fnlocal, a$x,a$y, USE.NAMES = FALSE)
      a
    }
}

#' Kendall's tau  B for association between ordinal factors.
#'
#' Calculates Kendall's tau B every  factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param ... other arguments
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @return  A tibble of class `pairwise` with factor pairs, or NULL if there are not at least two factor variables
#'
#' @details Calculated using  \code{\link[DescTools]{KendallTauB}}. Assumes factor levels are in the given order. 
#' NAs are automatically handled by pairwise omit.
#' @export
#' @examples

#'  d <- data.frame(x=rnorm(20), 
#'                  y=factor(sample(3,20, replace=TRUE)), 
#'                  z=factor(sample(2,20, replace=TRUE)))
#'  pair_tauB(d)

pair_tauB <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_tauB ", call.=FALSE)
  check_df(d)
  pair_tau(d, method="B",...)
}

#' Kendall's tau  A for association between ordinal factors.
#'
#' Calculates Kendall's tau A for every  factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return  A tibble of class `pairwise` with factor pairs, or NULL if there are not at least two factor variables
#'
#' @details Calculated using  \code{\link[DescTools]{KendallTauA}}. Assumes factor levels are in the given order. 
#' NAs are automatically handled by pairwise omit.
#' @export
#' @examples
#'  d <- data.frame(x=rnorm(20), 
#'                  y=factor(sample(3,20, replace=TRUE)), 
#'                  z=factor(sample(2,20, replace=TRUE)))
#'  pair_tauA(d)

pair_tauA <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_tauA ", call.=FALSE)
  check_df(d)
  pair_tau(d, method="A",...)
}

#' Stuarts's tau  C for association between ordinal factors.
#'
#' Calculates Stuarts's tau C every  factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return  A tibble of class `pairwise` with factor pairs, or NULL if there are not at least two factor variables
#'
#' @details Calculated using  \code{\link[DescTools]{StuartTauC}}. Assumes factor levels are in the given order. 
#' NAs are automatically handled by pairwise omit.
#' @export
#' @examples
#'  d <- data.frame(x=rnorm(20), 
#'                  y=factor(sample(3,20, replace=TRUE)), 
#'                  z=factor(sample(2,20, replace=TRUE)))
#'  pair_tauC(d)

pair_tauC <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_tauC ", call.=FALSE)
  check_df(d)
  pair_tau(d, method="C")
}

#' Kendall's W for association between ordinal factors.
#'
#' Calculates Kendall's tau W every  factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return  A tibble of class `pairwise` with factor pairs, or NULL if there are not at least two factor variables
#'
#' @details Calculated using  \code{\link[DescTools]{KendallW}}. Assumes factor levels are in the given order.
#'  NAs are automatically handled by pairwise omit.
#' @export
#' @examples
#'  d <- data.frame(x=rnorm(20), 
#'                  y=factor(sample(3,20, replace=TRUE)), 
#'                  z=factor(sample(2,20, replace=TRUE)))
#'  pair_tauW(d)

pair_tauW <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_tauW ", call.=FALSE)
  check_df(d)
  pair_tau(d, method="W")
}

#' Uncertainty coefficient for association between factors.
#'
#' Calculates uncertainty coefficient for every factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with every factor variable pair and uncertainty coefficient value,
#' or NULL if there are not at least two factor variables

#' @details The Uncertainty coefficient is calculated using \code{\link[DescTools]{UncertCoef}} function from the
#' \code{DescTools} package.
#'
#' @export
#'
#' @examples
#'  pair_uncertainty(iris)

pair_uncertainty <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_uncertainty", call.=FALSE)
  check_df(d)
  
  d <- dplyr::select(d, dplyr::where(is.factor))
  if(ncol(d)>1){
    a <- pairwise(d, score="uncertainty", pair_type = "ff")
    a$value <- mapply(function(x,y) DescTools::UncertCoef(d[[x]],d[[y]],...), a$x,a$y, USE.NAMES = FALSE)
    a
  }

}


#' Goodman Kruskal's Tau for association between ordinal factors.
#'
#' Calculates Goodman Kruskal's Tau coefficient for every factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with Goodman Kruskal's Tau for every factor variable pair,
#' or NULL if there are not at least two factor variables
#' @details The Goodman Kruskal's Tau coefficient is calculated using \code{\link[DescTools]{GoodmanKruskalTau}}
#' function from the \code{DescTools} package. Assumes factor levels are in the given order.
#' NAs are automatically handled by pairwise omit.
#' @export
#'
#' @examples
#'  pair_gkTau(iris)

pair_gkTau <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_gkTau", call.=FALSE)
  check_df(d)
  
  d <- dplyr::select(d, dplyr::where(is.factor))
  if(ncol(d)>1){
    a <- pairwise(d, score="gkTau", pair_type = "ff")
    fnlocal <- function(x,y) 
      max(DescTools::GoodmanKruskalTau(d[[x]],d[[y]],...),DescTools::GoodmanKruskalTau(d[[y]],d[[x]],...))
    a$value <- mapply(fnlocal, a$x,a$y, USE.NAMES = FALSE)
    a
  }

}


#' Goodman Kruskal's Gamma for association between ordinal factors.
#'
#' Calculates Goodman Kruskal's Gamma coefficient for every factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with factor variable pairs and Goodman Kruskal's Gamma coefficient, 
#' or NULL if there are not at least two factor variables

#' @details The Goodman Kruskal's Gamma coefficient is calculated using \code{\link[DescTools]{GoodmanKruskalGamma}}
#' function from the \code{DescTools} package. Assumes factor levels are in the given order.
#'  NAs are automatically handled by pairwise omit.
#' @export
#'
#' @examples
#'  pair_gkGamma(iris)

pair_gkGamma <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_gkGamma", call.=FALSE)
  
  check_df(d)
  
  d <- dplyr::select(d, dplyr::where(is.factor))
  if(ncol(d)>1){
    a <- pairwise(d, score="gkGamma", pair_type = "ff")
    a$value <- mapply(function(x,y) DescTools::GoodmanKruskalGamma(d[[x]],d[[y]],...), a$x,a$y, USE.NAMES = FALSE)
    a
  }

}

#' Pearson's Contingency Coefficient for association between  factors.
#'
#' Calculates Pearson's Contingency coefficient for every factor variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na ignored. Pairwise complete observations are used automatically.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with calculated Pearson's contingency coefficient for every factor variable
#' pair, or NULL if there are not at least two factor variables
#' @export
#' @details The Pearson's contingency coefficient is calculated using \code{\link[DescTools]{ContCoef}}.
#' NAs are automatically handled by pairwise omit.
#'
#' @examples
#'  pair_chi(iris)

pair_chi <- function(d,handle.na=TRUE,...){
  if (!requireNamespace("DescTools", quietly = TRUE))
    stop("Please install package 'DescTools' to use pair_chi", call.=FALSE)
  check_df(d)
  
  d <- dplyr::select(d, dplyr::where(is.factor))
  if(ncol(d)>1){
    a <- pairwise(d, score="chi", pair_type = "ff")
    a$value <- mapply(function(x,y) DescTools::ContCoef(d[[x]],d[[y]],...), a$x,a$y, USE.NAMES = FALSE)
    a
  }

}


#' Calculates ace based transformations and correlation, handling missing values and factors.
#'
#' @param x a numeric vector or factor
#' @param y a numeric vector or factor
#' @param handle.na  If TRUE uses pairwise complete observations.
#'
#' @return result of acepack::ace
#' @export
#'
#' @examples ace_cor(iris$Sepal.Length, iris$Species)
ace_cor <- function(x,y,handle.na=TRUE) {
  if (!requireNamespace("acepack", quietly = TRUE))
    stop("Please install package 'acepack' to use ace_cor", call.=FALSE)
  if(handle.na){
    pick <- complete.cases(x, y)
    x <- x[pick]
    y <- y[pick]
  }
  cat <- NULL
  if (is.factor(x)) {
    x <- as.numeric(x)
    cat <- 1
  }
  if (is.factor(y)) {
    y <- as.numeric(y)
    cat <- c(cat,0)
  }
  acepack::ace(x,y, cat=cat)
  
}

#' Alternating conditional expectations correlation
#'
#' Calculates the maximal correlation coefficient from alternating conditional expectations algorithm for every variable pair in a dataset.
#'
#' @param d A dataframe
#' @param handle.na If TRUE uses pairwise complete observations, otherwise NAs not handled.
#' @param ... other arguments
#'
#' @return A tibble of class `pairwise` with a maximal correlation  from the alternating conditional expectations
#' algorithm for every variable pair
#'
#' @details The maximal correlation is calculated using alternating conditional expectations
#' algorithm which find the transformations of variables such that the squared correlation
#' is maximised. The \code{\link[acepack]{ace}} function from \code{acepack} package is used for the
#' calculation.
#' @export
#' @references Breiman, Leo, and Jerome H. Friedman.
#' "Estimating optimal transformations for multiple regression and correlation."
#' Journal of the American statistical Association 80.391 (1985): 580-598.
#'
#' @examples
#'  pair_ace(iris)

pair_ace <- function(d, handle.na = TRUE, ...) {
  if (!requireNamespace("acepack", quietly = TRUE))
    stop("Please install package 'acepack' to use pair_acepack", call.=FALSE)
  check_df(d)
  d <- dplyr::select(d, dplyr::where(is.numeric) | dplyr::where(is.factor) )
  if (ncol(d) > 1){
    ace_score <- pairwise(d, score = "ace")
    ace_fn <- function(x,y) {
      x <- d[[x]]
      y <- d[[y]]
      s <- max(0,ace_cor(x,y,handle.na=handle.na)[["rsq"]]) # sometimes acepack gives very small negatives
      sqrt(s)
    }

    ace_score$value <- mapply(ace_fn, ace_score$x,ace_score$y, USE.NAMES = FALSE)
    ace_score
  }
}



#' Pairwise score functions available in the package
#'
#' A tibble of score functions along with the types of variable pairs these functions
#' can be applied to. It also contains information regarding the packages used to calculate scores
#' and the range of the values calculated.
#'
#' @return tibble
#' @export
#' @examples
#'  pair_methods

pair_methods <- dplyr::tribble(
  ~name, ~nn, ~ff,  ~fn, ~from, ~range, ~ordinal,
  "pair_cor", TRUE, FALSE, FALSE, "cor", "[-1,1]", NA,
  "pair_dcor", TRUE, FALSE,  FALSE, "energy::dcor2d", "[0,1]", NA,
  "pair_mine", TRUE, FALSE, FALSE, "minerva::mine", "[0,1]",NA,
  "pair_ace", TRUE, TRUE,  TRUE, "acepack::ace", "[0,1]",FALSE,
  "pair_cancor", TRUE, TRUE, TRUE, "cancor", "[0,1]",FALSE,
  "pair_nmi",  TRUE, TRUE,  TRUE, "linkspotter::maxNMI", "[0,1]",FALSE,
  "pair_polychor", FALSE, TRUE, FALSE, "polycor::polychor", "[-1,1]",TRUE,
  "pair_polyserial", FALSE, FALSE, TRUE, "polycor::polyserial", "[-1,1]",TRUE,
  "pair_tauB", FALSE, TRUE, FALSE, "DescTools::KendalTauB", "[-1,1]",TRUE,
  "pair_tauA", FALSE, TRUE, FALSE, "DescTools::KendalTauA", "[-1,1]",TRUE,
  "pair_tauC", FALSE, TRUE, FALSE, "DescTools::StuartTauC", "[-1,1]",TRUE,
  "pair_tauW", FALSE, TRUE, FALSE, "DescTools::KendalW", "[-1,1]",TRUE,
  "pair_gkGamma", FALSE,  TRUE, FALSE, "DescTools::GoodmanKruskalGamma", "[-1,1]",TRUE,
  "pair_gkTau", FALSE,  TRUE, FALSE, "DescTools::GoodmanKruskalTau", "[0,1]",TRUE,
  "pair_uncertainty", FALSE,  TRUE, FALSE, "DescTools::UncertCoef", "[0,1]",FALSE,
  "pair_chi", FALSE,  TRUE,  FALSE, "DescTools::ContCoef", "[0,1]",FALSE,
  "pair_scag", TRUE, FALSE,  FALSE, "scagnostics::scagnostics", "[0,1]",NA,
)


#' Constructs a pairwise result for each level of a by variable.
#'
#' @param d a dataframe
#' @param by a character string for the name of the conditioning variable.
#' @param pair_fun A function returning a `pairwise` from a dataset.
#' @param ungrouped If TRUE calculates the ungrouped score in addition to grouped scores.
#'
#' @return tibble of class "pairwise"
#' @export
#'
#' @examples
#' pairwise_by(iris, by="Species", pair_cor)
#' 
#' 

pairwise_by <- function(d, by, pair_fun, ungrouped=TRUE){
  if (!(by %in% names(d))) cli::cli_abort(c("{.var by} not present in dataset."))
  tab <- table(d[[by]])
  if (any(tab == 1)) cli::cli_abort(c("{by} cannot be used as a grouping variable. Need more than one observation at each level."))
  result <- d |>
    dplyr::group_by(.data[[by]]) |>
    dplyr::group_modify(function(x,y) pair_fun(x)) |>
    dplyr::ungroup() |>
    dplyr::mutate(group=.data[[by]]) |>
    dplyr::select(-dplyr::all_of(by))
  class(result)<-append("pairwise", class(result))
  if (ungrouped){
    overall <- d |>
      dplyr::select(-dplyr::all_of(by)) |>
      pair_fun()
    result <- rbind(result, overall)
  }
  result |> dplyr::arrange(.data$x, .data$y)
}
Any scripts or data that you put into this service are public.
bullseye documentation built on Sept. 11, 2024, 9:24 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
bullseye
Visualising Multiple Pairwise Variable Correlations and Other Scores

R/pair_methods.R
In bullseye: Visualising Multiple Pairwise Variable Correlations and Other Scores

Defines functions pairwise_by pair_ace ace_cor pair_chi pair_gkGamma pair_gkTau pair_uncertainty pair_tauW pair_tauC pair_tauA pair_tauB pair_tau pair_polyserial pair_polychor pair_nmi pair_mine pair_dcor pair_cancor ccor pair_cor

Documented in ace_cor pair_ace pair_cancor pair_chi pair_cor pair_dcor pair_gkGamma pair_gkTau pair_mine pair_nmi pair_polychor pair_polyserial pair_tauA pair_tauB pair_tauC pair_tauW pair_uncertainty pairwise_by

Try the bullseye package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

bullseye Visualising Multiple Pairwise Variable Correlations and Other Scores

R/pair_methods.R In bullseye: Visualising Multiple Pairwise Variable Correlations and Other Scores

Defines functions pairwise_by pair_ace ace_cor pair_chi pair_gkGamma pair_gkTau pair_uncertainty pair_tauW pair_tauC pair_tauA pair_tauB pair_tau pair_polyserial pair_polychor pair_nmi pair_mine pair_dcor pair_cancor ccor pair_cor

Documented in ace_cor pair_ace pair_cancor pair_chi pair_cor pair_dcor pair_gkGamma pair_gkTau pair_mine pair_nmi pair_polychor pair_polyserial pair_tauA pair_tauB pair_tauC pair_tauW pair_uncertainty pairwise_by

Try the bullseye package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

bullseye
Visualising Multiple Pairwise Variable Correlations and Other Scores

R/pair_methods.R
In bullseye: Visualising Multiple Pairwise Variable Correlations and Other Scores