R/FindCorrelations.R

Defines functions FindCorrelations

# -=-=-=- Correlation analysis -=-=-=-

# Produces a vector with the correlation of each ontology term with the
# attribute in question.
#
# Args:
#   x: (vector) variable with the counting of an attribute of
#               interest, like G+C, gene count or longevity.
#   y: (data.frame) table counting the presence of annotations
#                   of an ontology in each genome.
#   method: (char) method to use, allows "pearson", "spearman" and "kendall".
#   denominator: (numeric) parameter for normalization of the y variable.
# Returns:
#   correlations: (vector) correlation of all listed ontology terms for the
#                          attribute in question.

# Consider adding differentiation for x and y denominator column.
FindCorrelations <- function(x, y, method = "pearson", denominator = 1,
                             cores = 1) {

  # ================== Sanity checks ==================
  assertthat::assert_that(is.data.frame(x),
                          is.data.frame(y),
                          is.null(denominator) | is.numeric(denominator),
                          is.null(denominator) | all(denominator > 0))


  # Normalizing
  # FIXED
  # y is a data frame and denominator is a numeric vector.
  # is this division *really* what you want? Because it is dividing in a very
  # counterintuitive manner. A possibly better way is suggested below.
  if (!is.null(denominator)) y <- sweep(y, MARGIN = 1, denominator, `/`) # y <- y / denominator


  cat("\nCalculating correlations:", method, "\n")
  tmp <- pbmcapply::pbmclapply(y,
                               function(tmpy, x, method, ny){
                                 mycor <- cor(x[ny, 1], tmpy,
                                              method = method)
                                 mypv  <- cor.test(x[ny, 1], tmpy,
                                                   method = method)$p.value
                                 return(list(mycor = mycor, mypv = mypv))},
                               x              = x,
                               method         = method,
                               ny             = rownames(y),
                               mc.preschedule = TRUE,
                               mc.cores       = cores)

  correlations               <- sapply(tmp, function(x) x$mycor)
  correlations.pvalue        <- sapply(tmp, function(x) x$mypv)
  names(correlations)        <- colnames(y)
  names(correlations.pvalue) <- colnames(y)
  correlations               <- sort(correlations,
                                     decreasing = TRUE)
  correlations.pvalue        <- sort(correlations.pvalue,
                                     decreasing = TRUE)

  return(list(cor         = correlations,
              cor.pvalues = correlations.pvalue))

}
fcampelo/KOMODO2-CRAN documentation built on March 7, 2020, 6:35 a.m.