R/FindContrasts.R

Defines functions FindContrasts

# -=-=-=- Phylogenetically Independent Contrast analysis -=-=-=-

# Produces a vector with the correlation of each ontology term with the
# attribute in question after correcting for phylogenetic bias (see
# Felsenstein 1985 and APE package for details).
#
# Args:
#   x: (vector) variable with the counting of an attribute of
#               interest, like G+C, gene count or longevity.
#   y: (data.frame) table counting the presence of annotations
#                   of an ontology in each genome.
#   method: (char) method to use, allows "pearson", "spearman" and "kendall".
#   denominator: (numeric) parameter for normalization of the y variable.
#
# TODO: Check documentation of 'method' above. It is inconsistent with
#       the function definition (which accepts method = "gls" or method = "pic")
#
# Returns:
#   correlations: (vector) correlation of all listed ontology terms for the
#                          attribute in question.

FindContrasts <- function(x, y, tree, method = "gls", denominator = 1,
                          cores = 1) {

  # ================== Sanity checks ==================
  assertthat::assert_that(is.data.frame(x),
                          is.data.frame(y),
                          class(tree) %in% c("phylo", "multiPhylo"),
                          method %in% c("pic", "gls"),
                          is.null(denominator) | is.numeric(denominator),
                          is.null(denominator) | all(denominator > 0))


  tmp_x         <- x[, 1]
  names(tmp_x)  <- rownames(x)

  # Phylogenetically Independent Contrasts
  if (method == "pic") {
    contrast_x <- ape::pic(x = tmp_x, phy = tree)

    # Normalizing
    # FIXED
    # y is a data frame (46 x 45049 in the example) and
    # denominator is a numeric vector (length 46 in the example).
    # is this division *really* what you want? Because it is dividing in a very
    # counterintuitive manner. A possibly better way is suggested below.
    # y <- y / denominator
    if (!is.null(denominator)) y <- sweep(y, MARGIN = 1, denominator, `/`)


    cat("\nComputing contrasts:\n")
    models <- pbmcapply::pbmclapply(y,
                                    function(tmpy, tree, cx, nx){
                                      names(tmpy) <- nx
                                      cy  <- ape::pic(tmpy, phy = tree)
                                      mod <- stats::lm(cy ~ cx + 0)
                                      return(summary(mod)$coefficients[1, 4])},
                                    tree           = tree,
                                    cx             = contrast_x,
                                    nx             = rownames(x),
                                    mc.preschedule = TRUE,
                                    mc.cores = cores)

  } else if (method == "gls") {
    # TODO: method "gls" throws errors both in the original version and the
    # reimplemented version below. Please check.

    # Normalizing
    if (!is.null(denominator)) {
      # Getting counts per million to avoid false convergence (8) error
      # from gls function for small values,
      # see http://r.789695.n4.nabble.com/quot-False-convergence-quot-in-LME-td860675.html
      # y <- (y / denominator) * 10^6
      y <- sweep(y, MARGIN = 1, denominator, `/`) * 10^6
    }

    models <- pbmcapply::pbmclapply(y,
                                    function(tmpy, tmpx, nx, tree){
                                      if(any(tmpy == 0)){
                                        model <- nlme::gls(tmp_y ~ tmp_x,
                                                           data = as.data.frame(cbind(tmpx, tmpy)),
                                                           correlation = ape::corPagel(value = 1,
                                                                                       phy = tree),
                                                           control     = list(singular.ok = TRUE))
                                        return(as.numeric(summary(model)$coefficients[2]))
                                      } else {
                                        return(1)
                                      }},
                                    tmpx = tmp_x,
                                    nx = rownames(x),
                                    tree = tree,
                                    mc.preschedule = TRUE,
                                    mc.cores = cores)
  } else{
    stop("'", method,"' is not a recognized method in KOMODO2::FindContrasts()")
  }

  models        <- unlist(models)
  return(sort(models, decreasing = FALSE))
}
fcampelo/KOMODO2-CRAN documentation built on March 7, 2020, 6:35 a.m.