R/correlation.R

Defines functions validate_mir_names cortest viz_neg_cor_fraction viz_mrna_per_mir corr_mir_mrna

Documented in corr_mir_mrna viz_mrna_per_mir viz_neg_cor_fraction

#' @include all_classes.R
#' @name correlation
#' @rdname correlation
NULL
#' Correlation between miRNA and mRNA expression
#'
#' @describeIn correlation Compute correlation between miRNA and mRNA expression
#' @aliases correlation
#' @description Compute the correlation between the expression of each miRNA and
#'   its potential mRNA targets. If a gene is targeted by a miRNA we expect that
#'   the expression profile of the gene and the miRNA are anticorrelated.
#'
#' @param mir_targets an object of class MirTarget containing the miRNA-mRNA
#'   target interactions. It must contains three columns: mirna_name |
#'   target.gene_name | status.
#'
#' @param mir_mrna_exprs an object of class MirMrnaExprs, as generated by the
#'   function combine_mir_mrna_exprs(). It must contains the expression of
#'   miRNAs and mRNAs.
#' @param method method to be used for correlation analyses. Allowed values are
#'   one of "pearson", "kendall", or "spearman.
#' @param stand logical value. If TRUE the miR/mRNA expression data are
#'   standardized before computing correlation. Default value is TRUE.
#' @param mirs_of_interest a vector containing the names of miRNA for which you
#'   want to compute correlation analysis against target genes. Default value is
#'   "all".
#' @param sortby Specify how to sort the result. Allowed values are one of
#'   "cor.coeff", "p.value", "fdr", or "none": Abbreviation is allowed. Default
#'   value is "cor.coeff".
#' @param show_progress logical value. If TRUE, a progress bar is shown.
#' @return Returns an object of class CorMirTargets which is a data.frame
#'   containing correlation between miRNAs and their mRNAs. Column names of the results are as follow:
#'   \itemize{
#'   \item \bold{mirna_name}: Name of miRNAs
#'   \item \bold{target.gene_name}: The name of target genes
#'   \item \bold{cor.coeff}: Correlation coefficient
#'   \item \bold{p.value}: The p-value of the correlation test
#'   \item \bold{fdr}: Adjusted p-value using BH multiple testing correlation.
#'   This calculated separatly for the targets of each miRNA.
#'   \item \bold{status}: miRNA Target status. P for predicted, V for validated; PV for predicted and validated.
#'   }
#'
#' @author Alboukadel Kassambara \email{[email protected]@gmail.com}
#' @references A. Kassambara. Statistical Tools for High-throughput Data
#'   Analysis. http://www.sthda.com
#' @export
corr_mir_mrna <- function(mir_targets, mir_mrna_exprs, mirs_of_interest = "all",
                          method = "pearson", stand = TRUE,
                          sortby = c("cor.coeff", "p.value", "fdr", "none"),
                          show_progress = TRUE){

  if(!inherits(mir_mrna_exprs, "MirMrnaExprs"))
    stop("The argument mir_mrna_exprs must be an object of class MirMrnaExprs.")
  if(!inherits(mir_targets, "MirTarget"))
    stop("The argument mir_targets must be an object of class MirTarget.")

  sortby <- match.arg(sortby)[1]

  if(stand) mir_mrna_exprs <- scale(mir_mrna_exprs)

  mir_targets <- subset(mir_targets, !is.na(mir_targets$target.gene_name))
  if(mirs_of_interest =="all") mirnas <- unique(as.vector(mir_targets$mirna_name))
  else mirnas <- unique(mirs_of_interest)

  .validate_mir_names(mir_targets, mirnas)


  mir_anti_cor_target = NULL

  if(show_progress) message("\nComputing correlation between miRNAs and targets....\n")
  if(show_progress) progressBar <- txtProgressBar(min = 0, max = length(mirnas), style = 3)

  i = 0
  for(mir in mirnas){
    # miRNA target genes
    tt <- subset(mir_targets, mir_targets$mirna_name == mir)
    gns <- as.vector(tt$target.gene_name)

    # expression
    mir_exprs <- mir_mrna_exprs[, mir]
    gn_exprs <- mir_mrna_exprs[, gns, drop = FALSE ]

    # Correlation
    cor.res <- t(apply(t(gn_exprs), 1, .cortest, mir_exprs, method ))
    colnames(cor.res) <- c("cor.coeff", "p.value")
    cor.res <- as.data.frame(cor.res)
    cor.res$fdr <- p.adjust(cor.res$p.value, method = "BH")
    # cor.res <- subset(cor.res, cor.res$cor.coeff < coeff)
    if(nrow(cor.res) > 0 & sortby!="none") cor.res <- cor.res[order(cor.res[, sortby]), , drop = FALSE]
    # prepare the results
    if(nrow(cor.res)> 0) {
      rownames(cor.res) <- paste0(mir, "::", rownames(cor.res))
      mir_anti_cor_target <- rbind(mir_anti_cor_target, cor.res)
    }

    i <- i+1
    if(show_progress) setTxtProgressBar(progressBar, i)
  }

  if(!is.null(mir_anti_cor_target)){
    # mir_anti_cor_target$target.gene_name <- rownames(mir_anti_cor_target)
    mir_anti_cor_target$id <-  rownames(mir_anti_cor_target)
    mir_targets$id <- rownames(mir_targets)
    mir_anti_cor_target <- merge(mir_anti_cor_target,
                                 mir_targets,
                                 by.x = "id", by.y ="id", all.x = TRUE,
                                 sort = FALSE)
    clnames <- c("mirna_name", "target.gene_name", "cor.coeff", "p.value", "fdr", "status")
    mir_anti_cor_target <- mir_anti_cor_target[, clnames, drop = FALSE]
  }

  if(show_progress) {
    close(progressBar)
    message("**Finished**")
  }

  structure(mir_anti_cor_target, class = c("data.frame", "CorMirTarget"))
}



#' @describeIn correlation Visualize the number of correlated mRNAs per miRNA
#' @param object an object of class CorMirTarget as generated using
#' @param coeff correlation coefficient threshold. miRNAs having absolute
#' @param lab.size size of x axis labels
#' @param lab.angle rotation angle of x axis labels
#'   correlation >= coeff with one or more mRNAs are kept.
#' @export
viz_mrna_per_mir <- function(object, coeff = 0.5, lab.size = 8, lab.angle = 45,
                             output = c("plot", "table"))
  {

  output <- match.arg(output)

  if(!inherits(object, "CorMirTarget"))
    stop("An object of class CorMirTarget is required.")
  object <- subset(object, abs(object$cor.coeff) >= abs(coeff))

  # Negative and positive correlation
  neg.cor <- subset(object, object$cor.coeff < 0 )
  pos.cor <- subset(object, object$cor.coeff >  0 )

  # Count the number of positive and negative correlation per miRNA
  neg.cor <- summary(as.factor(neg.cor$mirna_name))
  neg.cor <- data.frame(mirna =  names(neg.cor), neg.cor = neg.cor)

  pos.cor <- summary(as.factor(pos.cor$mirna_name))
  pos.cor <- data.frame(mirna =  names(pos.cor), pos.cor = pos.cor)

  # Merge
  cor.count <- merge(neg.cor, pos.cor, by.x = "mirna", by.y = "mirna",
                     all = TRUE, sort = FALSE)
  cor.count$neg.cor[is.na(cor.count$neg.cor)]  <- 0
  cor.count$pos.cor[is.na(cor.count$pos.cor)]  <- 0
  cor.count$tot <- cor.count$neg.cor + cor.count$pos.cor
  cor.count <- cor.count[order(cor.count$tot, decreasing = TRUE), ]

  if(output == "table") return(cor.count)
  # Melt
  cor.count2 <- reshape2::melt(cor.count, measure.vars = c("neg.cor", "pos.cor"))
  cor.count2$value[is.na(cor.count2$value)] <- 0
  cor.count2$variable <- relevel(cor.count2$variable, "neg.cor")
  # cor.count2 <- cor.count2[order(cor.count2$value, decreasing = TRUE), ]
  ggplot2::ggplot(cor.count2, ggplot2::aes_string("mirna", "value")) +
    ggplot2::geom_bar(stat = "identity", ggplot2::aes_string(fill = "variable"))+
    ggplot2::theme_minimal() +
    ggplot2::scale_x_discrete(limits = cor.count$mirna)+
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle=lab.angle, size = lab.size)) +
    ggplot2::labs(y = "Number of coexpressed mRNAs", x = NULL, fill = "Count")
}


#' @describeIn correlation Fraction of negative correlation events
#' @param output allowed values are "plot" or "table"
#' @export
viz_neg_cor_fraction <- function(object, output = c("plot", "table")){
  coeff <- seq(from = 0, to = 0.95, by = 0.05)
  output <- match.arg(output)

  neg.frac <- NULL
  for(.coeff in coeff){
    res.count <- viz_mrna_per_mir(object, coeff = .coeff, output = "table")
    neg.frac <- c(neg.frac, sum(res.count$neg.cor)/sum(res.count$tot))
  }

  res <- data.frame(coeff = coeff, neg.frac = neg.frac)
  if(output == "table") return(res)
  plot(res, xlab = "Threshold of absolute correlation",
       ylab = "Fraction of negative co-expression events.",
       type = "b")

}




# A helper function
.cortest <- function(x, y, method = "pearson"){
  x <- as.numeric(x)
  y <- as.numeric(y)
  res <- cor.test(x, y, method = method)
  c(round(res$estimate,3), res$p.value)
}

# Check if miRNA names provided by users is valid
.validate_mir_names <- function(mir_targets, names){
  dd <- setdiff(names, mir_targets$mirna_name)
  if(length(dd)!=0) stop("Can't found the following miRNA names in your data: ",
                         paste(dd, collapse =", "))
}
kassambara/miRTarget documentation built on May 18, 2017, 4:15 p.m.