R/ceRNAAnalysis.R

Defines functions ExportNetwork CeExportNetwork mirCorTestFun multiRegTestFun multiRegFun hyperTestFun ceRNAAnslysis

Documented in ceRNAAnslysis ExportNetwork

##' @title Competing endogenous RNAs (ceRNAs) analysis
##' @description Identify ceRNAs by
##'     (1) number of shared miRNAs between lncRNA and mRNA;
##'     (2) expression correlation of lncRNA and mRNA;
##'     (3) regulation similarity of shared miRNAs on lncRNA and mRNA;
##'     (4) sensitivity correlation
##' @param lnc a vector of Ensembl long non-coding gene ids
##' @param pc a vector of Ensembl protein coding gene ids
##' @param deMIR a vector of differentially expressed miRNAs.
##'     Default is \code{NULL}
##' @param lnc.targets a character string specifying the database
##'     of miRNA-lncRNA interactions.
##'     Should be one of \code{'spongeScan'}, \code{'starBase'},
##'     and \code{'miRcode'}. Default is \code{'starBase'}. \cr\cr
##'     Or a \code{list} of miRNA-lncRNA interactions generated by users
##' @param pc.targets a character string specifying the database of
##'     miRNA-lncRNA interactions.
##'     Should be one of \code{'spongeScan'}, \code{'starBase'},
##'     and \code{'miRcode'}. Default is \code{'starBase'}. \cr\cr
##'     Or a \code{list} of miRNA-lncRNA interactions generated by users
##' @param rna.expr normalization mRNAs counts datafram,contain lnc and pc
##'     transformed gene expression data
##' @param mir.expr normalization miRNAs counts datafram
##'     transformed mature miRNA expression data
##' @return A dataframe containing ceRNA pairs, expression correlation
##'     between lncRNA and mRNA, the number and hypergeometric significance
##'     of shared miRNAs, regulation similarity score, and the mean sensitity
##'     correlation (the difference between Pearson correlation and partial
##'     correlation) of multiple lncRNA-miRNA-mRNA triplets, etc.
##' @export
##' @references
##'     Paci P, Colombo T, Farina L. Computational analysis identifies
##'     a sponge interaction network  between long non-coding RNAs and
##'     messenger RNAs in human breast cancer.  BMC systems biology.
##'     2014 Jul 17;8(1):83.
ceRNAAnslysis <- function(lnc, pc, deMIR=NULL, lnc.targets='starBase',
                          pc.targets='starBase', rna.expr, mir.expr) {

  hyperOutput <- hyperTestFun(lnc, pc, deMIR,
                              lnc.targets=lnc.targets, pc.targets=pc.targets)
  message ('Step 1/3: Hypergenometric test done !')

  regOutput <- multiRegTestFun(hyperOutput, rna.expr=rna.expr,
                               mir.expr=mir.expr)
  message ('Step 2/3: Correlation analysis done !\n',
           'Step 3/3: Regulation pattern analysis done !')

  ceOutput <- data.frame(hyperOutput, regOutput, row.names=NULL)

  return(ceOutput)
}


#### hypergeometric test
hyperTestFun <- function(lnc, pc, deMIR,
                         lnc.targets='starBase', pc.targets='starBase') {

  if (length(lnc.targets) > 1) {
    lnc.targets <- lnc.targets
  } else {
    lnc.targets <- lncTargets[[lnc.targets]]
  }

  if (length(pc.targets) > 1) {
    pc.targets <- pc.targets
  } else {
    pc.targets <- pcTargets[[pc.targets]]
  }

  mir1 <- unique(unlist(lnc.targets))
  mir2 <- unique(unlist(pc.targets))

  mirs <- union(mir1,mir2)
  popTotal <- length(mirs)

  ceLNC <- lnc[lnc %in% names(lnc.targets)]
  cePC <- pc[pc %in% names(pc.targets)]
  #ceMIR <- mir[mir %in% mirs]

  hyperOutput <- list()
  i <- 0
  for (lncID in ceLNC) {
    listTotal <- length(lnc.targets[[lncID]])
    for (gene in cePC) {
      i = i + 1
      ovlp <- intersect(lnc.targets[[lncID]], pc.targets[[gene]])

      popHits <- length(pc.targets[[gene]])
      Counts <- length(ovlp)

      ovlpMIRs <- paste(ovlp, collapse = ',')
      foldEnrichment <- Counts/listTotal*popTotal/popHits
      pValue <- phyper(Counts-1, popHits, popTotal-popHits,
                       listTotal, lower.tail=FALSE, log.p=FALSE)

      ceMIR <- Reduce(intersect, list(ovlp, deMIR))
      deMIRs <- paste(ceMIR, collapse = ',')
      deMIRCounts <- length(ceMIR)

      hyperOutput[[i]] <- c(lncID, gene, Counts, listTotal,
                            popHits,popTotal,foldEnrichment,pValue,ovlpMIRs,
                            deMIRCounts, deMIRs)

    }
  }

  #hyperOutput <- Reduce(rbind, hyperOutput)  ## slower
  hyperOutput <- do.call(rbind, hyperOutput)
  #hyperOutput <- rbind_list(hyperOutput) ## not test

  colnames(hyperOutput) <- c('lncRNAs','Genes','Counts','listTotal',
                             'popHits','popTotal','foldEnrichment','hyperPValue','miRNAs',
                             'deMIRCounts','deMIRs')
  hyperOutput <- as.data.frame(as.matrix(hyperOutput),
                               stringsAsFactors=FALSE)
  hyperOutput <- hyperOutput[as.numeric(hyperOutput$Counts)>0,]

  #hyperOutput$FDR <- p.adjust(as.numeric(as.character(hyperOutput$pValue)),
  #method = 'fdr')
  #hyperOutput <- hyperOutput[hyperOutput$Counts>0,]
  #hyperOutput$lncRNAs <- ensembl2symbolFun(hyperOutput$lncRNAs)
  #hyperOutput$gene <- ensembl2symbolFun(hyperOutput$gene)

  if (is.null(deMIR)) {
    hyperOutput <- hyperOutput[,! colnames(hyperOutput) %in%
                                 c('deMIRCounts','deMIRs')]
  }

  return (hyperOutput)
}



########################################################
######## other scores
multiRegFun <- function(lnc, pc, mirs, rna.expr, mir.expr) {
  lncDa <- unlist(rna.expr[lnc,])
  pcDa <- unlist(rna.expr[pc,])

  corpl <- cor.test(pcDa, lncDa, alternative='greater')
  ppl <- corpl$p.value
  regpl <- corpl$estimate

  mirs <- as.character(mirs)

  if (mirs == '') {
    regSim=NA
    sppc = NA

  } 
  else {
    mirs <- unlist(strsplit(mirs, ',', fixed=TRUE))
    mirCor <- vapply(mirs, function(mir)
      mirCorTestFun(lncDa, pcDa, mir, mir.expr),
      numeric(2))

    reglm <- mirCor[1,]
    regpm <- mirCor[2,]

    regSim <- 1-mean((abs(reglm - regpm)/(abs(reglm) + abs(regpm)))
                     ^length(mirs))
    #lncACT <- mean((abs(regpl)+abs(reglm)+abs(regpm))/3)
    sppc <- mean(regpl-(regpl-reglm*regpm)/(sqrt(1-reglm^2)*
                                              sqrt(1-regpm^2)))
    #cos <- sum(reglm*regpm)/(sqrt(sum(reglm^2))*sqrt(sum(regpm^2)))
    #col <- sum(abs(reglm)*abs(regpm))/(sqrt(sum(abs(reglm)))*
    #sqrt(sum(abs(regpm))))
    #cosCol <- (cos+col)/2

  }

  #scores <- c(regpl, ppl, reg, lncACT, partialSen, cosCol)
  scores <- c(cor=regpl, corPValue=ppl, regSim, sppc)
  return (scores)
}



multiRegTestFun <- function(hyperOutput, rna.expr, mir.expr) {

  samples <- intersect(colnames(rna.expr), colnames(mir.expr))

  rna.expr <- rna.expr[,samples]
  mir.expr <- mir.expr[,samples]

  lncID <- hyperOutput$lncRNAs
  pcID <- hyperOutput$Genes
  mirID <- hyperOutput$deMIRs


  reg <- vapply(seq_len(nrow(hyperOutput)), function(i)
    multiRegFun(lncID[i], pcID[i], mirID[i], rna.expr, mir.expr),
    numeric(4))

  reg <- t(reg)
  colnames(reg) <- c('cor','corPValue','regSim', 'sppc')
  return (data.frame(reg))
}



mirCorTestFun <- function(lncDa, pcDa, mir, mir.expr) {
  mirDa <- unlist(mir.expr[mir,])

  corlm <- cor.test(lncDa, mirDa, alternative='less')
  corpm <- cor.test(pcDa, mirDa, alternative='less')

  reglm <- corlm$estimate
  regpm <- corpm$estimate

  return (c(reglm, regpm)) ## lnc then pc
}

CeExportNetwork <- function(ceNetwork, net) {

  mirs <- unlist(strsplit(ceNetwork$miRNAs, ',', fixed=TRUE))

  fromNode <- rep(c(ceNetwork$lncRNAs,ceNetwork$Genes),
                  times=rep(as.numeric(ceNetwork$Counts),2))
  toNode <- rep(mirs, 2)
  altNode1Name <- ensembl2symbolFun(fromNode)
  edges <- data.frame(fromNode, toNode, altNode1Name,
                      stringsAsFactors = FALSE)

  #filter <- duplicated(edges)
  #edges <- edges[-filter,]
  edges <- unique(edges)

  nodeTable1 <- table(edges$fromNode)
  nodeTable2 <- table(edges$toNode)

  symbol <- c(ensembl2symbolFun(names(nodeTable1)), names(nodeTable2))

  type <- c(ifelse(names(nodeTable1) %in% ceNetwork$lncRNAs, 'lnc', 'pc'),
            rep('mir', length(nodeTable2)))


  nodes <- data.frame(gene=c(names(nodeTable1), names(nodeTable2)),
                      symbol, type, numInteractions=as.numeric(c(nodeTable1, nodeTable2)))

  if (net=='nodes') {
    return (nodes)
  } else if (net=='edges') {
    return (edges)
  }
}


##' @title Export network for Cytoscape
##' @description Export nodes and edges of ce network for 
##'     \strong{Cytoscape} visualization
##' @param ceNetwork a dataframe generated from \code{\link{ceRNAAnslysis}}
##' @param net one of \code{'nodes'} and \code{'edges'}
##' @return A dataframe of nodes or edges
##' @export
##' @author zexl
ExportNetwork <- function(ceNetwork, net) {
  
  mirs <- unlist(strsplit(ceNetwork$deMIRs, ',', fixed=TRUE))
  
  fromNode <- rep(c(ceNetwork$lncRNAs,ceNetwork$Genes), 
                  times=rep(as.numeric(ceNetwork$deMIRCounts),2))
  toNode <- rep(mirs, 2)
  altNode1Name <- ensembl2symbolFun(fromNode)
  edges <- data.frame(fromNode, toNode, altNode1Name, 
                      stringsAsFactors = FALSE)
  
  #filter <- duplicated(edges)
  #edges <- edges[-filter,]
  edges <- unique(edges)
  
  nodeTable1 <- table(edges$fromNode)
  nodeTable2 <- table(edges$toNode)
  
  symbol <- c(ensembl2symbolFun(names(nodeTable1)), names(nodeTable2))
  
  type <- c(ifelse(names(nodeTable1) %in% ceNetwork$lncRNAs, 'lnc', 'pc'), 
            rep('mir', length(nodeTable2)))
  
  
  nodes <- data.frame(gene=c(names(nodeTable1), names(nodeTable2)), 
                      symbol, type, numInteractions=as.numeric(c(nodeTable1, nodeTable2)))
  
  if (net=='nodes') {
    return (nodes)
  } else if (net=='edges') {
    return (edges)
  }
}
zexllin/lncRNAtools documentation built on Jan. 1, 2021, 1:52 p.m.