R/riborex.r

Defines functions riborex voomRex edgeRDRex edgeRRex DESeq2Rex dataFrameToDesignMatrix combineDesignMatrix correctNullDistribution

Documented in riborex

# Copyright (C) 2016 University of Southern California and
#                    Wenzheng Li, Weili Wang and  Andrew D. Smith
#
# Authors: Wenzheng Li and Weili Wang
#
# This software is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This software is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this software. If not, see
# <http://www.gnu.org/licenses/>.

correctNullDistribution <- function(results) {

  message("correcting null distribution by reestimating pvalues")
  results <- results[!is.na(results$padj), ]
  results <- results[!is.na(results$pvalue), ]
  resultsFDR <- fdrtool(results$stat, 
                        statistic = 'normal',
                        plot = F)
  results[ , 'pvalue'] <- resultsFDR$pval
  results[ ,'padj'] <- p.adjust(resultsFDR$pval, method = 'BH')
  results
}

combineDesignMatrix <- function(rnaCond, riboCond) {

  message("combining design matrix")

  if (!is.data.frame(rnaCond)) rnaCond <- data.frame(cond = rnaCond)
  if (!is.data.frame(riboCond)) riboCond <- data.frame(cond = riboCond)

  if (!identical(colnames(rnaCond), colnames(riboCond)))
      stop("RNA- and Ribo-seq data must have the same set of conditions")

  numCond <- ncol(rnaCond)
  numRNASmps <- nrow(rnaCond)
  numRiboSmps <- nrow(riboCond)

  ### expand rna covariate vector with 0s
  expansion.rna <- rnaCond
  for(i in 1:ncol(expansion.rna)) {
    expansion.rna[,i] <- rnaCond[1,i]
  }
  expansion.rna <- cbind(0, as.data.frame(expansion.rna))
  rnaCond <- cbind(rnaCond, expansion.rna)
  colnames(rnaCond)[(numCond+1):ncol(rnaCond)] <- paste0("EXTRA",
                                                           seq(numCond+1))

  ### expand ribo covariate vector by repeating the same vector
  riboCond <- cbind(riboCond, 1, riboCond)
  colnames(riboCond)[(numCond+1):ncol(riboCond)] <- paste0("EXTRA",
                                                            seq(numCond+1))

  ### combine rna and ribo design matrix
  combinedCond <- rbind(rnaCond, riboCond)

  extendedConds <- paste0("combinedCond$", colnames(combinedCond))
  fmla <- as.formula(paste("~", paste(extendedConds, collapse= "+")))
  model.matrix(fmla)
}

dataFrameToDesignMatrix <- function(cond) {
  if (!is.data.frame(cond)) cond <- data.frame(cond = cond)
  conditions <- paste0("cond$", colnames(cond))
  fmla <- as.formula(paste("~", paste(conditions, collapse= "+")))
  model.matrix(fmla)
}

DESeq2Rex <- function(rnaCntTable, riboCntTable, rnaCond, riboCond,
                      contrast=NULL, minMeanCount=1) {

  ### append sample name with .rna or .ribo
  ### in case user uses sample sample name
  colnames(rnaCntTable) <- paste0(colnames(rnaCntTable), ".rna")
  colnames(riboCntTable) <- paste0(colnames(riboCntTable), ".ribo")

  ### input validation
  if (!identical(rownames(rnaCntTable), rownames(riboCntTable)))
    stop ("RNA- and Ribo-seq data must have the same set of genes")

  if (!is.data.frame(rnaCond)) rnaCond <- data.frame(cond = rnaCond)
  if (!is.data.frame(riboCond)) riboCond <- data.frame(cond = riboCond)

  if (!identical(colnames(rnaCond), colnames(riboCond)))
    stop("RNA- and Ribo-seq data must have the same set of conditions")

  if (ncol(rnaCntTable) != nrow(rnaCond))
    stop(paste("RNA-seq count table must have the",
               "same number of samples as in rnaCond"))

  if (ncol(riboCntTable) != nrow(riboCond))
    stop(paste("Ribo-seq count table must have the",
               "same number of samples as in riboCond"))

  if (minMeanCount < 1)
    stop("minMeanCount must at least be 1")

  ### filter out low read count
  keep.rna <- rownames(rnaCntTable)[rowMeans(rnaCntTable) >= minMeanCount]
  keep.ribo <- rownames(riboCntTable)[rowMeans(riboCntTable) >= minMeanCount]
  keep <- intersect(keep.rna, keep.ribo)
  rnaCntTable <- rnaCntTable[keep,]
  riboCntTable <- riboCntTable[keep,]

  numCond <- ncol(rnaCond)
  numRNASmps <- nrow(rnaCond)
  numRiboSmps <- nrow(riboCond)
  
  ### combine counts
  combCntTbl <- cbind(rnaCntTable, riboCntTable)

  message("combining design matrix")

  combinedCond <- rbind(rnaCond, riboCond)
  combinedCond <- combinedCond[,rep(1:ncol(combinedCond),2)]
  INTERCEPT <- c(rep("CONTROL", numRNASmps), rep("TREATED", numRiboSmps))
  combinedCond <- cbind(combinedCond[1:numCond], INTERCEPT,
                        combinedCond[(numCond+1):ncol(combinedCond)])
  for( i in (numCond+2) : ncol(combinedCond)) {
    combinedCond[1:numRNASmps,i] <- combinedCond[1,i]
  }
  colnames(combinedCond)[(numCond+2):ncol(combinedCond)] <- paste0("EXTRA",
                                                                   seq(numCond))
  extendedConds <- colnames(combinedCond)
  fmla <- as.formula(paste("~", paste(extendedConds, collapse= "+")))
  dds <- DESeqDataSetFromMatrix(countData = combCntTbl,
                                colData = combinedCond,
                                design = fmla)

  message("applying DESeq2 to modified design matrix")

  ## apply new design matrix with combined count table to DESeq2
  dds <- DESeq(dds)
  if(is.null(contrast)) {
    res <- results(dds)
  } else {
    contrast[1] <- paste0("EXTRA", which(colnames(combinedCond)==contrast[1]))
    res <- results(dds, contrast=contrast)
  }

  ## order results by gene names
  res <- res[order(rownames(res)),]
  res
}

edgeRRex <- function(rnaCntTable, riboCntTable, rnaCond, riboCond,
                     contrast=NULL, minMeanCount=1) {

  ### append sample name with .rna or .ribo
  ### in case user uses sample sample name
  colnames(rnaCntTable) <- paste0(colnames(rnaCntTable), ".rna")
  colnames(riboCntTable) <- paste0(colnames(riboCntTable), ".ribo")

  ### input validation
  if (!identical(rownames(rnaCntTable), rownames(riboCntTable)))
    stop ("RNA- and Ribo-seq data must have the same set of genes")

  if (!is.data.frame(rnaCond)) rnaCond <- data.frame(cond = rnaCond)
  if (!is.data.frame(riboCond)) riboCond <- data.frame(cond = riboCond)

  if (!identical(colnames(rnaCond), colnames(riboCond)))
    stop("RNA- and Ribo-seq data must have the same set of conditions")

  if (ncol(rnaCntTable) != nrow(rnaCond))
    stop(paste("RNA-seq count table must have the",
               "same number of samples as in rnaCond"))

  if (ncol(riboCntTable) != nrow(riboCond))
    stop(paste("Ribo-seq count table must have the",
               "same number of samples as in riboCond"))

  if (minMeanCount < 1)
    stop("minMeanCount must at least be 1")

  ### filter out low read count
  keep.rna <- rownames(rnaCntTable)[rowMeans(rnaCntTable) >= minMeanCount]
  keep.ribo <- rownames(riboCntTable)[rowMeans(riboCntTable) >= minMeanCount]
  keep <- intersect(keep.rna, keep.ribo)
  rnaCntTable <- rnaCntTable[keep,]
  riboCntTable <- riboCntTable[keep,]

  ## combine counts
  combCntTbl <- cbind(rnaCntTable, riboCntTable)

  dge <- DGEList(counts = combCntTbl)
  dge <- calcNormFactors(dge)
  design <- combineDesignMatrix(rnaCond, riboCond)
  dge <- estimateDisp(dge, design)

  message("applying edgeR to modified design matrix")

  ## glmFit and glmLRT
  fit <- glmFit(dge, design)
  if(is.null(contrast)) {
    lrt <- glmLRT(fit)
  } else {
    coef = which(colnames(rnaCond) == contrast[1]) + 2 + ncol(rnaCond)
    lrt <- glmLRT(fit, coef = coef)
  }
  topGenes <- topTags(lrt, n=Inf)

  ## order results by gene names
  topGenes <- topGenes[order(rownames(topGenes)),]
  topGenes
}

edgeRDRex <- function(rnaCntTable, riboCntTable, rnaCond, riboCond,
                      contrast=NULL, minMeanCount=1) {

  ### append sample name with .rna or .ribo
  ### in case user uses sample sample name
  colnames(rnaCntTable) <- paste0(colnames(rnaCntTable), ".rna")
  colnames(riboCntTable) <- paste0(colnames(riboCntTable), ".ribo")

  ### input validation
  if (!identical(rownames(rnaCntTable), rownames(riboCntTable)))
    stop ("RNA- and Ribo-seq data must have the same set of genes")

  if (!is.data.frame(rnaCond)) rnaCond <- data.frame(cond = rnaCond)
  if (!is.data.frame(riboCond)) riboCond <- data.frame(cond = riboCond)

  if (!identical(colnames(rnaCond), colnames(riboCond)))
    stop("RNA- and Ribo-seq data must have the same set of conditions")

  if (ncol(rnaCntTable) != nrow(rnaCond))
    stop(paste("RNA-seq count table must have the",
               "same number of samples as in rnaCond"))

  if (ncol(riboCntTable) != nrow(riboCond))
    stop(paste("Ribo-seq count table must have the",
               "same number of samples as in riboCond"))

  if (minMeanCount < 1)
    stop("minMeanCount must at least be 1")

  ### filter out low read count
  keep.rna <- rownames(rnaCntTable)[rowMeans(rnaCntTable) >= minMeanCount]
  keep.ribo <- rownames(riboCntTable)[rowMeans(riboCntTable) >= minMeanCount]
  keep <- intersect(keep.rna, keep.ribo)
  rnaCntTable <- rnaCntTable[keep,]
  riboCntTable <- riboCntTable[keep,]

  ## estimate dispersion from RNA-seq data
  dge.rna <- DGEList(counts = rnaCntTable)
  dge.rna <- calcNormFactors(dge.rna)
  design.rna <- dataFrameToDesignMatrix(rnaCond)
  dge.rna <- estimateDisp(dge.rna, design.rna)

  ## estimate dispersion from Ribo-seq data
  dge.ribo <- DGEList(counts = riboCntTable)
  dge.ribo <- calcNormFactors(dge.ribo)
  design.ribo <- dataFrameToDesignMatrix(riboCond)
  dge.ribo <- estimateDisp(dge.ribo, design.ribo)

  ## combine dispersions
  dispersion.rna <- getDispersion(dge.rna)
  dispersion.ribo <- getDispersion(dge.ribo)
  dispersion <- matrix(c(rep(dispersion.rna, ncol(rnaCntTable)),
                         rep(dispersion.ribo, ncol(riboCntTable))),
                         nrow=dim(rnaCntTable)[1])
  ## combine counts
  combCntTbl <- cbind(rnaCntTable, riboCntTable)
  ## combine size factors
  combFactors <- c(dge.rna$samples$norm.factors,
                   dge.ribo$samples$norm.factors)
  ## create new DGE based on combined count table
  dge <- DGEList(counts = combCntTbl, norm.factors = combFactors)
  ## combine design matrix
  design <- combineDesignMatrix(rnaCond, riboCond)

  message("applying edgeR to modified design matrix")

  ## glmFit and glmLRT
  fit <- glmFit(dge, design=design, dispersion=dispersion)
  if(is.null(contrast)) {
    lrt <- glmLRT(fit)
  } else {
    coef = which(colnames(rnaCond) == contrast[1]) + 2 + ncol(rnaCond)
    lrt <- glmLRT(fit, coef = coef)
  }
  topGenes <- topTags(lrt, n=Inf)

  ## order results by gene names
  topGenes <- topGenes[order(rownames(topGenes)),]
  topGenes
}

voomRex <- function(rnaCntTable, riboCntTable, rnaCond, riboCond,
                    contrast=NULL, minMeanCount=1) {

  ### append sample name with .rna or .ribo
  ### in case user uses sample sample name
  colnames(rnaCntTable) <- paste0(colnames(rnaCntTable), ".rna")
  colnames(riboCntTable) <- paste0(colnames(riboCntTable), ".ribo")

  ### input validation
  if (!identical(rownames(rnaCntTable), rownames(riboCntTable)))
    stop ("RNA- and Ribo-seq data must have the same set of genes")

  if (!is.data.frame(rnaCond)) rnaCond <- data.frame(cond = rnaCond)
  if (!is.data.frame(riboCond)) riboCond <- data.frame(cond = riboCond)

  if (!identical(colnames(rnaCond), colnames(riboCond)))
    stop("RNA- and Ribo-seq data must have the same set of conditions")

  if (ncol(rnaCond) > 1)
    stop("Voom is not supported in multi-factor experiment yet")

  if (ncol(rnaCntTable) != nrow(rnaCond))
    stop(paste("RNA-seq count table must have the",
               "same number of samples as in rnaCond"))

  if (ncol(riboCntTable) != nrow(riboCond))
    stop(paste("Ribo-seq count table must have the",
               "same number of samples as in riboCond"))

  if (minMeanCount < 1)
    stop("minMeanCount must at least be 1")

  ### filter out low read count
  keep.rna <- rownames(rnaCntTable)[rowMeans(rnaCntTable) >= minMeanCount]
  keep.ribo <- rownames(riboCntTable)[rowMeans(riboCntTable) >= minMeanCount]
  keep <- intersect(keep.rna, keep.ribo)
  rnaCntTable <- rnaCntTable[keep,]
  riboCntTable <- riboCntTable[keep,]

  ## combine counts
  combCntTbl <- cbind(rnaCntTable, riboCntTable)

  dge <- DGEList(counts = combCntTbl)
  dge <- calcNormFactors(dge)
  design <- combineDesignMatrix(rnaCond, riboCond)

  message("applying Voom to modified design matrix")

  v <- voom(dge, design, plot=FALSE)
  fit <- lmFit(v, design)
  if(!is.null(contrast)) {
    fit <- contrasts.fit(fit, contrasts = contrast)
  }
  fit <- eBayes(fit)

  topGenes <- topTable(fit, coef=ncol(design), number=Inf)

  ## order results by gene names
  topGenes <- topGenes[order(rownames(topGenes)),]
  topGenes
}

riborex <- function(rnaCntTable, riboCntTable, rnaCond, riboCond,
                    engine="DESeq2", contrast=NULL, minMeanCount=1) {

  if (engine == "DESeq2") {
    message("DESeq2 mode selected")
    DESeq2Rex(rnaCntTable, riboCntTable, rnaCond, riboCond,
              contrast, minMeanCount)
  }
  else if (engine == "edgeR") {
    message("edgeR mode selected")
    edgeRRex(rnaCntTable, riboCntTable, rnaCond, riboCond,
             contrast, minMeanCount)
  }
  else if (engine == "edgeRD") {
    message("edgeRD mode selected")
    edgeRDRex(rnaCntTable, riboCntTable, rnaCond, riboCond,
              contrast, minMeanCount)
  }
  else if (engine == "Voom") {
    message("Voom mode selected")
    voomRex(rnaCntTable, riboCntTable, rnaCond, riboCond,
            contrast, minMeanCount)
  }
  else {
    stop ("Error: unrecognized engine name")
  }
}
smithlabcode/riborex documentation built on April 12, 2021, 11:46 a.m.