# Estimate Disperion for all Labeled and Total samples
#
# Author: demel
###############################################################################
#' Estimates gene-specific dispersion estimates for total and labeled samples,
#' using DESeq2
#' The DESeq estimation does only work correctly, if at least two samples are
#' available for each class(labeled/total),
#' either replicates or different conditions.
#'
#' @param counts count table for genes (rows) and samples (columns)
#' @param conditions.labeling vector indicating for each sample if it was
#' labeled ("L") or total ("T")
#' @param conditions vector giving the experimental condition for each sample
#' @param type type of disperion from deseq, "dispersion"=="dispMAP"
#' @return data.frame with dispersion estimates for every gene (rows) and
#' labeled and total samples(columns)
#'
#' @importFrom DESeq2 DESeqDataSetFromMatrix
#' @importFrom DESeq2 DESeq
#'
#' @examples
#' data(gene.counts)
#' data(samples)
#' dispersion = estimateGeneDispersion(gene.counts, samples$conditions.labeling,
#' samples$conditions)
#'
#' @author Carina Demel
#' @export
estimateGeneDispersion <- function(
counts,
conditions.labeling,
conditions = colnames(counts),
type = c("dispersion", "dispGeneEst", "dispFit", "dispMAP")
){
expDesign.allgene <- data.frame( row.names = colnames(counts),
labeling = factor(conditions.labeling),
condition = factor(conditions))
expDesign.total <- subset(expDesign.allgene, labeling == "T")
message(paste(
"For the Total gene dispersion, the following samples are used:",
paste(rownames(expDesign.total), collapse = ", ")))
deseq.matrix.total <- DESeqDataSetFromMatrix(
counts[, conditions.labeling == "T"],
colData = expDesign.total,
design = formula(~ condition))
dds.total <- suppressMessages(DESeq(deseq.matrix.total))
expDesign.labeled <- subset(expDesign.allgene, labeling == "L")
message(paste(
"For the Labeled gene dispersion, the following samples are used:",
paste(rownames(expDesign.labeled), collapse = ", ")))
deseq.matrix.labeled <- DESeqDataSetFromMatrix(
counts[,conditions.labeling == "L"],
colData = expDesign.labeled,
design = formula(~ condition))
dds.labeled <- suppressMessages(DESeq(deseq.matrix.labeled))
suppressWarnings(
if (type == "dispGeneEst") {
dispersion.labeled <- mcols(dds.labeled)$dispGeneEst
dispersion.total <- mcols(dds.total)$dispGeneEst
} else if ( type == "dispFit" ) {
dispersion.labeled <- mcols(dds.labeled)$dispFit
dispersion.total <- mcols(dds.total)$dispFit
} else {
dispersion.labeled <- mcols(dds.labeled)$dispersion
dispersion.total <- mcols(dds.total)$dispersion
}
)
# in cases where no dispersion could be fitted because of missing values,
# set dispersion to median gene dispersion
if (sum(is.na(dispersion.labeled)) > 0) {
message(paste("Some feature-specific dispersions could not be",
"estimated (because of 0 counts) and were set to",
"the median dispersion value of all features."))
}
dispersion.labeled[is.na(dispersion.labeled)] <-
median(dispersion.labeled, na.rm=TRUE)
dispersion.total[is.na(dispersion.total)] <-
median(dispersion.total, na.rm=TRUE)
dispersion <- cbind("L" = dispersion.labeled, "T" = dispersion.total)
rownames(dispersion) <- rownames(dds.labeled)
return(dispersion)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.