Update and replace the cal_diff_limma()
function from rnaseqTools
to use a HermesData
object and a string with factor name for comparison as input.
In the rnaseqTools
vignettes pg101_example_voom.Rmd
, the authors suggest to filter out low expression genes, as well as samples with low depth or technical failure from subsequent workflow. As we directly take a HermesData
as object, users cannot manually take filtering.
HermesData
object. diff_expression()
function extracts the count assay from the HermesData
object. (The function only accepts un-normalized counts assay for analysis.)HermesData
object. The diff_expression()
function generates the corresponding design component. HermesData
object and the factor compatible to run limma
and DESeq2
methods, respectively.limma
, the diff_expression()
runs limma::voom
, limma::lmFit
and limma::eBayes
sequentially to obtain a data frame in the class HermesDataDiffExpr
.deseq2
, the diff_expression()
runs DESeq
to obtain a data frame in the class HermesDataDiffExpr
.autoplot()
functions for plotting the HermesDataDiffExpr
data. Later the user would proceed like this maybe:
- first create, add flags, filter the HermesData
object
- then do diff_expression()
on it, choose the method as argument
- then apply autoplot()
on the result to obtain e.g. volcano plot
- implies that we need to have e.g. HermesDataDiffExpr
object as result of function call above
Example:
object <- hermes_data %>% add_quality_flags() %>% filter()
We require that the HermesDataDiffExpr
has columns:
- log2_fc
: estimate of the log2 fold change between the 2 levels of the provided factor
- stat
: test statistic (depends on the method used)
- p_val
: raw p-value
- adj_p_val
: adjusted p-value
.HermesDataDiffExpr <- setClass( Class = "HermesDataDiffExpr", contains = "data.frame" ) .diff_expr_cols <- c( "log2_fc", "stat", "p_val", "adj_p_val" ) S4Vectors::setValidity2( Class = "HermesDataDiffExpr", method = function(object) { msg <- validate_cols( required = .diff_expr_cols, actual = colnames(object) ) if (is.null(msg)) TRUE else msg } )
diff_expression()
We use here new getter functions for QC flags.
diff_expression <- function(object, group, method = c("limma_voom", "deseq2")) { assert_that( is_hermes_data(object), is.string(group) ) expect_factor(colData(object)[[group]], n.levels = 2L) method <- match.arg(method, c("limma_voom", "deseq2")) if (anyNA(get_tech_failure(object))) { warning("NAs in technical failure flags, please make sure to use `add_quality_flags()` beforehand") } if (anyNA(get_low_depth(object))) { warning("NAs in low depth flags, please make sure to use `add_quality_flags()` beforehand") } if (anyNA(get_low_expression(object))) { warning("NAs in low expression flags, please make sure to use `add_quality_flags()` beforehand") } form <- as.formula(paste("~", group)) design <- model.matrix(form, data = colData(object)) result <- switch( method, "limma_voom" = h_diff_expr_limma(object, design), "deseq2" = h_diff_expr_deseq2(object, design) ) .HermesDataDiffExpr(result) }
Note that we mainly do checks in this function and produce the model matrix.
library(limma) h_diff_expr_limma <- function(object, design) { assert_that( is_hermes_data(object), is.matrix(design) ) obj_count_voom <- limma::voom(counts(object)) fit <- limma::lmFit(obj_count_voom, design) eb <- limma::eBayes(fit) top_tab <- limma::topTable( eb, coef = 2L, n = nrow(obj_count_voom), # Retain all genes. sort.by = "p" # Use adjusted p-value to sort. ) with( top_tab, data.frame( log2_fc = logFC, stat = t, p_val = P.Value, adj_p_val = adj.P.Val, row.names = rownames(top_tab) ) ) }
Let's try this out:
design <- model.matrix(~ SEX, colData(object)) res_limma <- h_diff_expr_limma(object, design) head(res_limma)
We try here to obtain the same result structure - data frame with these columns above.
library(DESeq2) h_diff_expr_deseq2 <- function(object, design) { assert_that( is_hermes_data(object), is.matrix(design) ) deseq_data <- DESeqDataSet(se = object, design = design) deseq_data_processed <- DESeq(deseq_data, quiet = TRUE) deseq_data_res <- results(deseq_data_processed) # Note: this has multiple options we might want to use. deseq_data_res_df <- as.data.frame(deseq_data_res) deseq_data_res_df <- deseq_data_res_df[order(deseq_data_res_df$padj), ] # Use adj p-value to sort. with( deseq_data_res_df, data.frame( log2_fc = log2FoldChange, stat = stat, p_val = pvalue, adj_p_val = padj, row.names = rownames(deseq_data_res_df) ) ) }
Let's also try this out:
design <- model.matrix(~ SEX, colData(object)) res_deseq2 <- h_diff_expr_deseq2(object, design) head(res_deseq2)
Now we can try out the wrapper function too:
colData(object)$SEX <- factor(colData(object)$SEX) res1 <- diff_expression(object, "SEX", method = "limma_voom") res2 <- diff_expression(object, "SEX", method = "deseq2") head(res1) head(res2)
Now let's look at the volcano plot.
library(ggrepel) setMethod( f = "autoplot", signature = signature(object = "HermesDataDiffExpr"), definition = function(object, adj.P.Val.threshold = 0.05, logFC.threshold = 2.5) { object$difexpr <- "NO" object$difexpr[object$log2_fc > abs(logFC.threshold) & object$adj_p_val < adj.P.Val.threshold] <- "UP" object$difexpr[object$log2_fc < -abs(logFC.threshold) & object$adj_p_val < adj.P.Val.threshold] <- "DOWN" object$label <- NA object$label[object$difexpr != "NO"] <- rownames(object)[object$difexpr != "NO"] ggplot(data=object, aes(x=log2_fc, y=-log10(adj_p_val), col=difexpr, label=label) ) + geom_point() + geom_text_repel() + xlab("log2 fold change") + ylab("-log10 adjusted p-value") + geom_vline(xintercept=c(-abs(logFC.threshold), abs(logFC.threshold)), col="black") + geom_hline(yintercept=-log10(adj.P.Val.threshold), col="black") } )
Let's try it out:
autoplot(res1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.