library(DESeq2)
library(vsn)
library(DEGreport)
design = ~ group
condition = "group"

Differential expression

dds = DESeqDataSetFromTximport(txi.salmon,
    colData=metadata, design = ~ group)
dds = DESeq(dds)

Effect of variance stabilization

notAllZero <- (rowSums(counts(dds))>0)
rld <- rlog(dds)
vsd <- varianceStabilizingTransformation(dds)
rlogMat <- assay(rld)
vstMat <- assay(vsd)

save_file(vstMat, "rlog2_expression.tsv", path_results)
save_file(counts(dds, normalized=TRUE), "normalized_counts_expression.tsv", path_results)

p1=meanSdPlot(log2(counts(dds,normalized=TRUE)[notAllZero,] + 1), plot=F)$gg + ggtitle("log2 transformation")
p2=meanSdPlot(rlogMat[notAllZero,], plot=F)$gg + ggtitle("variance stabilization transformation")
p3=meanSdPlot(vstMat[notAllZero,], plot=F)$gg + ggtitle("regularized log transformation")
grid.arrange(p1,p2,p3)

Dispersion estimates

plotDispEsts(dds)
handle_deseq2 = function(dds, summarydata, column, all_combs=NULL) {
  if (is.null(all_combs)){
    all_combs = combn(levels(summarydata[,column]), 2, simplify=FALSE)
  }
  all_results = list()
  contrast_strings = list()
  for(comb in all_combs) {
    contrast_string = paste(comb, collapse="_vs_")
    contrast = c(column, comb)
    res = results(dds, contrast=contrast)
    res = res[order(res$padj),]
    all_results = c(all_results, res)
    contrast_strings = c(contrast_strings, contrast_string)
  }
  names(all_results) = contrast_strings
  return(all_results)
}

plot_genes = function(dds, gene){
  DESeq2::plotCounts(dds, gene, intgroup = "group")
}

get_groups <- function(d, comp, condition)
{
  g <- unlist(strsplit(comp,"_vs_"))
  g1 <- row.names(d)[d[, (names(d)==condition)]==g[1]]
  g2 <- row.names(d)[d[, (names(d)==condition)]==g[2]]
  list(g1,g2)
}

print_de = function(all_results, prefix = "file_", org = "NULL", PADJ=0.01, LOG2FC=3){
  # read dds and vstMat from outter scope
  for(i in seq(length(all_results))) {
    title = names(all_results)[i]
    cat(paste("## Comparison: ", title, prefix, "\n\n"))
    out_df = as.data.frame(all_results[[i]])
    out_df = out_df[!is.na(out_df$padj),]
    out_df = out_df[order(out_df$padj),]
    out_df$symbol = convertIDs(rownames(out_df), 
                               "ENSEMBL", "SYMBOL", org, "useFirst")
    out_df$description = convertIDs(rownames(out_df), 
                                    "ENSEMBL", "GENENAME", org, "useFirst")

    cat("\n",paste(capture.output(summary(out_df))[1:8], collapse = "<br>"),"\n")

    cat("\n\n### MA plot plot\n\n")
    DESeq2::plotMA(all_results[[i]])
    title(paste("MA plot for contrast", title))

    cat("\n\n### Volcano plot\n\n")
    stats = as.data.frame(out_df[,c(2,6)])
    volcano_density_plot(stats, title=title, lfc.cutoff=1.5)

    cat("\n\n### QC for DE genes mean/variance\n")
    g <- get_groups(metadata, title, condition)
    .c = counts(dds,normalized=TRUE)
    p = degMV(g[[1]], g[[2]], out_df$padj, .c[row.names(out_df),]) +
      ggtitle(paste0("Mean-vs-Variation for ", title))
    print(p)

    cat("\n\n### Heatmap most significand, padj<0.05\n")
    sign = row.names(out_df)[out_df$padj < PADJ & !is.na(out_df$padj) & abs(out_df$log2FoldChange) > LOG2FC]

    if (length(sign)<2){
      cat("Too few genes to plot.")
    }else{
      heatmap_fn(vstMat[sign, unlist(g)], show_rownames = F)
    }
    cat("\n")

    cat("\n\n### Top DE genes\n\n")
    print(kable(head(out_df, 20)))
    fn = paste0(title, prefix, ".tsv")
    save_file(out_df, fn, path_results)
    cat("\n\nDifferential expression results at: ", fn, " file.")
    cat("\n\n")

    cat("\n\n### GO ontology of DE genes (logFC>1 and FDR < 1%):\n\n")
    .res = as.data.frame(out_df)
    .idx = .res$padj < PADJ & .res$log2FoldChange > LOG2FC
    .idx[is.na(.idx)] = FALSE
    .de = out_df$symbol[.idx]
    .accnum = convertIDs(.de, "SYMBOL", "ENTREZID", org, "useFirst")
    ego <- enrichGO(gene = .accnum[!is.na(.accnum)], 
                    OrgDb = org, ont = "BP", pAdjustMethod = "BH",
                    pvalueCutoff = 0.01, qvalueCutoff = 0.05, readable = TRUE)
    save_file(summary(ego), paste0(title, prefix, "_goenrich.tsv"), path_results)
    print(print_enrichGO(summary(ego), 30))
    cat("\n\n")
  }

}

Results

# all_combs = list(day=c("normal", "day14"))
all_results = handle_deseq2(dds, metadata, condition, all_combs)
print_de(all_results, "test_", org.Mm.eg.db)

R Session Info

name_res = compress_results(path_results)

(useful if replicating these results)

sessionInfo()


lpantano/myRfunctions documentation built on May 21, 2019, 7:50 a.m.