makeSEwrapper: Make SummarizedExpriment instance wrapper

Description Usage Arguments Examples

Description

A wrapper funtion that performed several tasks including counting hit, visualization of sample distance, DESeq analysis and report with annotation.

Usage

1
2
3
4
5
6
makeSEwrapper(bamFiles, pkgDir, OrgDb, TxDb = NULL, features = NULL,
              workers = 1L, pheno_type = NULL,
              title = "Project", plot.PCA = TRUE,
              gene_id_type = c("ENTREZID", "ENSEMBL"),
	      do.DESeq = FALSE, lfcThreshold = 0,
	      print.report = FALSE)

Arguments

bamFiles
pkgDir
OrgDb
TxDb
features
workers
pheno_type
title
plot.PCA
gene_id_type
do.DESeq
lfcThreshold
print.report

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (bamFiles, pkgDir, OrgDb, TxDb = NULL, features = NULL, 
    workers = 1L, pheno_type = NULL, title = "Project", plot.PCA = TRUE, 
    gene_id_type = c("ENTREZID", "ENSEMBL"), do.DESeq = FALSE, 
    lfcThreshold = 0, print.report = FALSE) 
{
    require(DESeq2)
    require(GenomicFeatures)
    require(BiocParallel)
    require(Rsamtools)
    require(ggplot2)
    require(GenomicAlignments)
    require(pheatmap)
    require(RColorBrewer)
    mparam <- MulticoreParam(workers = workers, type = "SOCK")
    register(mparam, default = TRUE)
    registered()
    if (!all(file.exists(bamFiles))) 
        stop("Some of the bam files do not exists.")
    if (!file.exists(pkgDir)) 
        stop(pkgDir, " does not exist")
    if (is.null(TxDb) & is.null(features)) 
        stop("TxDb and features cannot be both NULL")
    if (!is.null(TxDb)) 
        features <- GenomicFeatures::exonsBy(TxDb, by = "gene")
    instDir <- file.path(pkgDir, "inst")
    if (!file.exists(instDir)) 
        system(paste("mkdir", instDir))
    dataDir <- file.path(pkgDir, "data")
    if (!file.exists(dataDir)) 
        system(paste("mkdir", dataDir))
    message("Read counts and make summarizedExperiment object")
    bamFiles <- Rsamtools::BamFileList(bamFiles)
    se <- GenomicAlignments::summarizeOverlaps(features = features, 
        reads = bamFiles, mode = "IntersectionStrict", inter.feature = TRUE, 
        singleEnd = TRUE, ignore.strand = TRUE, BPPARAM = mparam)
    colnames(se) <- sub(".bam", "", colnames(se))
    message("Get sample information")
    sampleInfo <- data.frame(sample_name = colnames(se), file_bam = path(bamFiles), 
        stringsAsFactors = FALSE)
    si <- SGSeq::getBamInfo(sampleInfo, cores = workers)
    colData(se) <- as(si, "DataFrame")
    if (!is.null(pheno_type)) {
        if (!is.factor(pheno_type)) {
            pheno_type <- factor(pheno_type)
        }
        se$pheno_type <- pheno_type
    }
    colnames(se) <- sub(".bam", "", colnames(se))
    if (plot.PCA) {
        message("Plot sample distance by PCA and clustering.")
        figDir <- file.path(pkgDir, "inst", "figures")
        if (!file.exists(figDir)) 
            system(paste("mkdir", figDir))
        dds <- DESeqDataSet(se[rowSums(assays(se)[[1]]) > ncol(se), 
            ], design = ~pheno_type)
        rlg <- rlog(dds)
        data <- plotPCA(rlg, intgroup = c("pheno_type"), returnData = TRUE)
        percentVar <- round(100 * attr(data, "percentVar"))
        qplot(PC1, PC2, color = pheno_type, data = data) + xlab(paste0("PC1: ", 
            percentVar[1], "% variance")) + ylab(paste0("PC2: ", 
            percentVar[2], "% variance")) + labs(list(title = title))
        ggsave(file = file.path(figDir, paste0(title, "_SampleDistancePCAplot.png")))
        sampleDists <- dist(t(assay(rlg)))
        colors <- colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(255)
        sampleDistMatrix <- as.matrix(sampleDists)
        fname <- file.path(figDir, paste0(title, "_SampleDistanceHeatmap.png"))
        annotation_col <- data.frame(pheno_type = factor(rlg$pheno_type))
        rownames(annotation_col) <- colnames(rlg)
        pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, 
            clustering_distance_cols = sampleDists, border_color = NA, 
            treeheight_row = 0, main = paste0(title, " - sample distance"), 
            annotation_col = annotation_col, col = colors, silent = TRUE, 
            filename = fname)
    }
    if (do.DESeq) {
        if (is.null(pheno_type)) 
            stop("The formula (pheno_type) for DESeq is not given.")
        dds <- DESeqDataSet(se[rowSums(assays(se)[[1]]) > ncol(se), 
            ], design = ~pheno_type)
        message("DESeq (~ pheno_type) ...")
        dds <- DESeq(dds)
        if (print.report) {
            statsDir <- file.path(pkgDir, "inst", "stats")
            filename <- file.path(statsDir, paste0(title, "_results_dds.csv"))
            message("Generate report: ", filename)
            if (!file.exists(statsDir)) 
                system(paste("mkdir", statsDir))
            res <- simpleDESeqReport(dds = dds, OrgDb = OrgDb, 
                lfcThreshold = lfcThreshold, gene_id_type = gene_id_type, 
                filename = filename)
        }
    }
    if (!do.DESeq) 
        dds <- "No DESeq result returned"
    return(list(se = se, dds = dds))
  }

TapscottLab/SeqPipelineTools documentation built on May 16, 2019, 2:28 a.m.