Description Usage Arguments Examples
A wrapper funtion that performed several tasks including counting hit, visualization of sample distance, DESeq analysis and report with annotation.
1 2 3 4 5 6 |
bamFiles |
|
pkgDir |
|
OrgDb |
|
TxDb |
|
features |
|
workers |
|
pheno_type |
|
title |
|
plot.PCA |
|
gene_id_type |
|
do.DESeq |
|
lfcThreshold |
|
print.report |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | ##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (bamFiles, pkgDir, OrgDb, TxDb = NULL, features = NULL,
workers = 1L, pheno_type = NULL, title = "Project", plot.PCA = TRUE,
gene_id_type = c("ENTREZID", "ENSEMBL"), do.DESeq = FALSE,
lfcThreshold = 0, print.report = FALSE)
{
require(DESeq2)
require(GenomicFeatures)
require(BiocParallel)
require(Rsamtools)
require(ggplot2)
require(GenomicAlignments)
require(pheatmap)
require(RColorBrewer)
mparam <- MulticoreParam(workers = workers, type = "SOCK")
register(mparam, default = TRUE)
registered()
if (!all(file.exists(bamFiles)))
stop("Some of the bam files do not exists.")
if (!file.exists(pkgDir))
stop(pkgDir, " does not exist")
if (is.null(TxDb) & is.null(features))
stop("TxDb and features cannot be both NULL")
if (!is.null(TxDb))
features <- GenomicFeatures::exonsBy(TxDb, by = "gene")
instDir <- file.path(pkgDir, "inst")
if (!file.exists(instDir))
system(paste("mkdir", instDir))
dataDir <- file.path(pkgDir, "data")
if (!file.exists(dataDir))
system(paste("mkdir", dataDir))
message("Read counts and make summarizedExperiment object")
bamFiles <- Rsamtools::BamFileList(bamFiles)
se <- GenomicAlignments::summarizeOverlaps(features = features,
reads = bamFiles, mode = "IntersectionStrict", inter.feature = TRUE,
singleEnd = TRUE, ignore.strand = TRUE, BPPARAM = mparam)
colnames(se) <- sub(".bam", "", colnames(se))
message("Get sample information")
sampleInfo <- data.frame(sample_name = colnames(se), file_bam = path(bamFiles),
stringsAsFactors = FALSE)
si <- SGSeq::getBamInfo(sampleInfo, cores = workers)
colData(se) <- as(si, "DataFrame")
if (!is.null(pheno_type)) {
if (!is.factor(pheno_type)) {
pheno_type <- factor(pheno_type)
}
se$pheno_type <- pheno_type
}
colnames(se) <- sub(".bam", "", colnames(se))
if (plot.PCA) {
message("Plot sample distance by PCA and clustering.")
figDir <- file.path(pkgDir, "inst", "figures")
if (!file.exists(figDir))
system(paste("mkdir", figDir))
dds <- DESeqDataSet(se[rowSums(assays(se)[[1]]) > ncol(se),
], design = ~pheno_type)
rlg <- rlog(dds)
data <- plotPCA(rlg, intgroup = c("pheno_type"), returnData = TRUE)
percentVar <- round(100 * attr(data, "percentVar"))
qplot(PC1, PC2, color = pheno_type, data = data) + xlab(paste0("PC1: ",
percentVar[1], "% variance")) + ylab(paste0("PC2: ",
percentVar[2], "% variance")) + labs(list(title = title))
ggsave(file = file.path(figDir, paste0(title, "_SampleDistancePCAplot.png")))
sampleDists <- dist(t(assay(rlg)))
colors <- colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(255)
sampleDistMatrix <- as.matrix(sampleDists)
fname <- file.path(figDir, paste0(title, "_SampleDistanceHeatmap.png"))
annotation_col <- data.frame(pheno_type = factor(rlg$pheno_type))
rownames(annotation_col) <- colnames(rlg)
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists, border_color = NA,
treeheight_row = 0, main = paste0(title, " - sample distance"),
annotation_col = annotation_col, col = colors, silent = TRUE,
filename = fname)
}
if (do.DESeq) {
if (is.null(pheno_type))
stop("The formula (pheno_type) for DESeq is not given.")
dds <- DESeqDataSet(se[rowSums(assays(se)[[1]]) > ncol(se),
], design = ~pheno_type)
message("DESeq (~ pheno_type) ...")
dds <- DESeq(dds)
if (print.report) {
statsDir <- file.path(pkgDir, "inst", "stats")
filename <- file.path(statsDir, paste0(title, "_results_dds.csv"))
message("Generate report: ", filename)
if (!file.exists(statsDir))
system(paste("mkdir", statsDir))
res <- simpleDESeqReport(dds = dds, OrgDb = OrgDb,
lfcThreshold = lfcThreshold, gene_id_type = gene_id_type,
filename = filename)
}
}
if (!do.DESeq)
dds <- "No DESeq result returned"
return(list(se = se, dds = dds))
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.