# Set seed for reproducibility set.seed(1454944673) library(knitr) library(ggplot2) opts_chunk[["set"]]( autodep = TRUE, bootstrap.show.code = FALSE, cache = TRUE, cache.lazy = TRUE, dev = c("png", "pdf"), error = TRUE, fig.height = 10, fig.retina = 2, fig.width = 10, highlight = TRUE, message = FALSE, prompt = TRUE, # formatR required for tidy code tidy = TRUE, warning = FALSE) theme_set( theme_light(base_size = 14)) theme_update( legend.justification = "center", legend.position = "bottom") download.file("https://github.com/hbc/bcbioRNASeq/raw/master/inst/rmarkdown/shared/bibliography.bib", "bibliography.bib")
loadlibs <- function(){ library(ChIPQC) library(ChIPseeker) library(reshape) library(pheatmap) library(RColorBrewer) library(gridExtra) } suppressPackageStartupMessages(loadlibs()) # Directory paths outputDir <- params$outputDir dataDir <- dirname(params$bcbFile) resDir <- file.path(outputDir, "results") dir.create(deDir, showWarnings = FALSE, recursive = TRUE)
Add a short description of the project here
## If bcbio generates this we just need: # load('data/chipQCobj.rda`) ## Load sample data samples <- read.csv('meta/samplesheet_chr12.csv') View(samples) ## Create ChIPQC object chipObj <- ChIPQC(samples, annotation="hg19") ## Create ChIPQC report ChIPQCreport(chipObj, reportName="ChIP QC report: Nanog and Pou5f1", reportFolder="ChIPQCreport")
Here, we display the different samples in our dataset and any pertinent information about them. We list:
QCmetrics(chipObj) QCmetadata(chipObj)
If bcbio is already running deepTools I think we can easily compute a multiBAM matrix which can be useful to make nicer plots here (rather than the ChIPQC ones)
# Read in data counts <- read.delim("deepTools/readCounts.tab", sep="\t") # Create row names from chromosomal coordinates test <- apply(counts[,2:3], 1, function(x){paste(x, collapse='-')}) test <- cbind(as.character(counts[,1]), test) rnames <- apply(test, 1, function(x){paste(x, collapse=':')}) plot_counts <- data.frame(counts[,4:ncol(counts)], row.names=rnames) # Change column names cnames <- sapply(meta$shortname, function(x){grep(x, colnames(plot_counts))}) plot_counts <- plot_counts[,as.vector(cnames)] colnames(plot_counts) <- meta$shortname # Set annotation and colors annotation <- data.frame(meta[,c(2,4,6)], row.names=meta$shortname) heat.colors <- brewer.pal(6, "YlOrRd") pheatmap(cor(plot_counts), color=heat.colors, annotation=annotation)
Based on PC1 of all samples, the greatest amount of variance can be attributed to the H3K27 IP samples. For the rest of the samples we don't really see segregation of samples by either strain or IP.
pca <- prcomp(t(plot_counts)) df <- cbind(meta, pca$x[,c('PC1', 'PC2')]) # Plot with sample names used as data points ggplot(df) + geom_point(aes(PC1, PC2, color = IP, shape=strain), size=5) + theme_bw() + xlab('PC1 (43.6% variance explained)') + ylab('PC2 (36.3% variance explained)') + # geom_text(aes(label=df$shortname, size=5)) + theme(plot.title = element_text(size = rel(1.5)), axis.title = element_text(size = rel(1.5)), axis.text = element_text(size = rel(1.25)))
GRanges::findOverlaps
idr
R packageAdd the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.