# Set seed for reproducibility
set.seed(1454944673)
library(knitr)
library(ggplot2)

opts_chunk[["set"]](
    autodep = TRUE,
    bootstrap.show.code = FALSE,
    cache = TRUE,
    cache.lazy = TRUE,
    dev = c("png", "pdf"),
    error = TRUE,
    fig.height = 10,
    fig.retina = 2,
    fig.width = 10,
    highlight = TRUE,
    message = FALSE,
    prompt = TRUE,
    # formatR required for tidy code
    tidy = TRUE,
    warning = FALSE)

theme_set(
    theme_light(base_size = 14))
theme_update(
    legend.justification = "center",
    legend.position = "bottom")
download.file("https://github.com/hbc/bcbioRNASeq/raw/master/inst/rmarkdown/shared/bibliography.bib", "bibliography.bib")
loadlibs <- function(){
library(ChIPQC)
library(ChIPseeker)
library(reshape)
library(pheatmap)
library(RColorBrewer)
library(gridExtra)
}
suppressPackageStartupMessages(loadlibs())

# Directory paths
outputDir <- params$outputDir
dataDir <- dirname(params$bcbFile)
resDir <- file.path(outputDir, "results")
dir.create(deDir, showWarnings = FALSE, recursive = TRUE)

Overview

Add a short description of the project here


Load in data

## If bcbio generates this we just need:
# load('data/chipQCobj.rda`)

## Load sample data
samples <- read.csv('meta/samplesheet_chr12.csv')
View(samples)

## Create ChIPQC object
chipObj <- ChIPQC(samples, annotation="hg19") 

## Create ChIPQC report
ChIPQCreport(chipObj, reportName="ChIP QC report: Nanog and Pou5f1", reportFolder="ChIPQCreport")

Sample metadata

Here, we display the different samples in our dataset and any pertinent information about them. We list:

QCmetrics(chipObj)
QCmetadata(chipObj)

Sample similarity {.tabset}

If bcbio is already running deepTools I think we can easily compute a multiBAM matrix which can be useful to make nicer plots here (rather than the ChIPQC ones)

Correlation Heatmap

# Read in data
counts <- read.delim("deepTools/readCounts.tab", sep="\t")

# Create row names from chromosomal coordinates
test <- apply(counts[,2:3], 1, function(x){paste(x, collapse='-')})
test <- cbind(as.character(counts[,1]), test)
rnames <- apply(test, 1, function(x){paste(x, collapse=':')})

plot_counts <- data.frame(counts[,4:ncol(counts)], row.names=rnames)

# Change column names
cnames <- sapply(meta$shortname, function(x){grep(x, colnames(plot_counts))})
plot_counts <- plot_counts[,as.vector(cnames)]
colnames(plot_counts) <- meta$shortname

# Set annotation and colors
annotation <- data.frame(meta[,c(2,4,6)], row.names=meta$shortname)
heat.colors <- brewer.pal(6, "YlOrRd")

pheatmap(cor(plot_counts), color=heat.colors, annotation=annotation)

PCA

Based on PC1 of all samples, the greatest amount of variance can be attributed to the H3K27 IP samples. For the rest of the samples we don't really see segregation of samples by either strain or IP.

pca <- prcomp(t(plot_counts))
df <- cbind(meta, pca$x[,c('PC1', 'PC2')]) 

# Plot with sample names used as data points
 ggplot(df) + 
  geom_point(aes(PC1, PC2, color = IP, shape=strain), size=5) +
  theme_bw() +
  xlab('PC1 (43.6% variance explained)') +
  ylab('PC2 (36.3% variance explained)') +
  # geom_text(aes(label=df$shortname, size=5)) +
  theme(plot.title = element_text(size = rel(1.5)),
        axis.title = element_text(size = rel(1.5)),
        axis.text = element_text(size = rel(1.25)))

Replicate Evaluation

Profile plots and Heatmaps

Annotation

Functional Analysis

Motif Analysis



hbc/hbcABC documentation built on Sept. 5, 2019, 9:51 p.m.