Nothing
## ----setup, include = FALSE-----------------------------------------------------------------------
knitr::opts_chunk$set(
collapse = FALSE,
comment = "#>",
fig.height = 5,
fig.width = 5
)
old.ops <- options(width = 100)
## ----dataset--------------------------------------------------------------------------------------
library(MAnorm2)
head(H3K27Ac)
## ----H3K27Ac--------------------------------------------------------------------------------------
library(MAnorm2)
head(H3K27Ac)
## ----H3K27AcMetaInfo------------------------------------------------------------------------------
attr(H3K27Ac, "metaInfo")
## ----cmpBioReps-----------------------------------------------------------------------------------
# Perform within-group normalization.
norm <- normalize(H3K27Ac, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)
# Construct a bioCond for each group of samples.
conds <- list(GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))
# Perform between-group normalization.
# Restrict common peak regions to autosomes only when the two groups
# being compared are associated with different genders.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)
# Fit a mean-variance curve.
# If the following function call raises an error,
# set init.coef = c(0.1, 10) or try method = "local".
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE)
# Perform differential tests.
res <- diffTest(conds[[1]], conds[[2]])
head(res)
## ----oneStepNorm----------------------------------------------------------------------------------
# One-step normalization.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, count = 5:8, occupancy = 10:13,
common.peak.regions = autosome)
## ----normInfo-------------------------------------------------------------------------------------
names(attributes(norm))
attributes(norm)[5:8]
# This statement requires the gplots package (>= 3.0.1).
plot(attr(norm, "MA.cor"), symbreaks = TRUE, margins = c(8, 8),
cexRow = 1, cexCol = 1)
## ----MAplotDefault, fig.show = "hold", fig.height = 4.7, fig.width = 4.7--------------------------
# Before normalization.
raw <- log(H3K27Ac[7:8] + 0.5, base = 2)
MAplot(raw[[1]], raw[[2]], norm[[12]], norm[[13]], ylim = c(-2, 2),
main = "Before normalization")
abline(h = 0, lwd = 2, lty = 5)
# After normalization.
MAplot(norm[[7]], norm[[8]], norm[[12]], norm[[13]], ylim = c(-2, 2),
main = "After normalization")
abline(h = 0, lwd = 2, lty = 5)
## ----withinNorm-----------------------------------------------------------------------------------
# Within-group normalization.
norm <- normalize(H3K27Ac, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)
## ----bioCond--------------------------------------------------------------------------------------
# Construct a bioCond for each LCL.
conds <- list(GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))
## ----summaryBioCond-------------------------------------------------------------------------------
summary(conds$GM12891)
## ----betweenNorm----------------------------------------------------------------------------------
# Between-group normalization.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)
## ----MAplotBioCond--------------------------------------------------------------------------------
MAplot(conds[[1]], conds[[2]], ylim = c(-12, 12), main = "GM12891 vs. GM12892")
abline(h = 0, lwd = 2, lty = 5)
## ----fitMeanVarCurve------------------------------------------------------------------------------
# Fit an MVC.
# The "parametric" method sometimes requires the users to explicitly specify
# initial coefficients. Try setting init.coef = c(0.1, 10) in these cases.
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE)
## ----summaryMVC-----------------------------------------------------------------------------------
summary(conds$GM12891)
summary(conds$GM12892)
## ----plotMeanVarCurve-----------------------------------------------------------------------------
# Plot only occupied genomic intervals,
# as only these intervals have been used to fit the MVC.
plotMeanVarCurve(conds, subset = "occupied", ylim = c(-7, 0.5))
## ----diffTestBioCond------------------------------------------------------------------------------
res <- diffTest(conds[[1]], conds[[2]])
## ----showRes--------------------------------------------------------------------------------------
head(res)
## ----MAplotDiffBioCond----------------------------------------------------------------------------
MAplot(res, padj = 0.001)
abline(h = 0, lwd = 2, lty = 5, col = "green3")
## ----cmpWithoutReps-------------------------------------------------------------------------------
# Perform normalization and create bioConds to represent the two LCLs.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, c(5, 7), c(10, 12), common.peak.regions = autosome)
conds <- list(GM12891 = bioCond(norm[5], norm[10], name = "GM12891"),
GM12892 = bioCond(norm[7], norm[12], name = "GM12892"))
# Create a "blind" bioCond that treats the two samples as replicates and fit an
# MVC accordingly. Only common peak regions of the two samples are considered
# to be occupied by the "blind" bioCond, and only these regions are used to fit
# the MVC. This setting is for capturing underlying non-differential intervals
# as accurately as possible and avoiding over-estimation of prior variances
# (i.e., variances read from MVC).
conds$blind <- bioCond(norm[c(5, 7)], norm[c(10, 12)], occupy.num = 2,
name = "blind")
conds <- fitMeanVarCurve(conds, method = "parametric",
occupy.only = TRUE, init.coef = c(0.1, 10))
# Note the dramatic decrease of number of prior degrees of freedom.
summary(conds$blind)
# Visualize mean-variance trend along with the fitted MVC.
plotMeanVarCurve(conds[3], subset = "occupied", ylim = c(-7, 1))
# Perform differential tests.
res2 <- diffTest(conds[[1]], conds[[2]])
head(res2)
# Visualize the overall test results.
# We use a cutoff of raw p-value here to select significant intervals.
MAplot(res2, pval = 0.01)
abline(h = 0, lwd = 2, lty = 5, col = "green3")
## ----checkConsistency-----------------------------------------------------------------------------
cor(res$pval, res2$pval, method = "spearman")
plot(-log10(res$pval), -log10(res2$pval), col = "#0000FF14", pch = 20,
xlab = "With Reps", ylab = "Without Reps")
## ----aovBioCond-----------------------------------------------------------------------------------
# Perform within-group normalization.
norm <- normalize(H3K27Ac, count = 4, occupancy = 9)
norm <- normalize(norm, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)
# Construct a bioCond for each group of samples.
conds <- list(GM12890 = bioCond(norm[4], norm[9], name = "GM12890"),
GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))
# Perform between-group normalization.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)
# Fit an MVC.
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE)
# Perform differential tests.
res <- aovBioCond(conds)
head(res)
## ----plotAovBioCond-------------------------------------------------------------------------------
plot(res, padj = 1e-6)
## ----cmbBioCond-----------------------------------------------------------------------------------
# Use the regular routine for normalization and construction of bioConds.
norm <- normalize(H3K27Ac, count = 4, occupancy = 9)
norm <- normalize(norm, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)
conds <- list(GM12890 = bioCond(norm[4], norm[9], name = "GM12890"),
GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)
# Group LCLs into different genders.
genders <- list(male = cmbBioCond(conds[2], name = "male"),
female = cmbBioCond(conds[c(1, 3)], name = "female"))
# Fit an MVC by using local regression.
genders <- fitMeanVarCurve(genders, method = "local", occupy.only = TRUE)
summary(genders$female)
plotMeanVarCurve(genders, subset = "occupied")
# Perform differential tests.
res <- diffTest(genders[[1]], genders[[2]])
head(res)
MAplot(res, pval = 0.01)
abline(h = 0, lwd = 2, lty = 5, col = "green3")
## ----allIntervals, fig.show = "hold", fig.height = 4.7, fig.width = 4.7---------------------------
genders2 <- fitMeanVarCurve(genders, method = "local", occupy.only = FALSE)
plotMeanVarCurve(genders, subset = "non-occupied",
main = "Use occupied intervals")
plotMeanVarCurve(genders2, subset = "non-occupied",
main = "Use all intervals")
## ----reducedD0------------------------------------------------------------------------------------
genders[[1]]$fit.info$df.prior
genders2[[1]]$fit.info$df.prior
## ----reEstimateD0---------------------------------------------------------------------------------
genders3 <- estimatePriorDf(genders2, occupy.only = TRUE)
plotMeanVarCurve(genders3, subset = "non-occupied",
main = "Re-estimate prior df")
genders3[[1]]$fit.info$df.prior
## ----HyperChIP------------------------------------------------------------------------------------
# Normalize all ChIP-seq samples once for all.
# Considering the number of samples in a hypervariable ChIP-seq analysis is
# typically large, HyperChIP uses a pseudo-reference profile as baseline in the
# MA normalization process to reduce the variability of normalization results.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, count = 4:8, occupancy = 9:13,
baseline = "pseudo-reference",
common.peak.regions = autosome)
# Construct a bioCond to group all the samples.
cond <- bioCond(norm[4:8], norm[9:13], occupy.num = 1,
name = "all")
# Fit an MVC by using local regression.
# Set "nn = 1" to increase the smoothness of the resulting MVC.
cond <- fitMeanVarCurve(list(cond), method = "local",
occupy.only = TRUE, args.lp = list(nn = 1))[[1]]
summary(cond)
# Apply the parameter estimation framework of HyperChIP.
# Note the dramatic increase in the estimated number of prior degrees of
# freedom.
cond <- estParamHyperChIP(cond)
summary(cond)
# Perform statistical tests.
res <- varTestBioCond(cond)
head(res)
## ----plotVarTestBioCond, fig.show = "hold", fig.height = 4.7, fig.width = 4.7---------------------
# Visualize the overall test results.
hist(res$pval, breaks = 100, col = "red")
plot(res, padj = 0.01)
## ----hypervariableOnly----------------------------------------------------------------------------
df <- attr(res, "df")
df
one.sided.pval <- pf(res$fold.change, df[1], df[2], lower.tail = FALSE)
## ----partiallyOccupiedIntervals, fig.show = "hold", fig.height = 4.7, fig.width = 4.7-------------
n <- rowSums(norm[9:13])
x <- list(All = -log10(one.sided.pval[n == 5]),
Partially = -log10(one.sided.pval[n > 0 & n < 5]))
wilcox.test(x$All, x$Partially, alternative = "less")
boxplot(x, ylab = "-Log10(p-value)")
boxplot(x, ylab = "-Log10(p-value)", outline = FALSE)
## ----distBioCond----------------------------------------------------------------------------------
# Normalize all ChIP-seq samples once for all.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, count = 4:8, occupancy = 9:13,
baseline = "pseudo-reference",
common.peak.regions = autosome)
# Construct a bioCond to group all the samples.
cond <- bioCond(norm[4:8], norm[9:13], occupy.num = 1,
name = "all")
# Fit an MVC by using local regression.
cond <- fitMeanVarCurve(list(cond), method = "local",
occupy.only = TRUE, args.lp = list(nn = 1))[[1]]
# Measure the distance between each pair of samples.
d <- distBioCond(cond, method = "prior")
d
# Perform hierarchical clustering.
plot(hclust(d, method = "average"), hang = -1)
## ----distBioCondSubset----------------------------------------------------------------------------
# Select hypervariable genomic intervals.
cond <- estParamHyperChIP(cond)
res <- varTestBioCond(cond)
f <- res$fold.change > 1 & res$padj < 0.01
# The hierarchical structure among samples remains unmodified,
# but note the change of magnitude of the distances between cell lines.
d2 <- distBioCond(cond, subset = f, method = "prior")
d2
plot(hclust(d2, method = "average"), hang = -1)
## ----sessionInfo----------------------------------------------------------------------------------
sessionInfo()
## ----restore, include = FALSE-------------------------------------------------
options(old.ops)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.