inst/doc/MAnorm2_vignette.R

## ----setup, include = FALSE-----------------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = FALSE,
  comment = "#>",
  fig.height = 5,
  fig.width = 5
)

old.ops <- options(width = 100)

## ----dataset--------------------------------------------------------------------------------------
library(MAnorm2)
head(H3K27Ac)

## ----H3K27Ac--------------------------------------------------------------------------------------
library(MAnorm2)
head(H3K27Ac)

## ----H3K27AcMetaInfo------------------------------------------------------------------------------
attr(H3K27Ac, "metaInfo")

## ----cmpBioReps-----------------------------------------------------------------------------------
# Perform within-group normalization.
norm <- normalize(H3K27Ac, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)

# Construct a bioCond for each group of samples.
conds <- list(GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
              GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))

# Perform between-group normalization.
# Restrict common peak regions to autosomes only when the two groups
# being compared are associated with different genders.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)

# Fit a mean-variance curve.
# If the following function call raises an error,
# set init.coef = c(0.1, 10) or try method = "local".
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE)

# Perform differential tests.
res <- diffTest(conds[[1]], conds[[2]])
head(res)

## ----oneStepNorm----------------------------------------------------------------------------------
# One-step normalization.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, count = 5:8, occupancy = 10:13,
                  common.peak.regions = autosome)

## ----normInfo-------------------------------------------------------------------------------------
names(attributes(norm))
attributes(norm)[5:8]

# This statement requires the gplots package (>= 3.0.1).
plot(attr(norm, "MA.cor"), symbreaks = TRUE, margins = c(8, 8),
     cexRow = 1, cexCol = 1)

## ----MAplotDefault, fig.show = "hold", fig.height = 4.7, fig.width = 4.7--------------------------
# Before normalization.
raw <- log(H3K27Ac[7:8] + 0.5, base = 2)
MAplot(raw[[1]], raw[[2]], norm[[12]], norm[[13]], ylim = c(-2, 2),
       main = "Before normalization")
abline(h = 0, lwd = 2, lty = 5)

# After normalization.
MAplot(norm[[7]], norm[[8]], norm[[12]], norm[[13]], ylim = c(-2, 2),
       main = "After normalization")
abline(h = 0, lwd = 2, lty = 5)

## ----withinNorm-----------------------------------------------------------------------------------
# Within-group normalization.
norm <- normalize(H3K27Ac, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)

## ----bioCond--------------------------------------------------------------------------------------
# Construct a bioCond for each LCL.
conds <- list(GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
              GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))

## ----summaryBioCond-------------------------------------------------------------------------------
summary(conds$GM12891)

## ----betweenNorm----------------------------------------------------------------------------------
# Between-group normalization.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)

## ----MAplotBioCond--------------------------------------------------------------------------------
MAplot(conds[[1]], conds[[2]], ylim = c(-12, 12), main = "GM12891 vs. GM12892")
abline(h = 0, lwd = 2, lty = 5)

## ----fitMeanVarCurve------------------------------------------------------------------------------
# Fit an MVC.
# The "parametric" method sometimes requires the users to explicitly specify
# initial coefficients. Try setting init.coef = c(0.1, 10) in these cases.
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE)

## ----summaryMVC-----------------------------------------------------------------------------------
summary(conds$GM12891)
summary(conds$GM12892)

## ----plotMeanVarCurve-----------------------------------------------------------------------------
# Plot only occupied genomic intervals,
# as only these intervals have been used to fit the MVC.
plotMeanVarCurve(conds, subset = "occupied", ylim = c(-7, 0.5))

## ----diffTestBioCond------------------------------------------------------------------------------
res <- diffTest(conds[[1]], conds[[2]])

## ----showRes--------------------------------------------------------------------------------------
head(res)

## ----MAplotDiffBioCond----------------------------------------------------------------------------
MAplot(res, padj = 0.001)
abline(h = 0, lwd = 2, lty = 5, col = "green3")

## ----cmpWithoutReps-------------------------------------------------------------------------------
# Perform normalization and create bioConds to represent the two LCLs.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, c(5, 7), c(10, 12), common.peak.regions = autosome)
conds <- list(GM12891 = bioCond(norm[5], norm[10], name = "GM12891"),
              GM12892 = bioCond(norm[7], norm[12], name = "GM12892"))

# Create a "blind" bioCond that treats the two samples as replicates and fit an
# MVC accordingly. Only common peak regions of the two samples are considered
# to be occupied by the "blind" bioCond, and only these regions are used to fit
# the MVC. This setting is for capturing underlying non-differential intervals
# as accurately as possible and avoiding over-estimation of prior variances
# (i.e., variances read from MVC).
conds$blind <- bioCond(norm[c(5, 7)], norm[c(10, 12)], occupy.num = 2,
                       name = "blind")
conds <- fitMeanVarCurve(conds, method = "parametric",
                         occupy.only = TRUE, init.coef = c(0.1, 10))

# Note the dramatic decrease of number of prior degrees of freedom.
summary(conds$blind)

# Visualize mean-variance trend along with the fitted MVC.
plotMeanVarCurve(conds[3], subset = "occupied", ylim = c(-7, 1))

# Perform differential tests.
res2 <- diffTest(conds[[1]], conds[[2]])
head(res2)

# Visualize the overall test results.
# We use a cutoff of raw p-value here to select significant intervals.
MAplot(res2, pval = 0.01)
abline(h = 0, lwd = 2, lty = 5, col = "green3")

## ----checkConsistency-----------------------------------------------------------------------------
cor(res$pval, res2$pval, method = "spearman")
plot(-log10(res$pval), -log10(res2$pval), col = "#0000FF14", pch = 20,
     xlab = "With Reps", ylab = "Without Reps")

## ----aovBioCond-----------------------------------------------------------------------------------
# Perform within-group normalization.
norm <- normalize(H3K27Ac, count = 4, occupancy = 9)
norm <- normalize(norm, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)

# Construct a bioCond for each group of samples.
conds <- list(GM12890 = bioCond(norm[4], norm[9], name = "GM12890"),
              GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
              GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))

# Perform between-group normalization.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)

# Fit an MVC.
conds <- fitMeanVarCurve(conds, method = "parametric", occupy.only = TRUE)

# Perform differential tests.
res <- aovBioCond(conds)
head(res)

## ----plotAovBioCond-------------------------------------------------------------------------------
plot(res, padj = 1e-6)

## ----cmbBioCond-----------------------------------------------------------------------------------
# Use the regular routine for normalization and construction of bioConds.
norm <- normalize(H3K27Ac, count = 4, occupancy = 9)
norm <- normalize(norm, count = 5:6, occupancy = 10:11)
norm <- normalize(norm, count = 7:8, occupancy = 12:13)
conds <- list(GM12890 = bioCond(norm[4], norm[9], name = "GM12890"),
              GM12891 = bioCond(norm[5:6], norm[10:11], name = "GM12891"),
              GM12892 = bioCond(norm[7:8], norm[12:13], name = "GM12892"))
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
conds <- normBioCond(conds, common.peak.regions = autosome)

# Group LCLs into different genders.
genders <- list(male = cmbBioCond(conds[2], name = "male"),
                female = cmbBioCond(conds[c(1, 3)], name = "female"))

# Fit an MVC by using local regression.
genders <- fitMeanVarCurve(genders, method = "local", occupy.only = TRUE)
summary(genders$female)
plotMeanVarCurve(genders, subset = "occupied")

# Perform differential tests.
res <- diffTest(genders[[1]], genders[[2]])
head(res)
MAplot(res, pval = 0.01)
abline(h = 0, lwd = 2, lty = 5, col = "green3")

## ----allIntervals, fig.show = "hold", fig.height = 4.7, fig.width = 4.7---------------------------
genders2 <- fitMeanVarCurve(genders, method = "local", occupy.only = FALSE)
plotMeanVarCurve(genders, subset = "non-occupied",
                 main = "Use occupied intervals")
plotMeanVarCurve(genders2, subset = "non-occupied",
                 main = "Use all intervals")

## ----reducedD0------------------------------------------------------------------------------------
genders[[1]]$fit.info$df.prior
genders2[[1]]$fit.info$df.prior

## ----reEstimateD0---------------------------------------------------------------------------------
genders3 <- estimatePriorDf(genders2, occupy.only = TRUE)
plotMeanVarCurve(genders3, subset = "non-occupied",
                 main = "Re-estimate prior df")
genders3[[1]]$fit.info$df.prior

## ----HyperChIP------------------------------------------------------------------------------------
# Normalize all ChIP-seq samples once for all.
# Considering the number of samples in a hypervariable ChIP-seq analysis is
# typically large, HyperChIP uses a pseudo-reference profile as baseline in the
# MA normalization process to reduce the variability of normalization results.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, count = 4:8, occupancy = 9:13,
                  baseline = "pseudo-reference",
                  common.peak.regions = autosome)

# Construct a bioCond to group all the samples.
cond <- bioCond(norm[4:8], norm[9:13], occupy.num = 1,
                name = "all")

# Fit an MVC by using local regression.
# Set "nn = 1" to increase the smoothness of the resulting MVC.
cond <- fitMeanVarCurve(list(cond), method = "local",
                        occupy.only = TRUE, args.lp = list(nn = 1))[[1]]
summary(cond)

# Apply the parameter estimation framework of HyperChIP.
# Note the dramatic increase in the estimated number of prior degrees of
# freedom.
cond <- estParamHyperChIP(cond)
summary(cond)

# Perform statistical tests.
res <- varTestBioCond(cond)
head(res)

## ----plotVarTestBioCond, fig.show = "hold", fig.height = 4.7, fig.width = 4.7---------------------
# Visualize the overall test results.
hist(res$pval, breaks = 100, col = "red")
plot(res, padj = 0.01)

## ----hypervariableOnly----------------------------------------------------------------------------
df <- attr(res, "df")
df
one.sided.pval <- pf(res$fold.change, df[1], df[2], lower.tail = FALSE)

## ----partiallyOccupiedIntervals, fig.show = "hold", fig.height = 4.7, fig.width = 4.7-------------
n <- rowSums(norm[9:13])
x <- list(All = -log10(one.sided.pval[n == 5]),
          Partially = -log10(one.sided.pval[n > 0 & n < 5]))
wilcox.test(x$All, x$Partially, alternative = "less")
boxplot(x, ylab = "-Log10(p-value)")
boxplot(x, ylab = "-Log10(p-value)", outline = FALSE)

## ----distBioCond----------------------------------------------------------------------------------
# Normalize all ChIP-seq samples once for all.
autosome <- !(H3K27Ac$chrom %in% c("chrX", "chrY"))
norm <- normalize(H3K27Ac, count = 4:8, occupancy = 9:13,
                  baseline = "pseudo-reference",
                  common.peak.regions = autosome)

# Construct a bioCond to group all the samples.
cond <- bioCond(norm[4:8], norm[9:13], occupy.num = 1,
                name = "all")

# Fit an MVC by using local regression.
cond <- fitMeanVarCurve(list(cond), method = "local",
                        occupy.only = TRUE, args.lp = list(nn = 1))[[1]]

# Measure the distance between each pair of samples.
d <- distBioCond(cond, method = "prior")
d

# Perform hierarchical clustering.
plot(hclust(d, method = "average"), hang = -1)

## ----distBioCondSubset----------------------------------------------------------------------------
# Select hypervariable genomic intervals.
cond <- estParamHyperChIP(cond)
res <- varTestBioCond(cond)
f <- res$fold.change > 1 & res$padj < 0.01

# The hierarchical structure among samples remains unmodified,
# but note the change of magnitude of the distances between cell lines.
d2 <- distBioCond(cond, subset = f, method = "prior")
d2
plot(hclust(d2, method = "average"), hang = -1)

## ----sessionInfo----------------------------------------------------------------------------------
sessionInfo()

## ----restore, include = FALSE-------------------------------------------------
options(old.ops)

Try the MAnorm2 package in your browser

Any scripts or data that you put into this service are public.

MAnorm2 documentation built on Oct. 29, 2022, 1:12 a.m.