library(knitr)
library(Cairo)
opts_chunk$set(warning=FALSE, fig.width=6, fig.height=6, dev="CairoPNG", stop=TRUE)

library(GEOquery)

Normalize a 450k dataset

Download example data set


path <- download.450k.demo.dataset()

Normalize dataset

Create samplesheet

library(meffil)

samplesheet <- meffil.create.samplesheet(path)

options(mc.cores=3)

Parameters.

qc.file <- "450k-demo/qc-report.html"
author <- "Prickett, et al."
study <- "Silver-Russell syndrome patients (GEO:GSE55491)"
norm.file <- "450k-demo/normalization-report.html"
cell.type.reference <- "blood gse35069"

Generate quality control objects.

qc.objects <- meffil.qc(samplesheet, cell.type.reference=cell.type.reference, verbose=T)

QC report.

qc.summary <- meffil.qc.summary(qc.objects, verbose=T)
meffil.qc.report(qc.summary,
                 output.file=qc.file,
                 author=author,
                 study=study)

Remove any low quality samples.

if (nrow(qc.summary$bad.samples) > 0)
    qc.objects <- meffil.remove.samples(qc.objects, qc.summary$bad.samples$sample.name)

Check how many principal components to include.

print(meffil.plot.pc.fit(qc.objects, n.cross=3)$plot)

Normalize dataset.

number.pcs <- 2
norm.objects <- meffil.normalize.quantiles(qc.objects, number.pcs=number.pcs, verbose=T)
norm.dataset <- meffil.normalize.samples(norm.objects,
                                        just.beta=F, 
                                        cpglist.remove=qc.summary$bad.cpgs$name,
                                        verbose=T)

Generate normalization report.

beta <- meffil.get.beta(norm.dataset$M, norm.dataset$U)
pcs <- meffil.methylation.pcs(beta, sites=meffil.get.autosomal.sites("450k"), verbose=T)

parameters <- meffil.normalization.parameters(norm.objects)
parameters$batch.threshold <- 0.01
norm.summary <- meffil.normalization.summary(norm.objects=norm.objects,
                                             pcs=pcs,
                                             parameters=parameters, verbose=T)
meffil.normalization.report(norm.summary,
                            output.file=norm.file,
                            author=author,
                            study=study)

Just for fun, compare cell count estimates obtained from raw data to estimates obtained from normalized beta matrix (i.e. normalized methylation levels).

counts.meffil <- t(meffil.cell.count.estimates(norm.objects))

counts.beta <- meffil.estimate.cell.counts.from.betas(beta, cell.type.reference)

for (cell.type in colnames(counts.meffil)) {
    cat(cell.type, cor(counts.meffil[,cell.type], counts.beta[,cell.type]), "\n")
}

Generate counts for saliva and see how they compare.

counts.saliva <- mclapply(qc.objects, meffil.estimate.cell.counts, cell.type.reference="saliva gse48472", verbose=T)

counts.saliva <- t(sapply(counts.saliva, function(x) x$counts))

diag(cor(counts.saliva, counts.meffil)[colnames(counts.meffil), colnames(counts.meffil)])
quantile(counts.saliva[,"Buccal"])
cor(counts.saliva, counts.meffil)["Buccal",]
cor(counts.saliva)["Buccal",]


perishky/meffil documentation built on March 20, 2024, 1:56 a.m.