Nothing
lumiMethyR <- function(filename, lib=NULL, controlData=NULL, qcfile=NULL, sampleDescriptions=NULL,
sep = NULL) {
methyLumiSet <- methylumiR(filename, qcfile=qcfile, sampleDescriptions=sampleDescriptions, sep=sep)
methyLumiM <- as(methyLumiSet, "MethyLumiM")
if (!is.null(lib)) {
# methyLumiM <- addColorChannelInfo(methyLumiM, lib=lib)
methyLumiM <- addAnnotationInfo(methyLumiM, lib=lib)
}
if (!is.null(controlData)) {
if (is.character(controlData)) {
controlData <- methylumiR(controlData)
}
if (is(controlData, "MethyLumiQC")) {
controlData(methyLumiM) <- controlData
} else {
cat("Provided controlData is not supported!\n")
}
}
return(methyLumiM)
}
## import Illumina Infinium methylation IDAT files based on barcodes.
## This is an extension of the methylumi::lumIDAT function
importMethyIDAT <- function(sampleInfo, dataPath=getwd(), lib=NULL, bigMatrix=FALSE, dir.bigMatrix='.', savePrefix.bigMatrix, ...) {
if (missing(sampleInfo)) {
stop('Please provide "sampleInfo"!')
}
if (is.character(sampleInfo)) {
barcodes <- sampleInfo
sampleInfo <- NULL
barcodeInfo <- strsplit(barcodes, split='_')
if (!all(sapply(barcodeInfo, length) == 2)) {
warning('Some barcodes not in the correct format!')
barcodeInfo <- lapply(barcodeInfo, function(x) {
if (length(x) == 1) {
x <- c(x, '')
} else {
x <- x[1:2]
}
return(x)
})
}
barcodeInfo <- matrix(unlist(barcodeInfo), ncol=2)
colnames(barcodeInfo) <- c('SENTRIX_BARCODE', 'SENTRIX_POSITION')
} else {
colnames(sampleInfo) <- toupper(colnames(sampleInfo))
if (any(colnames(sampleInfo) == 'SENTRIX_ID') && !('SENTRIX_BARCODE' %in% colnames(sampleInfo))) {
colnames(sampleInfo)[colnames(sampleInfo) == 'SENTRIX_ID'] <- 'SENTRIX_BARCODE'
}
if (!all(c('SENTRIX_BARCODE', 'SENTRIX_POSITION') %in% colnames(sampleInfo))) {
stop("'SENTRIX_POSITION' and 'SENTRIX_BARCODE' or 'SENTRIX_ID' and are required in 'sampleInfo'!")
}
barcodeInfo <- sampleInfo[,c('SENTRIX_BARCODE', 'SENTRIX_POSITION')]
barcodes <- paste(barcodeInfo[, 'SENTRIX_BARCODE'], barcodeInfo[, 'SENTRIX_POSITION'], sep='_')
}
## read IDAT files
## check the folder location
## read data path could be either dataPath or dataPath/SENTRIX_BARCODE
realDataPath <- dataPath
for (i in 1:length(barcodes)) {
file.pattern.i <- paste(barcodes[i], '.*\\.idat$', sep='')
data.path.i <- file.path(dataPath, barcodeInfo[i, 'SENTRIX_BARCODE'])
if (length(dir(data.path.i, pattern=file.pattern.i, ignore.case=T)) == 2) {
realDataPath[i] <- data.path.i
} else if (length(dir(dataPath, pattern=file.pattern.i, ignore.case=T)) != 2) {
realDataPath[i] <- NA
}
}
if (any(is.na(realDataPath))) {
stop(paste('IDAT files cannot be located for barcodes:', paste(barcodes[is.na(realDataPath)], collapse=',')))
}
## read IDAT files by individual dataPath
barcodesList <- split(barcodes, factor(realDataPath, levels=unique(realDataPath)))
if (bigMatrix) {
## read first batch of samples and create a BigMatrix with all sample information
## Then fill in the rest sample information
if (missing(savePrefix.bigMatrix)) {
warnings('savePrefix.bigMatrix is missing! bigMatrixFiles is used!')
savePrefix.bigMatrix <- 'bigMatrixFiles'
}
for (i in 1:length(barcodesList)) {
lumi450k.i <- lumIDAT(barcodesList[[i]], idatPath=names(barcodesList)[i], ...)
if (i == 1) {
dimNames <- list(featureNames(lumi450k.i), barcodes)
## predefine a big matrix
lumi450k <- lumi::asBigMatrix(lumi450k.i, nCol=length(barcodes), dimNames=dimNames, saveDir=dir.bigMatrix, savePrefix=savePrefix.bigMatrix)
} else {
## fill in data information of new samples
for (ad.name in assayDataElementNames(lumi450k.i)) {
matrixAll.i <- assayDataElement(lumi450k, ad.name)
if (is.null(matrixAll.i)) next
matrix.i <- assayDataElement(lumi450k.i, ad.name)
matrixAll.i[, colnames(matrix.i)] <- matrix.i
}
}
}
} else {
lumi450k <- lapply(1:length(barcodesList), function(i) lumIDAT(barcodesList[[i]], idatPath=names(barcodesList)[i], ...)) # return MethyLumiM object
suppressWarnings(lumi450k <- do.call('combine', lumi450k))
}
## add sample info
if (!is.null(sampleInfo)) {
## update the barcodes, the order may change after split barcodes to barcodesList
barcodes <- colnames(lumi450k)
rownames(sampleInfo) <- paste(barcodeInfo[, 'SENTRIX_BARCODE'], barcodeInfo[, 'SENTRIX_POSITION'], sep='_')
pData(lumi450k) <- sampleInfo[barcodes,]
## rename the samples if SAMPLE_NAME is provided in sampleInfo
if (!is.null(sampleInfo$SAMPLE_NAME)) {
samplename <- make.unique(sampleInfo[barcodes,'SAMPLE_NAME'])
sampleNames(lumi450k) <- as.character(samplename)
}
}
if (!is.null(lib)) {
## add annotation information: 'COLOR_CHANNEL', 'CHROMOSOME', 'POSITION'
lumi450k <- addAnnotationInfo(lumi450k, lib=lib)
}
return(lumi450k)
}
addAnnotationInfo <- function(methyLumiM, lib='FDb.InfiniumMethylation.hg19', annotationColumn=c('COLOR_CHANNEL', 'CHROMOSOME', 'POSITION')) {
if (is(methyLumiM, 'MethyLumiM')) {
# retrieve feature data
ff <- fData(methyLumiM)
probeList <- featureNames(methyLumiM)
} else if (is.character(methyLumiM)) {
probeList <- methyLumiM
ff <- data.frame()
} else {
stop('methyLumiM should be either a MethyLumiM object of a character vectors of probe names!')
}
if (!is.null(lib) && require(lib, character.only=TRUE)) {
## For FeatureDb annotation libraries
ff <- NULL
if (exists(lib)) {
lib <- get(lib)
if (is(lib, 'FeatureDb')) {
allAnnotation <- features(lib)
}
ff <- data.frame(ProbeID=probeList, CHROMOSOME=NA, POSITION=NA, COLOR_CHANNEL=NA)
rownames(ff) <- probeList
if (any(!(probeList %in% names(allAnnotation)))) {
missingProbe <- probeList[!(probeList %in% names(allAnnotation))]
probeList <- probeList[probeList %in% names(allAnnotation)]
warnings(paste(paste(missingProbe, collapse=','), 'probes do not exist in the annotation library!'))
}
allAnnotation <- allAnnotation[probeList]
ff[probeList, 'CHROMOSOME'] <- as.character(seqnames(allAnnotation))
ff[probeList, 'POSITION'] <- as.numeric(IRanges::start(allAnnotation))
ff[probeList, 'COLOR_CHANNEL'] <- as.character(allAnnotation$channel450)
}
if (is.null(ff)) {
## For old annotation libraries: IlluminaHumanMethylation450k.db
colorInfo <- chr <- loc <- rep(NA, length(probeList))
names(colorInfo) <- names(chr) <- names(loc) <- probeList
obj <- get(paste(sub("\\.db$", "", lib), "COLORCHANNEL", sep=""))
pp <- probeList[probeList %in% keys(obj)]
colorInfo[pp] <- sapply(AnnotationDbi::mget(pp, obj), function(x) x[1])
ff$COLOR_CHANNEL <- colorInfo
## Check whether multiple versions of chromosome information is available,
## If so, only use the latest version.
chrPattern <- paste(sub("\\.db$", "", lib), "CHR", sep="")
chrObjs <- ls(paste("package:", lib, sep=""), pattern=paste(chrPattern, "[0-9]+$", sep=""))
if (length(chrObjs) > 1) {
chrVersion <- sub(".*[^0-9]([0-9]+)$", "\\1", chrObjs)
obj <- get(chrObjs[which.max(as.numeric(chrVersion))])
} else {
obj <- get(chrPattern)
}
pp <- probeList[probeList %in% keys(obj)]
chr[pp] <- sapply(AnnotationDbi::mget(pp, obj), function(x) x[1])
ff$CHROMOSOME <- as.character(chr)
obj <- get(paste(sub("\\.db$", "", lib), "CPGCOORDINATE", sep=""))
pp <- probeList[probeList %in% keys(obj)]
loc[pp] <- sapply(AnnotationDbi::mget(pp, obj), function(x) x[1])
ff$POSITION <- as.numeric(as.character(loc))
}
} else {
if (all(c('CHR', 'MAPINFO') %in% names(ff))) {
ff$CHROMOSOME <- ff$CHR
ff$POSITION <- as.numeric(ff$MAPINFO)
}
if (!all(c('CHROMOSOME', 'POSITION', 'COLOR_CHANNEL') %in% names(ff))) {
stop('Probe annotation information is not available. Please provide annotation library!')
}
}
if (!is.data.frame(ff)) ff <- as.data.frame(ff, stringsAsFactors=FALSE)
rownames(ff) <- ff$ProbeID
if (is(methyLumiM, 'MethyLumiM')) {
fData(methyLumiM) <- ff
return(methyLumiM)
} else {
return(ff)
}
}
addControlData2methyLumiM <- function(controlData, methyLumiM, checkConsistency = TRUE, ...)
{
if (missing(methyLumiM) || missing(controlData)) stop('Both controlData and methyLumiM are required!')
if (is.character(controlData)) {
controlData <- methylumiR(controlData, ...)
}
if (is(controlData, "MethyLumiQC")) {
## Match the column names of controlData and LumiBatch object
## Only keep the samples matching methyLumiM
if (checkConsistency) {
sampleID <- sampleNames(methyLumiM)
controlSampleID <- sampleNames(controlData)
if (all(sampleID %in% controlSampleID)) {
controlData(methyLumiM) <- controlData[, sampleID]
} else {
stop('SampleNames do not match up between controlData and methyLumiM!')
}
} else {
controlData(methyLumiM) <- controlData[, sampleID]
}
} else {
cat("Provided controlData is not supported!\n")
}
return(methyLumiM)
}
# normalization
lumiMethyN <- function(methyLumiM, method = c('quantile', 'ssn', 'none'), separateColor=FALSE, verbose=TRUE, overwriteBigMatrix=FALSE, ...)
{
if (!is.function(method)) method <- match.arg(method)
if (!is(methyLumiM, 'MethyLumiM') && !is(methyLumiM, 'MethyGenoSet')) {
stop('The object should be class "MethyLumiM" or "MethyGenoSet"!')
}
if (is(assayDataElement(methyLumiM, 'exprs'), "BigMatrix") && !overwriteBigMatrix) {
stop('BigMatrix was identified in the AssayData, please set overwriteBigMatrix parameter as TRUE if you want to overwrite the existing data.')
}
history.submitted <- as.character(Sys.time())
if (!(is.function(method))) {
if (!(method %in% c('ssn', 'rssn', 'quantile', 'none'))) {
cat('This method is not supported!\n')
return(methyLumiM)
} else if (method == 'none') {
return(methyLumiM)
}
if (verbose) cat(paste('Perform', method, 'normalization ...\n'))
} else {
if (verbose) cat('Perform user provided normalization ...\n')
}
if (is.function(method)) {
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
if (separateColor) {
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
combData <- rbind(methy, unmethy)
processed.comb <- method(combData, ...)
if (!is(processed.comb, 'matrix')) stop("The return of user defined method should be a matrix!\n")
methy <- processed.comb[1:(nrow(processed.comb)/2), ]
unmethy <- processed.comb[(nrow(processed.comb)/2+1):nrow(processed.comb), ]
} else {
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
allRed <- rbind(methy[allRedInd,], unmethy[allRedInd,])
allGrn <- rbind(methy[allGrnInd,], unmethy[allGrnInd,])
processed.red <- method(allRed, ...)
if (!is(processed.red, 'matrix')) stop("The return of user defined method should be a matrix!\n")
processed.grn <- method(allGrn, ...)
methy[allRedInd,] <- processed.red[1:(nrow(processed.red)/2), ]
unmethy[allRedInd,] <- processed.red[(nrow(processed.red)/2+1):nrow(processed.red), ]
methy[allGrnInd,] <- processed.grn[1:(nrow(processed.grn)/2), ]
unmethy[allGrnInd,] <- processed.grn[(nrow(processed.grn)/2+1):nrow(processed.grn), ]
}
} else {
combData <- rbind(methy, unmethy)
processed.comb <- method(combData, ...)
if (!is(processed.comb, 'matrix')) stop("The return of user defined method should be a matrix!\n")
methy <- processed.comb[1:(nrow(processed.comb)/2), ]
unmethy <- processed.comb[(nrow(processed.comb)/2+1):nrow(processed.comb), ]
}
methylated(methyLumiM) <- methy
unmethylated(methyLumiM) <- unmethy
methyLumiM <- estimateM(methyLumiM)
} else {
if (method == 'quantile') {
methyLumiM <- normalizeMethylation.quantile(methyLumiM, separateColor=separateColor, ...)
} else if (method == 'ssn') {
methyLumiM <- normalizeMethylation.ssn(methyLumiM, separateColor=separateColor, ...)
#} else if (method == 'rssn') {
# methyLumiM <- normalizeMethylation.robust.ssn(methyLumiM, separateColor=separateColor, ...)
}
}
history.finished <- as.character(Sys.time())
# history.command <- capture.output(print(match.call(lumiMethyN)))
history.command <- paste(deparse(match.call(lumiMethyN)), collapse='')
lumiVersion <- packageDescription('lumi')$Version
methyLumiM@history<- rbind(methyLumiM@history, data.frame(submitted=history.submitted,
finished=history.finished, command=history.command, lumiVersion=lumiVersion))
return(methyLumiM)
}
# color balance adjustmentdim
lumiMethyC <- function(methyLumiM, method = c('quantile', 'ssn', 'none'), verbose=TRUE, overwriteBigMatrix=FALSE, ...)
{
if (!is.function(method)) method <- match.arg(method)
if (!is(methyLumiM, 'MethyLumiM') && !is(methyLumiM, 'MethyGenoSet')) {
stop('The object should be class "MethyLumiM" or "MethyGenoSet"!')
}
if (is(assayDataElement(methyLumiM, 'exprs'), "BigMatrix") && !overwriteBigMatrix) {
stop('BigMatrix was identified in the AssayData, please set overwriteBigMatrix parameter as TRUE if you want to overwrite the existing data.')
}
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL))
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
history.submitted <- as.character(Sys.time())
if (!(is.function(method))) {
if (!(method %in% c('quantile', 'ssn', 'none'))) {
cat('This method is not supported!\n')
return(methyLumiM)
} else if (method == 'none') {
return(methyLumiM)
}
if (verbose) cat(paste('Perform', method, 'color balance adjustment ...\n'))
} else {
if (verbose) cat('Perform user provided color balance adjustment ...\n')
}
if (is.function(method)) {
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
allRed <- rbind(methy[allRedInd,], unmethy[allRedInd,])
allGrn <- rbind(methy[allGrnInd,], unmethy[allGrnInd,])
processedData <- method(allRed, allGrn, ...)
processed.red <- processedData$red
if (is.null(processed.red)) stop("The return of user defined method should be a list including 'red' and 'green' matrix!\n")
processed.grn <- processedData$green
methy[allRedInd,] <- processed.red[1:(nrow(processed.red)/2), ]
unmethy[allRedInd,] <- processed.red[(nrow(processed.red)/2+1):nrow(processed.red), ]
methy[allGrnInd,] <- processed.grn[1:(nrow(processed.grn)/2), ]
unmethy[allGrnInd,] <- processed.grn[(nrow(processed.grn)/2+1):nrow(processed.grn), ]
methylated(methyLumiM) <- methy
unmethylated(methyLumiM) <- unmethy
methyLumiM <- estimateM(methyLumiM)
} else {
if (method == 'quantile') {
methyLumiM <- adjColorBias.quantile(methyLumiM, verbose=verbose, ...)
} else if (method == 'ssn') {
methyLumiM <- adjColorBias.ssn(methyLumiM, ...)
}
}
history.finished <- as.character(Sys.time())
# history.command <- capture.output(print(match.call(lumiMethyC)))
history.command <- paste(deparse(match.call(lumiMethyC)), collapse='')
lumiVersion <- packageDescription('lumi')$Version
methyLumiM@history<- rbind(methyLumiM@history, data.frame(submitted=history.submitted,
finished=history.finished, command=history.command, lumiVersion=lumiVersion))
return(methyLumiM)
}
lumiMethyB <- function(methyLumiM, method = c('bgAdjust2C', 'forcePositive', 'none'), separateColor=FALSE, verbose=TRUE, overwriteBigMatrix=FALSE, ...)
{
if (!is.function(method)) method <- match.arg(method)
if (is(methyLumiM, 'MethyLumiM') || is(methyLumiM, 'MethyGenoSet')) {
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
} else {
stop('The object should be class "MethyLumiM" or "MethyGenoSet" inherited!')
}
if (is(assayDataElement(methyLumiM, 'exprs'), "BigMatrix") && !overwriteBigMatrix) {
stop('BigMatrix was identified in the AssayData, please set overwriteBigMatrix parameter as TRUE if you want to overwrite the existing data.')
}
history.submitted <- as.character(Sys.time())
if (!(is.function(method))) {
if (!(method %in% c('bgAdjust2C', 'none', 'forcePositive'))) {
cat('This method is not supported!\n')
return(methyLumiM)
} else if (method == 'none') {
return(methyLumiM)
} else if (method == 'forcePositive') {
mm <- min(c(min(methy, na.rm=TRUE), min(unmethy, na.rm=TRUE)))
if (mm > 0) return(methyLumiM)
}
if (verbose) cat(paste('Perform', method, 'background correction ...\n'))
} else {
if (verbose) cat('Perform user provided background correction ...\n')
}
if (is.function(method)) {
if (separateColor) {
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
combData <- rbind(methy, unmethy)
bgAdj.comb <- method(combData, ...)
if (!is(bgAdj.comb, 'matrix')) stop("The return of user defined method should be a matrix!\n")
methy <- bgAdj.comb[1:(nrow(bgAdj.comb)/2), ]
unmethy <- bgAdj.comb[(nrow(bgAdj.comb)/2+1):nrow(bgAdj.comb), ]
} else {
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
allRed <- rbind(methy[allRedInd,], unmethy[allRedInd,])
allGrn <- rbind(methy[allGrnInd,], unmethy[allGrnInd,])
bgAdj.red <- method(allRed, ...)
if (!is(bgAdj.red, 'matrix')) stop("The return of user defined method should be a matrix!\n")
bgAdj.grn <- method(allGrn, ...)
methy[allRedInd,] <- bgAdj.red[1:(nrow(bgAdj.red)/2), ]
unmethy[allRedInd,] <- bgAdj.red[(nrow(bgAdj.red)/2+1):nrow(bgAdj.red), ]
methy[allGrnInd,] <- bgAdj.grn[1:(nrow(bgAdj.grn)/2), ]
unmethy[allGrnInd,] <- bgAdj.grn[(nrow(bgAdj.grn)/2+1):nrow(bgAdj.grn), ]
}
} else {
combData <- rbind(methy, unmethy)
bgAdj.comb <- method(combData, ...)
if (!is(bgAdj.comb, 'matrix')) stop("The return of user defined method should be a matrix!\n")
methy <- bgAdj.comb[1:(nrow(bgAdj.comb)/2), ]
unmethy <- bgAdj.comb[(nrow(bgAdj.comb)/2+1):nrow(bgAdj.comb), ]
}
methylated(methyLumiM) <- methy
unmethylated(methyLumiM) <- unmethy
} else {
if (method == 'bgAdjust2C') {
methyLumiM <- bgAdjustMethylation(methyLumiM, separateColor=separateColor, ...)
} else if (method == 'forcePositive') {
x.matrix <- rbind(methy, unmethy)
offset <- apply(x.matrix, 2, min, na.rm=TRUE)
offset[offset <= 0] <- offset[offset <= 0] - 0.01
offset[offset > 0] <- 0
offset <- rep(1, nrow(x.matrix)) %*% t(offset)
x.matrix <- x.matrix - offset
methylated(methyLumiM) <- x.matrix[1:nrow(methy),]
unmethylated(methyLumiM) <- x.matrix[(nrow(unmethy)+1):nrow(x.matrix),]
}
}
methyLumiM <- estimateM(methyLumiM)
history.finished <- as.character(Sys.time())
# history.command <- capture.output(print(match.call(lumiMethyB)))
history.command <- paste(deparse(match.call(lumiMethyB)), collapse='')
lumiVersion <- packageDescription('lumi')$Version
methyLumiM@history<- rbind(methyLumiM@history, data.frame(submitted=history.submitted,
finished=history.finished, command=history.command, lumiVersion=lumiVersion))
return(methyLumiM)
}
bgAdjustMethylation <- function(methyLumiM, separateColor=FALSE, targetBGLevel=300, negPercTh=0.25) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
methy <- methylated(methyLumiM)
unmethy <- unmethylated(methyLumiM)
## check whether control data is available in the MethyLumiM object
bglevel <- estimateMethylationBG(methyLumiM, separateColor=separateColor)
if (is.null(colnames(bglevel))) separateColor <- FALSE
if (separateColor) {
bg.red <- bglevel[,'red']
bg.grn <- bglevel[,'green']
annotation <- pData(featureData(methyLumiM))
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
## For 450K chip, the Infinium II type only has one type of probe for each CpG site.
## The color channel will depend on the methylation status
allBothInd <- which(annotation$COLOR_CHANNEL == 'Both' | annotation$COLOR_CHANNEL == '')
methy[allRedInd, ] <- methy[allRedInd, ] - rep(1, length(allRedInd)) %*% t(bg.red)
methy[allGrnInd, ] <- methy[allGrnInd, ] - rep(1, length(allGrnInd)) %*% t(bg.grn)
unmethy[allRedInd, ] <- unmethy[allRedInd, ] - rep(1, length(allRedInd)) %*% t(bg.red)
unmethy[allGrnInd, ] <- unmethy[allGrnInd, ] - rep(1, length(allGrnInd)) %*% t(bg.grn)
if (length(allBothInd) > 0) {
## the methylated probe has 'Grn' color, while the unmethylated probe has 'Red' color
unmethy[allBothInd, ] <- unmethy[allBothInd,] - rep(1, length(allBothInd)) %*% t(bg.red)
methy[allBothInd, ] <- methy[allBothInd,] - rep(1, length(allBothInd)) %*% t(bg.grn)
}
} else {
if (is.matrix(bglevel)) bglevel <- rowMeans(bglevel)
if (is(methy, 'BigMatrix')) {
for (i in 1:ncol(methy)) {
methy[,i] <- methy[,i] - bglevel[i]
unmethy[,i] <- unmethy[,i] - bglevel[i]
}
} else {
methy <- methy - rep(1, nrow(methy)) %*% t(bglevel)
unmethy <- unmethy - rep(1, nrow(unmethy)) %*% t(bglevel)
}
}
## check possible error of BG estimation model
if (is(methy, 'BigMatrix')) {
negPerc.methy <- bigmemoryExtras::apply(methy, 2, function(x) length(which(x < 0))/nrow(methy))
negPerc.unmethy <- bigmemoryExtras::apply(unmethy, 2, function(x) length(which(x < 0))/nrow(unmethy))
} else {
negPerc.methy <- apply(methy, 2, function(x) length(which(x < 0))/nrow(methy))
negPerc.unmethy <- apply(unmethy, 2, function(x) length(which(x < 0))/nrow(unmethy))
}
negPerc <- pmax(negPerc.methy, negPerc.unmethy)
if (any(negPerc > negPercTh)) {
warning("Possible bad quality samples or possible error of Background estimation model: \n")
cat("\t", paste(colnames(methy)[negPerc > negPercTh], collapse=", "), "\n")
}
if (is(methy, 'BigMatrix')) {
for (i in 1:ncol(methy)) {
methy[,i] <- methy[,i] + targetBGLevel
unmethy[,i] <- unmethy[,i] + targetBGLevel
}
} else {
methy <- methy + targetBGLevel
unmethy <- unmethy + targetBGLevel
}
methylated(methyLumiM) <- methy
unmethylated(methyLumiM) <- unmethy
if (is(methyLumiM, "MethyLumiM") || is(methyLumiM, 'MethyGenoSet')) methyLumiM <- estimateM(methyLumiM)
attr(methyLumiM, "EstimatedBG") <- list(rawBG=bglevel, adjBG=rep(targetBGLevel, ncol(methy)))
return(methyLumiM)
}
## ---------------------------------------------------------------
# methy.raw1.adj <- adjColorBias(methy.raw1)
# methy.raw2.adj <- adjColorBias(methy.raw2)
# methy.adj <- cbind(methylated(methy.raw1.adj), methylated(methy.raw2.adj))
# unmethy.adj <- cbind(unmethylated(methy.raw1.adj), unmethylated(methy.raw2.adj))
# M.adj <- log2(methy.adj/unmethy.adj)
adjColorBias.ssn <- function(methyLumiM, refChannel=c("green", "red", "mean")) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
if (storageMode(assayData(methyLumiM)) == "environment") storageMode(assayData(methyLumiM)) <- "lockedEnvironment"
refChannel <- match.arg(refChannel)
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
if (is.null(unmethy) || is.null(methy)) stop("methylated or unmethylated data is not included in the dataset!\n")
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
return(methyLumiM)
}
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
## For 450K chip, the Infinium II type only has one type of probe for each CpG site.
## The color channel will depend on the methylation status
allBothInd <- which(annotation$COLOR_CHANNEL == 'Both' | annotation$COLOR_CHANNEL == '')
bg <- estimateMethylationBG(methyLumiM, separateColor=TRUE)
bg.red <- bg[,"red"]
bg.grn <- bg[,"green"]
## To be compatible with BigMatrix data
for (i in 1:ncol(methy)) {
intensity.grn <- unmethy[allGrnInd,i] + methy[allGrnInd,i]
intensity.red <- unmethy[allRedInd,i] + methy[allRedInd,i]
m.int.grn <- mean(intensity.grn) - bg.grn[i]
m.int.red <- mean(intensity.red) - bg.red[i]
if (refChannel == 'green') {
m.ref <- m.int.grn
bg.ref <- bg.grn[i]
} else if (refChannel == 'red') {
m.ref <- m.int.red
bg.ref <- bg.red[i]
} else {
m.ref <- (m.int.grn + m.int.red)/2
bg.ref <- (bg.grn[i] + bg.red[i])/2
}
unmethy[allRedInd, i] <- unmethy[allRedInd,i] - bg.red[i] * m.ref / m.int.red + bg.ref
unmethy[allGrnInd, i] <- unmethy[allGrnInd,i] - bg.grn[i] * m.ref / m.int.grn + bg.ref
methy[allRedInd, i] <- methy[allRedInd,i] - bg.red[i] * m.ref / m.int.red + bg.ref
methy[allGrnInd, i] <- methy[allGrnInd,i] - bg.grn[i] * m.ref / m.int.grn + bg.ref
if (length(allBothInd) > 0) {
## the methylated probe has 'Grn' color, while the unmethylated probe has 'Red' color
unmethy[allBothInd, i] <- unmethy[allBothInd,i] - bg.red[i] * m.ref / m.int.red + bg.ref
methy[allBothInd, i] <- methy[allBothInd,i] - bg.grn[i] * m.ref / m.int.grn + bg.ref
}
}
methy.adj <- methyLumiM
assayDataElement(methy.adj, 'unmethylated') <- unmethy
assayDataElement(methy.adj, 'methylated') <- methy
if (is(methy.adj, "MethyLumiM") || is(methy.adj, 'MethyGenoSet')) methy.adj <- estimateM(methy.adj)
return(methy.adj)
}
adjColorBias.quantile <- function(methyLumiM, refChannel=c("green", "red"), logMode=TRUE, verbose=TRUE, ...) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
if (storageMode(assayData(methyLumiM)) == "environment") storageMode(assayData(methyLumiM)) <- "lockedEnvironment"
refChannel <- match.arg(refChannel)
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
return(methyLumiM)
}
redInd <- annotation$COLOR_CHANNEL == 'Red'
grnInd <- annotation$COLOR_CHANNEL == 'Grn'
## For 450K chip, the Infinium II type only has one type of probe for each CpG site.
## The color channel will depend on the methylation status
bothInd <- which(annotation$COLOR_CHANNEL == 'Both' | annotation$COLOR_CHANNEL == '')
bandwidth <- ifelse(logMode, 0.3, 200)
for (i in 1:ncol(unmethy)) {
if (verbose) cat('Processing sample', colnames(unmethy)[i], '...\n')
red.a.i <- unmethy[redInd, i]
red.b.i <- methy[redInd, i]
red.i <- c(red.a.i, red.b.i)
grn.a.i <- unmethy[grnInd, i]
grn.b.i <- methy[grnInd, i]
grn.i <- c(grn.a.i, grn.b.i)
if (refChannel == 'green') {
## For 450K data
if (length(bothInd) > 0) {
## the methylated probe has 'Grn' color, while the unmethylated probe has 'Red' color
y.out.i <- smoothQuantileNormalization(red.i, grn.i, adjData=c(red.i, unmethy[bothInd,i]), logMode=logMode, bandwidth=bandwidth, verbose=verbose, ...)
unmethy[bothInd,i] <- y.out.i[(length(red.i)+1):length(y.out.i)]
# y.out.i <- smoothQuantileNormalization(red.i, grn.i, adjData=c(red.i, methy[bothInd,i]), logMode=logMode, bandwidth=bandwidth, verbose=verbose, ...)
# methy[bothInd,i] <- y.out.i[(length(red.i)+1):length(y.out.i)]
} else {
y.out.i <- smoothQuantileNormalization(red.i, grn.i, logMode=logMode, bandwidth=bandwidth, verbose=verbose, ...)
}
unmethy[redInd, i] <- y.out.i[1:length(red.a.i)]
methy[redInd, i] <- y.out.i[(length(red.a.i)+1):length(red.i)]
} else {
if (length(bothInd) > 0) {
## the methylated probe has 'Grn' color, while the unmethylated probe has 'Red' color
y.out.i <- smoothQuantileNormalization(grn.i, red.i, adjData=c(grn.i, methy[bothInd,i]), logMode=logMode, bandwidth=bandwidth, verbose=verbose, ...)
methy[bothInd,i] <- y.out.i[(length(grn.i)+1):length(y.out.i)]
} else {
y.out.i <- smoothQuantileNormalization(grn.i, red.i, logMode=logMode, bandwidth=bandwidth, verbose=verbose, ...)
}
unmethy[grnInd, i] <- y.out.i[1:length(grn.a.i)]
methy[grnInd, i] <- y.out.i[(length(grn.a.i)+1):length(grn.i)]
}
}
methy.adj <- methyLumiM
assayDataElement(methy.adj, 'unmethylated') <- unmethy
assayDataElement(methy.adj, 'methylated') <- methy
if (is(methy.adj, "MethyLumiM") || is(methy.adj, 'MethyGenoSet')) methy.adj <- estimateM(methy.adj)
return(methy.adj)
}
smoothQuantileNormalization <- function(dataMatrix, ref=NULL, adjData=NULL, logMode=TRUE, bandwidth=NULL, degree=1, verbose=FALSE, ...) {
# require(KernSmooth)
bandwidth <- ifelse(logMode, 0.3, 200)
if (!is.matrix(dataMatrix)) dataMatrix <- matrix(dataMatrix, ncol=1)
if (is.null(ref)) {
# normData <- normalize.quantiles.robust(dataMatrix + 0.0)
normData <- limma::normalizeQuantiles(dataMatrix)
ref <- normData[,1]
}
if (!is.null(adjData)) {
if (!is.matrix(adjData)) adjData <- matrix(adjData, ncol=1)
if (ncol(dataMatrix) != ncol(adjData))
stop("The number of columns of adjData should be consistent with dataMatrix!")
}
interpolationMode <- ifelse(length(ref) != nrow(dataMatrix), TRUE, FALSE)
if (logMode) {
## remove those equal or less than 0, which are unreliable values
if (interpolationMode) ref <- ref[ref > 0]
if (min(ref, na.rm=TRUE) < 1) ref <- ref - min(ref, na.rm=TRUE) + 1
ref <- log2(ref)
}
## remove NA in ref
ref <- ref[!is.na(ref)]
len <- nrow(dataMatrix)
refLen <- length(ref)
gridsize <- min(min(len, refLen)/2, 1000)
# In the case of different lengthes between reference and data, interpolation will be performed.
if (interpolationMode) {
x <- (1:refLen)/refLen
y <- sort(ref, decreasing=FALSE)
} else {
y <- sort(ref, decreasing=FALSE)
}
# smoothing the quantile normalization results
if (is.null(adjData)) {
normData <- dataMatrix
} else {
normData <- adjData
}
for (i in 1:ncol(dataMatrix)) {
# if (verbose) cat('Processing sample', colnames(dataMatrix)[i], '...\n')
profile.i <- dataMatrix[,i]
profile.naInd.i <- which(is.na(profile.i))
if (length(profile.naInd.i) > 0)
profile.i <- profile.i[-profile.naInd.i]
if (!is.null(adjData)) {
adjData.i <- adjData[,i]
} else {
adjData.i <- profile.i
}
## remove possible NAs
naInd.i <- is.na(adjData.i)
adjData.i <- adjData.i[!naInd.i]
if (logMode) {
mm.i <- min(c(profile.i, adjData.i), na.rm=TRUE)
if (mm.i < 1) {
adjData.i <- log2(adjData.i - mm.i + 1)
profile.i <- log2(profile.i - mm.i + 1)
} else {
adjData.i <- log2(adjData.i)
profile.i <- log2(profile.i)
}
}
if (interpolationMode) {
## remove those equal or less than 0, which are unreliable values
selProfile.i <- profile.i[profile.i > 0]
len <- length(selProfile.i)
# perform linear interpolation when the length of two profiles different
x.out.i <- rank(selProfile.i)/len
y.out.i <- approx(x=x, y=y, xout=x.out.i, method="linear", rule=2)$y
} else {
selProfile.i <- profile.i
y.out.i <- y[rank(selProfile.i)]
}
## remove outliers points at two ends
rank.i <- rank(selProfile.i, ties.method='min')
lowInd.i <- which(rank.i <= 500)
highInd.i <- which(rank.i > max(rank.i) - 500)
#boundary.i <- quantile(selProfile.i, c(0.3, 0.7))
#lowInd.i <- selProfile.i < boundary.i[1]
#highInd.i <- selProfile.i > boundary.i[2]
## fit the low segment
suppressWarnings(rlm.i <- rlm(selProfile.i[lowInd.i], y.out.i[lowInd.i]))
dd.i <- y.out.i[lowInd.i] - rlm.i$fitted.values
outlier.ind.low.i <- lowInd.i[which(abs(dd.i) > 3 * sd(dd.i))]
## fit the high segment
suppressWarnings(rlm.i <- rlm(selProfile.i[highInd.i], y.out.i[highInd.i]))
dd.i <- y.out.i[highInd.i] - rlm.i$fitted.values
outlier.ind.high.i <- highInd.i[which(abs(dd.i) > 3 * sd(dd.i))]
outlier.ind.i <- c(outlier.ind.low.i, outlier.ind.high.i)
if (length(outlier.ind.i) > 0) {
tt <- locpoly(selProfile.i[-outlier.ind.i], y.out.i[-outlier.ind.i], degree=degree, gridsize=gridsize, bandwidth=bandwidth, ...)
# test <- monoSpline(x=selProfile.i[-outlier.ind.i], y=y.out.i[-outlier.ind.i], newX=adjData.i, nKnots=50, ifPlot=FALSE)
} else {
tt <- locpoly(selProfile.i, y.out.i, degree=degree, gridsize=gridsize, bandwidth=bandwidth, ...)
}
norm.i <- approx(x=tt$x, y=tt$y, xout=adjData.i, rule=2)$y
## check the rank difference before and after fitting
rank.old.i <- rank(adjData.i, ties.method='min')
rank.new.i <- rank(norm.i, ties.method='min')
suppressWarnings(rank.fit.i <- rlm(new~old, data.frame(new=rank.new.i, old=rank.old.i)))
rank.diff.ind.i <- which(abs(rank.fit.i$residuals) > 1)
## Add na and infinite indexes
rank.diff.ind.i <- unique(c(rank.diff.ind.i, which(is.na(norm.i)), which(is.infinite(norm.i))))
if (length(rank.diff.ind.i) > 0) {
rank.na.i <- rank.old.i[rank.diff.ind.i]
replaceVal.i <- NULL
for (rank.na.ij in unique(rank.na.i)) {
down.ij <- sort(norm.i[rank.old.i < rank.na.ij & !(rank.old.i %in% rank.na.i)], decreasing=TRUE)[1]
up.ij <- sort(norm.i[rank.old.i > rank.na.ij & !(rank.old.i %in% rank.na.i)], decreasing=FALSE)[1]
norm.i[rank.diff.ind.i[rank.na.i == rank.na.ij]] <- mean(c(up.ij, down.ij), na.rm=TRUE)
}
}
if (length(profile.naInd.i) > 0) {
tmp.i <- norm.i
norm.i <- rep(NA, nrow(normData))
norm.i[-profile.naInd.i] <- tmp.i
}
# plot(selProfile.i, y.out.i, pch='.')
# lines(tt, col=2)
# if (max(norm.i) > 30) browser()
if (logMode) {
normData[!naInd.i,i] <- 2^norm.i
} else {
normData[!naInd.i,i] <- norm.i
}
}
return(normData)
}
estimateMethylationBG <- function(methyLumiM, separateColor=FALSE, nbin=1000) {
estimateBG <- function(dataMatrix, nbin=1000) {
if (is(dataMatrix, 'BigMatrix')) {
bg <- bigmemoryExtras::apply(dataMatrix, 2, function(x) {
hh.x <- hist(x, nbin, plot=FALSE)
Th <- hh.x$breaks[which.max(hh.x$counts) + 1] * 2
dd.x <- density(x[x < Th], na.rm=TRUE)
bg.x <- dd.x$x[which.max(dd.x$y)]
})
} else {
bg <- apply(dataMatrix, 2, function(x) {
hh.x <- hist(x, nbin, plot=FALSE)
Th <- hh.x$breaks[which.max(hh.x$counts) + 1] * 2
dd.x <- density(x[x < Th], na.rm=TRUE)
bg.x <- dd.x$x[which.max(dd.x$y)]
})
}
return(bg)
}
## check whether the control data is available
controlData <- NULL
if (is(methyLumiM, "MethyLumiM")) {
controlData <- controlData(methyLumiM)
}
if (is(methyLumiM, "MethyLumiSet")) {
controlData <- QCdata(methyLumiM)
}
if (is(controlData, "MethyLumiQC")) {
# For control data, methylated data corresponds to green channel, "Signal_Grn"
# For control data, unmethylated data corresponds to red channel, "Signal_Red"
grnData <- assayDataElement(controlData, "methylated")
redData <- assayDataElement(controlData, "unmethylated")
allControlType <- sapply(strsplit(featureNames(controlData), "\\."), function(x) x[1])
allControlType <- toupper(allControlType)
neg.ind <- which(allControlType == "NEGATIVE")
if (length(neg.ind) > 0) {
bg.grn <- apply(grnData[neg.ind, ,drop=F], 2, median)
bg.red <- apply(redData[neg.ind, ,drop=F], 2, median)
if (separateColor) {
bg <- cbind(red=bg.red, green=bg.grn)
} else {
bg <- pmin(bg.red, bg.grn)
}
return(bg)
}
}
## In the case the negative control data is not available, the background will be estimated based on the mode positions of unmethylated or methylated distribution (the smaller one)
if (is(methyLumiM, "eSet")) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
if (separateColor) {
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
bg.methy <- estimateBG(methy, nbin=nbin)
bg.unmethy <- estimateBG(unmethy, nbin=nbin)
bg <- pmin(bg.methy, bg.unmethy)
} else {
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
bg.methy.red <- estimateBG(methy[allRedInd,], nbin=nbin)
bg.unmethy.red <- estimateBG(unmethy[allRedInd,], nbin=nbin)
bg.red <- pmin(bg.methy.red, bg.unmethy.red)
bg.methy.grn <- estimateBG(methy[allGrnInd,], nbin=nbin)
bg.unmethy.grn <- estimateBG(unmethy[allGrnInd,], nbin=nbin)
bg.grn <- pmin(bg.methy.grn, bg.unmethy.grn)
bg <- cbind(red=bg.red, green=bg.grn)
}
} else {
bg.methy <- estimateBG(methy, nbin=nbin)
bg.unmethy <- estimateBG(unmethy, nbin=nbin)
bg <- pmin(bg.methy, bg.unmethy)
}
} else if (is(methyLumiM, 'matrix')) {
bg <- estimateBG(methyLumiM, nbin=nbin)
} else {
stop("The input data class is not supported!\n")
}
return(bg)
}
normalizeMethylation.ssn <- function(methyLumiM, separateColor=FALSE) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
if (storageMode(assayData(methyLumiM)) == "environment") storageMode(assayData(methyLumiM)) <- "lockedEnvironment"
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
# estimate the background based on methy
bg <- estimateMethylationBG(methyLumiM, separateColor=separateColor)
if (is.null(colnames(bg))) separateColor <- FALSE
if (separateColor) {
bg.red <- bg[,'red']
bg.grn <- bg[,'green']
meanBg.red <- mean(bg.red)
meanBg.grn <- mean(bg.grn)
annotation <- pData(featureData(methyLumiM))
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
intensity.red <- unmethy[allRedInd, ] + methy[allRedInd, ]
totalIntensity.red <- colSums(intensity.red)
meanTotalIntensity.red <- mean(totalIntensity.red)
intensity.grn <- unmethy[allGrnInd, ] + methy[allGrnInd, ]
totalIntensity.grn <- colSums(intensity.grn)
meanTotalIntensity.grn <- mean(totalIntensity.grn)
methy[allRedInd, ] <- (methy[allRedInd, ] - rep(1, length(allRedInd)) %*% t(bg.red)) * (rep(1, length(allRedInd)) %*% t(meanTotalIntensity.red/totalIntensity.red)) + meanBg.red
methy[allGrnInd, ] <- (methy[allGrnInd, ] - rep(1, length(allGrnInd)) %*% t(bg.grn)) * (rep(1, length(allGrnInd)) %*% t(meanTotalIntensity.grn/totalIntensity.grn)) + meanBg.grn
unmethy[allRedInd, ] <- (unmethy[allRedInd, ] - rep(1, length(allRedInd)) %*% t(bg.red)) * (rep(1, length(allRedInd)) %*% t(meanTotalIntensity.red/totalIntensity.red)) + meanBg.red
unmethy[allGrnInd, ] <- (unmethy[allGrnInd, ] - rep(1, length(allGrnInd)) %*% t(bg.grn)) * (rep(1, length(allGrnInd)) %*% t(meanTotalIntensity.grn/totalIntensity.grn)) + meanBg.grn
} else {
meanBg <- mean(bg)
if (is(methy, 'BigMatrix')) {
totalIntensity <- NULL
for (i in 1:ncol(methy)) {
intensity.i <- unmethy[,i] + methy[,i]
totalIntensity <- c(totalIntensity, sum(intensity.i))
}
meanTotalIntensity <- mean(totalIntensity)
for (i in 1:ncol(methy)) {
unmethy[,i] <- (unmethy[,i] - bg[i]) * (meanTotalIntensity[i]/totalIntensity) + meanBg
methy[,i] <- (methy[,i] - bg[i]) * (meanTotalIntensity[i]/totalIntensity) + meanBg
}
} else {
intensity <- unmethy + methy
totalIntensity <- colSums(intensity)
meanTotalIntensity <- mean(totalIntensity)
unmethy <- (unmethy - rep(1, nrow(intensity)) %*% t(bg)) * (rep(1, nrow(intensity)) %*% t(meanTotalIntensity/totalIntensity)) + meanBg
methy <- (methy - rep(1, nrow(intensity)) %*% t(bg)) * (rep(1, nrow(intensity)) %*% t(meanTotalIntensity/totalIntensity)) + meanBg
}
}
assayDataElement(methyLumiM, 'unmethylated') <- unmethy
assayDataElement(methyLumiM, 'methylated') <- methy
if (is(methyLumiM, "MethyLumiM") || is(methyLumiM, "MethyGenoSet")) methyLumiM <- estimateM(methyLumiM)
return(methyLumiM)
}
## this internal function was based on limma::normalizeQuantiles
.estimate.quantile.reference <- function(dataMatrix, ..., batchSize=50, repTime=NULL, ties=TRUE) {
otherDataMatrix <- list(...)
## subsampling the data to estimate the reference when the size is bigger than the batchSize
reference <- NULL
if (ncol(dataMatrix) > batchSize) {
repTime <- ncol(dataMatrix) / batchSize * 2
if (ncol(dataMatrix) < batchSize) {
batchSize <- ncol(dataMatrix)
repTime <- 1
}
for (i in 1:repTime) {
ind.i <- sample(1:ncol(dataMatrix), batchSize)
dataMatrix.i <- dataMatrix[, ind.i]
if (length(otherDataMatrix) > 0) {
for (j in 1:length(otherDataMatrix)) {
dataMatrix.i <- rbind(dataMatrix.i, otherDataMatrix[[j]][, ind.i])
}
}
reference.i <- .estimate.quantile.reference(dataMatrix.i)
reference <- cbind(reference, reference.i)
}
reference <- rowMeans(reference)
return(reference)
} else {
dataMatrix <- dataMatrix[,]
if (length(otherDataMatrix) > 0) {
for (j in 1:length(otherDataMatrix)) {
dataMatrix <- rbind(dataMatrix, otherDataMatrix[[j]][,])
}
}
dm <- dim(dataMatrix)
if (is.null(dm))
return(dataMatrix)
if (dm[2] == 1)
return(dataMatrix)
O <- S <- array(, dm)
nobs <- rep(dm[1], dm[2])
i <- (0:(dm[1] - 1))/(dm[1] - 1)
for (j in 1:dm[2]) {
Si <- sort(dataMatrix[, j], method = "quick", index.return = TRUE)
nobsj <- length(Si$x)
if (nobsj < dm[1]) {
nobs[j] <- nobsj
isna <- is.na(dataMatrix[, j])
S[, j] <- approx((0:(nobsj - 1))/(nobsj - 1), Si$x,
i, ties = "ordered")$y
O[!isna, j] <- ((1:dm[1])[!isna])[Si$ix]
}
else {
S[, j] <- Si$x
O[, j] <- Si$ix
}
}
reference <- rowMeans(S)
return(reference)
}
}
## this internal function was based on limma::normalizeQuantiles
.quantileNormalization.reference <- function(dataMatrix, reference=NULL, ties = TRUE) {
dm <- dim(dataMatrix)
if (is.null(dm)) {
dataMatrix <- matrix(dataMatrix, ncol=1)
dm <- dim(dataMatrix)
}
if (dm[2] == 1 && is.null(reference))
return(dataMatrix)
if (!is.null(reference)) {
if (length(reference) != dm[1]) stop('The length of reference does not match the dimension of dataMatrix!')
}
O <- S <- array(, dm)
nobs <- rep(dm[1], dm[2])
i <- (0:(dm[1] - 1))/(dm[1] - 1)
for (j in 1:dm[2]) {
Si <- sort(dataMatrix[, j], method = "quick", index.return = TRUE)
nobsj <- length(Si$x)
if (nobsj < dm[1]) {
nobs[j] <- nobsj
isna <- is.na(dataMatrix[, j])
S[, j] <- approx((0:(nobsj - 1))/(nobsj - 1), Si$x,
i, ties = "ordered")$y
O[!isna, j] <- ((1:dm[1])[!isna])[Si$ix]
}
else {
S[, j] <- Si$x
O[, j] <- Si$ix
}
}
if (is.null(reference)) {
reference <- rowMeans(S)
} else {
reference <- sort(reference, method='quick')
}
nobs <- rep(dm[1], dm[2])
index <- (0:(dm[1] - 1))/(dm[1] - 1)
for (j in 1:dm[2]) {
if (ties)
rk <- rank(dataMatrix[, j])
if (nobs[j] < dm[1]) {
isna <- is.na(dataMatrix[, j])
if (ties)
dataMatrix[!isna, j] <- approx(index, reference, (rk[!isna] - 1)/(nobs[j] -
1), ties = "ordered")$y
else dataMatrix[O[!isna, j], j] <- approx(index, reference, (0:(nobs[j] -
1))/(nobs[j] - 1), ties = "ordered")$y
}
else {
if (ties)
dataMatrix[, j] <- approx(index, reference, (rk - 1)/(dm[1] - 1), ties = "ordered")$y
else dataMatrix[O[, j], j] <- reference
}
}
return(dataMatrix)
}
##
normalizeMethylation.quantile <- function(methyLumiM, separateColor=FALSE, reference=NULL, ...) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
if (storageMode(assayData(methyLumiM)) == "environment") storageMode(assayData(methyLumiM)) <- "lockedEnvironment"
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
if (separateColor) {
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
separateColor <- FALSE
} else {
allRedInd <- which(annotation$COLOR_CHANNEL == 'Red')
allGrnInd <- which(annotation$COLOR_CHANNEL == 'Grn')
}
}
## It will perform normalization sample by sample when the data is a BigMatrix
if (is(assayDataElement(methyLumiM, 'exprs'), "BigMatrix")) {
## estimate the reference by randomly pick columns
if (is.null(reference)) {
reference <- .estimate.quantile.reference(dataMatrix=assayDataElement(methyLumiM, 'methylated'), dataMatrix2=assayDataElement(methyLumiM, 'unmethylated'), batchSize=50)
}
for (i in 1:ncol(methy)) {
x.matrix.i <- rbind(methy[,i, drop=FALSE], unmethy[,i, drop=FALSE])
# Normalize the intensity using quantile normalization
x.matrix.i <- .quantileNormalization.reference(x.matrix.i, reference=reference, ...)
methy[,i] <- x.matrix.i[1:nrow(methy),1]
unmethy[,i] <- x.matrix.i[(nrow(methy)+1):nrow(x.matrix.i),1]
# estimate the M-value
if (is(methyLumiM, "MethyLumiM")) assayDataElement(methyLumiM, 'exprs')[,i] <- lumi::estimateM(methyLumiM[,i], returnType='matrix')
}
} else {
if (separateColor) {
methy.n <- methy; unmethy.n <- unmethy
methy.red <- methy[allRedInd, ]
methy.grn <- methy[allGrnInd, ]
unmethy.red <- unmethy[allRedInd, ]
unmethy.grn <- unmethy[allGrnInd, ]
x.matrix.red <- rbind(methy.red, unmethy.red)
x.matrix.grn <- rbind(methy.grn, unmethy.grn)
# Normalize the intensity using quantile normalization
if (!is.null(reference)) {
x.matrix.red <- .quantileNormalization.reference(x.matrix.red, reference=reference, ...)
x.matrix.grn <- .quantileNormalization.reference(x.matrix.red, reference=reference, ...)
} else {
x.matrix.red <- limma::normalizeQuantiles(x.matrix.red, ...)
x.matrix.grn <- limma::normalizeQuantiles(x.matrix.grn, ...)
# x.matrix.red <- normalize.quantiles.robust(x.matrix.red + 0.0, ...)
# x.matrix.grn <- normalize.quantiles.robust(x.matrix.grn + 0.0, ...)
}
len.red <- length(allRedInd)
len.grn <- length(allGrnInd)
methy.n[allRedInd,] <- x.matrix.red[1:len.red,]
unmethy.n[allRedInd,] <- x.matrix.red[(len.red+1):nrow(x.matrix.red),]
methy.n[allGrnInd,] <- x.matrix.grn[1:len.grn,]
unmethy.n[allGrnInd,] <- x.matrix.grn[(len.grn+1):nrow(x.matrix.grn),]
} else {
x.matrix <- rbind(methy, unmethy)
# Normalize the intensity using quantile normalization
if (!is.null(reference)) {
x.matrix <- .quantileNormalization.reference(x.matrix, reference=reference, ...)
} else {
x.matrix <- limma::normalizeQuantiles(x.matrix, ...)
#x.matrix <- normalize.quantiles.robust(x.matrix + 0.0, ...)
}
methy.n <- x.matrix[1:nrow(methy),]
unmethy.n <- x.matrix[(nrow(unmethy)+1):nrow(x.matrix),]
}
colnames(methy.n) <- colnames(unmethy.n) <- colnames(methy)
rownames(methy.n) <- rownames(unmethy.n) <- rownames(methy)
assayDataElement(methyLumiM, 'unmethylated') <- unmethy.n
assayDataElement(methyLumiM, 'methylated') <- methy.n
if (is(methyLumiM, "MethyLumiM") || is(methyLumiM, "MethyGenoSet")) methyLumiM <- estimateM(methyLumiM)
}
return(methyLumiM)
}
# estimate the intensity measured by Illumina Infinium methylation probes
# which basically is the sum of methylated and unmethylated probe intensity
estimateIntensity <- function(methyLumiM, returnType=c("ExpressionSet", "matrix")) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
returnType <- match.arg(returnType)
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
if (returnType == "matrix") {
intensity <- unmethy[,] + methy[,]
return(intensity)
} else {
# methyLumiM <- as(methyLumiM, "ExpressionSet")
# exprs(methyLumiM) <- intensity
if ('dataType' %in% slotNames(methyLumiM)) {
dataType(methyLumiM) <- 'Intensity'
}
# if (is(assayDataElement(methyLumiM, 'exprs'), 'BigMatrix')) {
# for (i in 1:ncol(methy)) {
# assayDataElement(methyLumiM, 'exprs')[,i] <- unmethy[,i] + methy[,i]
# }
# } else {
assayDataElement(methyLumiM, 'exprs') <- unmethy[,] + methy[,]
# }
return(methyLumiM)
}
}
# convert beta-value to m-value
beta2m <- function(beta) {
m <- log2(beta/(1-beta))
return(m)
}
# convert m-value to beta-value
m2beta <- function(m) {
beta <- 2^m/(2^m + 1)
return(beta)
}
# estimate the M-value based on methylated and unmethylated probe intensities
estimateM <- function(methyLumiM, returnType=c("ExpressionSet", "matrix"), offset=100) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
returnType <- match.arg(returnType)
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
## in case of BigMatrix
if (is(methy, 'BigMatrix')) {
mm <- min(c(bigmemoryExtras::apply(unmethy, 2, min, na.rm=TRUE), bigmemoryExtras::apply(methy, 2, min, na.rm=TRUE)), na.rm=TRUE)
} else {
mm <- min(c(apply(unmethy, 2, min, na.rm=TRUE), apply(methy, 2, min, na.rm=TRUE)), na.rm=TRUE)
}
if (mm < 0.01) {
if (is(methy, 'BigMatrix')) {
for (i in 1:ncol(methy)) {
if (any(unmethy[,i] < 0.01)) unmethy[which(unmethy[,i] < 0.01), i] <- 0.01
if (any(methy[,i] < 0.01)) methy[which(methy[,i] < 0.01), i] <- 0.01
}
} else {
unmethy[unmethy < 0.01] <- 0.01
methy[methy < 0.01] <- 0.01
}
}
if (returnType == 'matrix') {
M <- log2((methy[,] + offset) / (unmethy[,] + offset))
return(M)
} else {
if ('dataType' %in% slotNames(methyLumiM)) {
dataType(methyLumiM) <- 'M'
}
if (is(assayDataElement(methyLumiM, 'exprs'), 'BigMatrix')) {
for (i in 1:ncol(methy)) {
assayDataElement(methyLumiM, 'exprs')[,i] <- log2((methy[,i] + offset) / (unmethy[,i] + offset))
}
} else {
assayDataElement(methyLumiM, 'exprs') <- log2((methy[,] + offset) / (unmethy[,] + offset))
}
return(methyLumiM)
}
}
# estimate the Beta-value based on methylated and unmethylated probe intensities
estimateBeta <- function(methyLumiM, returnType=c("ExpressionSet", "matrix"), offset=100) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
returnType <- match.arg(returnType)
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
## in case of BigMatrix
if (is(methy, 'BigMatrix')) {
mm <- min(c(bigmemoryExtras::apply(unmethy, 2, min, na.rm=TRUE), bigmemoryExtras::apply(methy, 2, min, na.rm=TRUE)), na.rm=TRUE)
} else {
mm <- min(c(apply(unmethy, 2, min, na.rm=TRUE), apply(methy, 2, min, na.rm=TRUE)), na.rm=TRUE)
}
if (mm < 0.01) {
if (is(methy, 'BigMatrix')) {
for (i in 1:ncol(methy)) {
if (any(unmethy[,i] < 0.01)) unmethy[which(unmethy[,i] < 0.01), i] <- 0.01
if (any(methy[,i] < 0.01)) methy[which(methy[,i] < 0.01), i] <- 0.01
}
} else {
unmethy[unmethy < 0.01] <- 0.01
methy[methy < 0.01] <- 0.01
}
}
if (returnType == "matrix") {
beta <- methy[,] / (unmethy[,] + methy[,] + offset)
return(beta)
} else {
# methyLumiBeta <- as(methyLumiM, "ExpressionSet")
if ('dataType' %in% slotNames(methyLumiM)) {
dataType(methyLumiM) <- 'Beta'
}
if (is(assayDataElement(methyLumiM, 'exprs'), 'BigMatrix')) {
for (i in 1:ncol(methy)) {
assayDataElement(methyLumiM, 'exprs')[,i] <- methy[,i] / (unmethy[,i] + methy[,i] + offset)
}
} else {
assayDataElement(methyLumiM, 'exprs') <- methy[,] / (unmethy[,] + methy[,] + offset)
}
return(methyLumiM)
}
}
# boxplot unique for MethyLumiM-class
# setMethod("boxplot",signature(x="MethyLumiM"),
# function(x, main, prob=c(seq(10,90, by=10), 95), col=gray(rev(seq(prob)/length(prob))), logMode=TRUE, ...) {
setMethod("boxplot",signature(x="MethyLumiM"), function(x, main, logMode=TRUE, ...) {
tmp <- description(x)
if (missing(main) && (is(tmp, "MIAME")))
main <- tmp@title
dataMatrix <- exprs(x)
labels <- colnames(dataMatrix)
if (is.null(labels)) labels <- as.character(1:ncol(dataMatrix))
## set the margin of the plot
mar <- c(max(nchar(labels))/2 + 4.5, 5, 5, 3)
old.mar <- par(mar=mar)
on.exit(par(old.mar))
if (.hasSlot(x, 'dataType')) {
datatype <- dataType(x)
if (datatype == '' || length(datatype) == 0) datatype <- 'M'
ylab <- switch(datatype,
M='M-value',
Beta='Beta-value',
datatype)
if (datatype == 'Intensity' && logMode) {
dataMatrix[dataMatrix <= 0] <- NA
dataMatrix <- log2(dataMatrix)
ylab <- 'Log2-Intensity of CpG-sites'
}
} else {
ylab <- 'M-value'
datatype <- 'M'
}
if (datatype == 'Intensity') {
boxplot(as(x, 'ExpressionSet'), main=main, xlab='', ylab=ylab, xaxt='n', ...)
axis(1, at=1:ncol(dataMatrix), labels=labels, tick=TRUE, las=2)
} else {
# tmp <- lapply(1:ncol(dataMatrix), function(i) dataMatrix[,i])
tmp <- data.frame(M=as.vector(dataMatrix), sample=factor(col(dataMatrix), labels=colnames(dataMatrix)))
print(bwplot(M ~ sample, data=tmp, ylab=ylab, main=main, scales=list(rot=90),
panel = function(..., box.ratio) {
panel.violin(..., col = "grey", varwidth = FALSE, box.ratio = box.ratio)
}))
# hdr.boxplot(tmp, main=main, xlab='', ylab=ylab, prob=prob, col=col, ...)
}
})
## boxplotColorBias
# boxplotColorBias(methyLumiM)
boxplotColorBias <- function(methyLumiM, logMode=TRUE, channel=c('both', 'unmethy', 'methy', 'sum'), grid=TRUE, main=NULL, mar=NULL, verbose=F, subset=NULL, ...) {
channel <- match.arg(channel)
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
if (!is.null(subset)) {
if (!is.numeric(subset)) stop('subset should be numeric.')
if (length(subset) == 1) {
index <- sample(1:nrow(methyLumiM), min(subset, nrow(methyLumiM)))
} else {
index <- subset
index <- index[index > 0 & index <= nrow(methyLumiM)]
}
methyLumiM <- methyLumiM[index,]
}
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
return(invisible(FALSE))
}
redInd <- annotation$COLOR_CHANNEL == 'Red'
grnInd <- annotation$COLOR_CHANNEL == 'Grn'
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
if (logMode) {
unmethy[unmethy < 1] <- 1
methy[methy < 1] <- 1
unmethy <- log2(unmethy)
methy <- log2(methy)
}
nSample <- ncol(unmethy)
allRed <- allGrn <- NULL
tmp <- lapply(1:nSample, function(i) {
red.a.i <- unmethy[redInd, i]
red.b.i <- methy[redInd, i]
grn.a.i <- unmethy[grnInd, i]
grn.b.i <- methy[grnInd, i]
if (channel == 'both') {
red <- c(red.a.i, red.b.i)
grn <- c(grn.a.i, grn.b.i)
} else if (channel == 'unmethy') {
red <- red.a.i
grn <- grn.a.i
} else if (channel == 'methy') {
red <- red.b.i
grn <- grn.b.i
} else {
red <- red.a.i + red.b.i
grn <- grn.a.i + grn.b.i
}
allRed <<- cbind(allRed, red)
allGrn <<- cbind(allGrn, grn)
return(NULL)
})
labels <- colnames(unmethy)
if (is.null(main)) {
main <- "Boxplots of Red and Green color channels"
}
if (is.null(mar)) {
mar <- c(max(nchar(labels))/2 + 4.5, 5, 5, 3)
mar[mar > 15] <- 15
if (verbose) cat("mar:", mar, "\n")
}
old.par <- par(mar=mar, xaxt='n')
info <- switch(channel,
"both"="intensity of both methylated and unmethylated probes",
"methy"="intensity of methylated probes only",
"unmethy"="intensity of unmethylated probes only",
"sum"="CpG-site Intensity")
if (logMode) {
ylab <- paste("Log2", info)
} else {
substr(info, 1, 1) <- toupper(substr(info, 1, 1))
ylab <- info
}
boxplot(allRed ~ col(allRed), col = "red", boxwex = 0.25, at = 1:nSample - 0.175, ylab=ylab, xlab="", main=main, ...)
boxplot(allGrn ~ col(allGrn), col = "green", boxwex = 0.25, at = 1:nSample + 0.175, axis=F, add=TRUE, ylab="", xlab="")
par(xaxt='s')
axis(1, at=1:ncol(allRed), labels=labels, tick=TRUE, las=2)
par(old.par)
if (grid) abline(v=1:(nSample-1) + 0.5, col = "lightgray", lty = "dotted")
return(invisible(TRUE))
}
# plotColorBias1D(methyLumiM)
## plot either density or scatter plot of two color channels
plotColorBias1D <- function(methyLumiM, channel=c('both', 'unmethy', 'methy', 'sum'), colorMode=TRUE, removeGenderProbes=FALSE, logMode=TRUE, subset=NULL, ...) {
channel <- match.arg(channel)
otherPar <- list(...)
densityPar <- otherPar[names(otherPar) %in% names(formals(density.default))]
otherPar[names(otherPar) %in% names(formals(density.default))] <- NULL
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
if (!is.null(subset)) {
if (!is.numeric(subset)) stop('subset should be numeric.')
if (length(subset) == 1) {
index <- sample(1:nrow(methyLumiM), min(subset, nrow(methyLumiM)))
} else {
index <- subset
index <- index[index > 0 & index <= nrow(methyLumiM)]
}
methyLumiM <- methyLumiM[index,]
}
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
return(methyLumiM)
}
if (removeGenderProbes && !is.null(annotation$CHR)) {
redInd <- annotation$COLOR_CHANNEL == 'Red' & !(annotation$CHR %in% c('X', 'Y'))
grnInd <- annotation$COLOR_CHANNEL == 'Grn' & !(annotation$CHR %in% c('X', 'Y'))
} else {
redInd <- annotation$COLOR_CHANNEL == 'Red'
grnInd <- annotation$COLOR_CHANNEL == 'Grn'
}
nSample <- ncol(unmethy)
density.list <- lapply(1:nSample, function(i) {
red.a.i <- unmethy[redInd, i]
red.b.i <- methy[redInd, i]
grn.a.i <- unmethy[grnInd, i]
grn.b.i <- methy[grnInd, i]
if (channel == 'both') {
red <- c(red.a.i, red.b.i)
grn <- c(grn.a.i, grn.b.i)
} else if (channel == 'unmethy') {
red <- red.a.i
grn <- grn.a.i
} else if (channel == 'methy') {
red <- red.b.i
grn <- grn.b.i
} else {
red <- red.a.i + red.b.i
grn <- grn.a.i + grn.b.i
}
if (colorMode) {
if (logMode) {
red[red < 1] <- 1
grn[grn < 1] <- 1
if (length(densityPar) > 0) {
dd.red <- do.call('density', c(list(log2(red)), densityPar))
dd.grn <- do.call('density', c(list(log2(grn)), densityPar))
} else {
dd.red <- density(log2(red))
dd.grn <- density(log2(grn))
}
} else {
if (length(densityPar) > 0) {
dd.red <- do.call('density', c(list(red), densityPar))
dd.grn <- do.call('density', c(list(grn), densityPar))
} else {
dd.red <- density(red)
dd.grn <- density(grn)
}
}
} else {
pool <- c(red, grn)
if (logMode) {
pool[pool < 1] <- 1
if (length(densityPar) > 0) {
dd.pool <- do.call('density', c(list(log2(pool)), densityPar))
} else {
dd.pool <- density(log2(pool))
}
} else {
if (length(densityPar) > 0) {
dd.pool <- do.call('density', c(list(pool), densityPar))
} else {
dd.pool <- density(pool)
}
}
dd.red <- dd.grn <- dd.pool
}
return(list(red=dd.red, green=dd.grn))
})
mm.density <- max(sapply(density.list, function(x) max(c(x$red$y, x$green$y))))
xrange <- range(sapply(density.list, function(x) range(c(x$red$x, x$green$x))))
info <- switch(channel,
"both"="intensity of both methylated and unmethylated probes",
"methy"="intensity of methylated probes only",
"unmethy"="intensity of unmethylated probes only",
"sum"="CpG-site Intensity")
if (logMode) {
xlab <- paste("Log2", info)
} else {
substr(info, 1, 1) <- toupper(substr(info, 1, 1))
xlab <- info
}
if (is.null(otherPar$xlab)) otherPar$xlab <- xlab
if (is.null(otherPar$ylab)) otherPar$ylab <- "Density"
if (is.null(otherPar$xlim)) otherPar$xlim <- xrange
if (is.null(otherPar$ylim)) otherPar$ylim <- c(0,mm.density)
if (is.null(otherPar$main)) otherPar$main <- ifelse(colorMode, "Compare density distribution of two color channels", "Compare density distribution")
if (is.null(otherPar$type)) otherPar$type <- 'l'
if (colorMode) {
if (is.null(otherPar$col)) otherPar$col <- 2
} else {
if (is.null(otherPar$col)) {
otherPar$col <- 1:nSample
} else if (length(otherPar$col) < nSample)
otherPar$col <- rep(otherPar$col[1], nSample)
}
for (i in 1:nSample) {
dd.red.i <- density.list[[i]]$red
dd.grn.i <- density.list[[i]]$green
if (i == 1) {
do.call('plot', c(list(dd.red.i), otherPar))
if (colorMode) lines(dd.grn.i, col=3)
} else {
if (colorMode) {
lines(dd.red.i, col=otherPar$col, lty=i)
lines(dd.grn.i, col=3, lty=i)
} else {
lines(dd.red.i, col=otherPar$col[i], lty=i)
}
}
}
return(invisible(TRUE))
}
## plotColorBias2D(methyLumiM, selSample=1)
plotColorBias2D <- function(methyLumiM, selSample=1, combineMode=F, layoutRatioWidth=c(0.75,0.25), layoutRatioHeight=c(0.25, 0.75),
margins = c(5, 5, 2, 2), cex=1.25, logMode=TRUE, subset=NULL, ...) {
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
otherPar <- list(...)
if (!is.null(subset)) {
if (!is.numeric(subset)) stop('subset should be numeric.')
if (length(subset) == 1) {
index <- sample(1:nrow(methyLumiM), min(subset, nrow(methyLumiM)))
} else {
index <- subset
index <- index[index > 0 & index <= nrow(methyLumiM)]
}
methyLumiM <- methyLumiM[index,]
}
ff <- pData(featureData(methyLumiM))
color.channel <- ff[,"COLOR_CHANNEL"]
if (is.null(color.channel) && !combineMode) stop("No color channel information included in the data!\n Please add it using addAnnotationInfo function.\n")
unmethy <- assayDataElement(methyLumiM, 'unmethylated')[, selSample[1]]
methy <- assayDataElement(methyLumiM, 'methylated')[, selSample[1]]
if (logMode) {
unmethy[unmethy < 1] <- 1
methy[methy < 1] <- 1
unmethy <- log2(unmethy)
methy <- log2(methy)
}
if (!combineMode) {
grn.a <- unmethy[color.channel == "Grn"]
red.a <- unmethy[color.channel == "Red"]
grn.b <- methy[color.channel == "Grn"]
red.b <- methy[color.channel == "Red"]
}
oldpar <- par(no.readonly = TRUE)
layout(matrix(c(2,1,0,3), nrow=2), widths = layoutRatioWidth, heights = layoutRatioHeight, respect = FALSE)
# layout.show(3)
## plot the scatter plot
par(mar = c(margins[1], margins[2], 0, 0))
plottype <- ifelse(combineMode, "p", "n")
if (is.null(otherPar$pch)) otherPar$pch <- '.'
if (is.null(otherPar$type)) otherPar$type <- plottype
if (is.null(otherPar$xlim)) otherPar$xlim <- range(unmethy)
if (is.null(otherPar$ylim)) otherPar$ylim <- range(methy)
if (is.null(otherPar$xlab)) otherPar$xlab <- 'Unmethylated Probe Intensity'
if (is.null(otherPar$ylab)) otherPar$ylab <- 'Methylated Probe Intensity'
if (is.null(otherPar$main)) otherPar$main <- ''
if (logMode) {
if (is.null(otherPar$xlab)) otherPar$xlab <- 'Unmethylated Probe Intensity (log2)'
if (is.null(otherPar$ylab)) otherPar$ylab <- 'Methylated Probe Intensity (log2)'
}
do.call('plot', c(list(unmethy), list(methy), otherPar))
if (combineMode) {
dd.a <- density(unmethy)
dd.b <- density(methy)
par(mar = c(1, margins[2], margins[3], 0))
plot(dd.a, xlab='', ylab='Density', xlim=otherPar$xlim, xaxt='n', col='black', type='l', main='')
## plot the density plot of methylated probes
par(mar = c(margins[1], 1, 0, margins[4]))
plot(dd.b$y, dd.b$x, xlab='Density', ylab='', ylim=otherPar$ylim, yaxt='n', col='black', type='l', main='')
} else {
points(red.a, red.b, pch='.', cex=cex, col='red')
points(grn.a, grn.b, pch='.', cex=cex, col='green')
dd.grn.a <- density(grn.a)
dd.grn.b <- density(grn.b)
dd.red.a <- density(red.a)
dd.red.b <- density(red.b)
## plot the density plot of unmethylated probes
par(mar = c(1, margins[2], margins[3], 0))
plot(dd.red.a, xlab='', ylab='Density', xlim=otherPar$xlim, ylim=range(c(dd.grn.a$y, dd.red.a$y)), xaxt='n', col='red', type='l', main='')
lines(dd.grn.a, col='green')
## plot the density plot of methylated probes
par(mar = c(margins[1], 1, 0, margins[4]))
plot(dd.red.b$y, dd.red.b$x, xlab='Density', ylab='', xlim=range(c(dd.grn.b$y, dd.red.b$y)), ylim=otherPar$ylim, yaxt='n', col='red', type='l', main='')
lines(dd.grn.b$y, dd.grn.b$x, col='green')
}
par(oldpar)
return(invisible(TRUE))
}
# scatterPlotWithDensity <- function(x, y, channel=NULL, col=NULL, layoutRatioWidth=c(0.75,0.25), layoutRatioHeight=c(0.25, 0.75),
# margins = c(5, 5, 2, 2), cex=1.25, ...) {
#
# if (!is.null(channel)) {
# grn.a <- unmethy[color.channel == "Grn"]
# red.a <- unmethy[color.channel == "Red"]
# grn.b <- methy[color.channel == "Grn"]
# red.b <- methy[color.channel == "Red"]
# } else {
# channel <- rep(1, length(x))
# }
#
# oldpar <- par(no.readonly = TRUE)
# layout(matrix(c(2,1,0,3), nrow=2), widths = layoutRatioWidth, heights = layoutRatioHeight, respect = FALSE)
# # layout.show(3)
# ## plot the scatter plot
# par(mar = c(margins[1], margins[2], 0, 0))
#
# plot(x, y, type='n', ...)
#
# uniChannels <- unique(channel)
# if (is.null(col)) col <- 1:length(uniChannels)
# density.all <- NULL
# for (i in seq(uniChannels)) {
# x.i <- x[channel == uniChannels[i]]
# y.i <- y[channel == uniChannels[i]]
# dd.x.i <- density(x.i)
# dd.y.i <- density(y.i)
# density.all <- c(density.all, list(x=dd.x.i, y=dd.y.i))
# }
#
# ## plot densitis
# for (i in seq(uniChannels)) {
#
# x.i <- x[channel == uniChannels[i]]
# y.i <- y[channel == uniChannels[i]]
# points(x.i, y.i, pch='.', cex=cex, col=col[i])
#
# ## plot the density plot of unmethylated probes
# par(mar = c(1, margins[2], margins[3], 0))
# plot(dd.x.i, xlab='', ylab='Density', xlim=otherPar$xlim, ylim=range(c(dd.grn.a$y, dd.red.a$y)), xaxt='n', col='red', type='l', main='')
# lines(dd.grn.a, col='green')
# ## plot the density plot of methylated probes
# par(mar = c(margins[1], 1, 0, margins[4]))
# plot(dd.red.b$y, dd.red.b$x, xlab='Density', ylab='', xlim=range(c(dd.grn.b$y, dd.red.b$y)), ylim=otherPar$ylim, yaxt='n', col='red', type='l', main='')
# lines(dd.grn.b$y, dd.grn.b$x, col='green')
#
# }
#
# par(oldpar)
# }
colorBiasSummary <- function(methyLumiM, logMode=TRUE, channel=c('both', 'unmethy', 'methy', 'sum')) {
channel <- match.arg(channel)
if (!all(c("unmethylated", "methylated") %in% assayDataElementNames(methyLumiM))) {
stop("The input should include 'methylated' and 'unmethylated' elements in the assayData slot!\n")
}
unmethy <- assayDataElement(methyLumiM, 'unmethylated')
methy <- assayDataElement(methyLumiM, 'methylated')
annotation <- pData(featureData(methyLumiM))
if (is.null(annotation$COLOR_CHANNEL)) {
cat("No color balance adjustment because lack of COLOR_CHANNEL information!\n Please add it using addAnnotationInfo function.\n")
return(methyLumiM)
}
redInd <- annotation$COLOR_CHANNEL == 'Red'
grnInd <- annotation$COLOR_CHANNEL == 'Grn'
nSample <- ncol(unmethy)
summary.list <- lapply(1:nSample, function(i) {
red.a.i <- unmethy[redInd, i]
red.b.i <- methy[redInd, i]
grn.a.i <- unmethy[grnInd, i]
grn.b.i <- methy[grnInd, i]
if (channel == 'both') {
red <- c(red.a.i, red.b.i)
grn <- c(grn.a.i, grn.b.i)
} else if (channel == 'unmethy') {
red <- red.a.i
grn <- grn.a.i
} else if (channel == 'methy') {
red <- red.b.i
grn <- grn.b.i
} else {
red <- red.a.i + red.b.i
grn <- grn.a.i + grn.b.i
}
if (logMode) {
ss.red <- summary(log2(red))
ss.grn <- summary(log2(grn))
} else {
ss.red <- summary(red)
ss.grn <- summary(grn)
}
return(list(red=ss.red, green=ss.grn))
})
red.summary.matrix <- sapply(summary.list, function(x) x$red)
grn.summary.matrix <- sapply(summary.list, function(x) x$green)
colnames(red.summary.matrix) <- colnames(grn.summary.matrix) <- sampleNames(methyLumiM)
return(list(red=red.summary.matrix, green=grn.summary.matrix))
}
plotDensity <- function(dataMatrix, logMode=TRUE, addLegend=TRUE, legendPos="topright", subset=NULL, ...) {
otherPar <- list(...)
if (is(dataMatrix, 'MethyLumiM')) {
if (dataType(dataMatrix) == 'M') logMode <- FALSE
}
if (is(dataMatrix, 'ExpressionSet')) {
dataMatrix <- exprs(dataMatrix)
} else if (is.numeric(dataMatrix)) {
dataMatrix <- as.matrix(dataMatrix)
} else {
stop('Un-supported class of dataMatrix.')
}
if (!is.null(subset)) {
if (!is.numeric(subset)) stop('subset should be numeric.')
if (length(subset) == 1) {
index <- sample(1:nrow(dataMatrix), min(subset, nrow(dataMatrix)))
} else {
index <- subset
index <- index[index > 0 & index <= nrow(dataMatrix)]
}
dataMatrix <- dataMatrix[index,,drop=FALSE]
}
if (logMode) {
if (any(dataMatrix <= 0)) dataMatrix[dataMatrix <= 0.1] <- 0.1
tmp <- apply(log2(dataMatrix), 2, density)
} else {
tmp <- apply(dataMatrix, 2, density)
}
xx <- sapply(tmp, function(x) x$x)
yy <- sapply(tmp, function(x) x$y)
if (is.null(otherPar$type)) otherPar$type <- 'l'
if (is.null(otherPar$xlab)) otherPar$xlab <- "Intensity"
if (is.null(otherPar$ylab)) otherPar$ylab <- 'Density'
if (is.null(otherPar$lty)) otherPar$lty <- 1:ncol(dataMatrix)
if (is.null(otherPar$col)) otherPar$col <- 1:ncol(dataMatrix)
if (is.null(otherPar$lwd)) otherPar$lwd <- 1
if (is.null(otherPar$main)) otherPar$main <- "Density plot"
if (logMode) otherPar$xlab <- paste(otherPar$xlab, " (log2)", sep="")
do.call('matplot', c(list(xx), list(yy), otherPar))
## add legend
if (addLegend) {
labels <- colnames(dataMatrix)
if (is.null(labels)) labels <- as.character(1:ncol(dataMatrix))
if (length(legendPos) > 1) {
x.pos <- legendPos[1]
y.pos <- legendPos[2]
} else {
x.pos <- legendPos[1]
y.pos <- NULL
}
legend(x.pos, y.pos, legend=labels, box.lwd=0, col=otherPar$col, lty=otherPar$lty, lwd=otherPar$lwd)
}
return(invisible(TRUE))
}
produceMethylationGEOSubmissionFile <- function(methyLumiM, methyLumiM.raw=NULL, lib.mapping=NULL, idType='Probe', sampleInfo=NULL, fileName='GEOSubmissionFile.txt', supplementaryRdata=TRUE, ...) {
if (missing(methyLumiM)) stop('Please provide all required input parameters!\n')
if (is(methyLumiM, "MethyLumiM")) expr.norm <- estimateBeta(methyLumiM, returnType='matrix')
if (is.null(methyLumiM.raw)) {
detect <- detection(methyLumiM)
methyData <- methylated(methyLumiM)
unmethyData <- unmethylated(methyLumiM)
expr <- NULL
} else {
detect <- detection(methyLumiM.raw)
methyData <- methylated(methyLumiM.raw)
unmethyData <- unmethylated(methyLumiM.raw)
expr <- estimateBeta(methyLumiM.raw)
}
if (is.null(sampleInfo)) {
sampleInfo <- produceGEOSampleInfoTemplate(methyLumiM, lib.mapping=lib.mapping, fileName=NULL)
} else if (length(sampleInfo) == 1 && is.character(sampleInfo)) {
sampleInfo <- read.table(sampleInfo, sep='\t', colClasses='character', skip=1, header=TRUE, strip.white=TRUE, quote='')
} else if (is.null(nrow(sampleInfo))) {
stop('Please provide correct sample information (a data.frame, matrix, or sampleInfo file)!\n')
}
sampleInfoTitle <- colnames(sampleInfo)
if (any(sapply(sampleInfo[,-1, drop=F], nchar) == 0)) stop('No blank fields are allowed in the sampleInfo table!\nYou can check some example submissions, like GSM296418, at the GEO website.\n')
if (supplementaryRdata) sampleInfo[, "Sample_supplementary_file"] <- 'supplementaryData.Rdata'
nuID <- featureNames(methyLumiM)
probeId <- nuID
if (length(which(is.nuID(sample(nuID, 100)))) < 20) {
nuID <- NULL
} else {
if (!is.null(lib.mapping)) {
probeId <- nuID2IlluminaID(nuID, lib=lib.mapping, idType=idType, ...)
} else {
nuID <- NULL
}
}
sampleID <- sampleInfo[, "sampleID"]
sampleTitle <- sampleInfo[,'Sample_title']
for (i in seq(sampleID)) {
if (i == 1) {
cat('^SAMPLE =', sampleTitle[i], '\n', sep='', file=fileName, append=FALSE)
} else {
cat('^SAMPLE =', sampleTitle[i], '\n', sep='', file=fileName, append=TRUE)
}
sampleInfo.i <- paste('!', sampleInfoTitle[-1], ' = ', sampleInfo[i,-1], '\n', sep='', collapse='')
sampleInfo.i <- gsub("'", "\\'", sampleInfo.i)
cat(sampleInfo.i, file=fileName, append=TRUE, sep='')
tableHead <- "ID_REF"
cat("#ID_REF = Illumina ID\n", file=fileName, append=TRUE)
if (!is.null(nuID)) {
cat("#nuID = nucleotide universal IDentifier (nuID), convertible to and from probe sequence. See Bioconductor lumi package for more details.\n", file=fileName, append=TRUE)
tableHead <- c(tableHead, "nuID")
}
cat("#VALUE = Beta-value\n", file=fileName, append=TRUE)
if (!is.null(expr)) cat("#RAW_VALUE = raw Beta-value\n", file=fileName, append=TRUE)
tableHead <- c(tableHead, "VALUE")
if (!is.null(expr)) tableHead <- c(tableHead, "RAW_VALUE")
if (!is.null(methyData)) {
cat("#METHYLATED = the intensities measured by methylated probes\n", file=fileName, append=TRUE)
tableHead <- c(tableHead, "METHYLATED")
}
if (!is.null(unmethyData)) {
cat("#UNMETHYLATED = the intensities measured by unmethylated probes\n", file=fileName, append=TRUE)
tableHead <- c(tableHead, "UNMETHYLATED")
}
if (!is.null(detect)) {
cat("#Detection_Pval = the detection p-value of the probe\n", file=fileName, append=TRUE)
tableHead <- c(tableHead, "Detection_Pval")
}
sampleTable.i <- probeId
if (!is.null(nuID)) sampleTable.i <- cbind(sampleTable.i, nuID)
sampleTable.i <- cbind(sampleTable.i, expr.norm[,sampleID[i]])
if (!is.null(expr)) sampleTable.i <- cbind(sampleTable.i, expr[,sampleID[i]])
if (!is.null(methyData)) sampleTable.i <- cbind(sampleTable.i, methyData[,sampleID[i]])
if (!is.null(unmethyData)) sampleTable.i <- cbind(sampleTable.i, unmethyData[,sampleID[i]])
if (!is.null(detect)) sampleTable.i <- cbind(sampleTable.i, detect[,sampleID[i]])
sampleTable.i <- rbind(tableHead, sampleTable.i)
cat("!sample_table_begin\n", file=fileName, append=TRUE)
write.table(sampleTable.i, sep='\t', quote=FALSE, file=fileName, append=TRUE, col.names=FALSE, row.names=FALSE)
cat("!sample_table_end\n", file=fileName, append=TRUE)
}
if (supplementaryRdata) {
methyLumiM <- methyLumiM[,sampleID]
if (!is.null(methyLumiM.raw)) {
methyLumiM.raw <- methyLumiM.raw[,sampleID]
save(methyLumiM, methyLumiM.raw, sampleInfo, file='supplementaryData.Rdata')
} else {
save(methyLumiM, sampleInfo, file='supplementaryData.Rdata')
}
}
}
## ---------------------------------------------------------------------------------
# Functions related with estimating methylation status
## EM estimation of the parameters s1, s2 and theta
## E-step: determine the class of each probe
## M-step:
## 1. estimation the proportion of two classes
## 2. Estimate of optimized s1 and s2 using optimization method
## 3. Estimate theta based on equation
# fittedGamma <- gammaFitEM(M[,1], initialFit=NULL, maxIteration=50, tol=0.0001, plotMode=T, verbose=T)
gammaFitEM <- function(M, initialFit=NULL, fix.k=NULL, weighted=TRUE, maxIteration=50, tol=0.0001, plotMode=FALSE, truncate=FALSE, verbose=FALSE) {
fix.theta=NULL
eps <- 10^-5
# if (!require(nleqslv)) stop("Please install nleqslv package!\n")
if (!is.null(fix.k)) {
if (length(fix.k) == 1) fix.k <- c(fix.k, fix.k)
}
if (!is.null(fix.theta)) {
if (length(fix.theta) == 1) fix.theta <- c(fix.theta, fix.theta)
}
fs1 <- function(s1, mi, zi, theta, k, n) {
nonNegativeInd <- which(mi > s1) # this setting will allow to shift the profile without considering the small part of the trailing points
sum(zi[nonNegativeInd]/(mi[nonNegativeInd]-s1)) - n /((k-1)*theta)
# mi[mi < s1] <- s1 + eps
# sum(zi/(mi-s1)) - n /((k-1)*theta)
#sum(zi/abs(mi-s1)) - n /((k-1)*theta)
}
fs2 <- function(s2, mi, zi, theta, k, n) {
nonNegativeInd <- which(mi < s2) # this setting will allow to shift the profile without considering the small part of the trailing points
sum(zi[nonNegativeInd]/(s2-mi[nonNegativeInd])) - n /((k-1)*theta)
#mi[mi > s2] <- s2 - eps
#sum(zi/(s2-mi)) - n /((k-1)*theta)
# sum(zi/abs(s2-mi)) - n /((k-1)*theta)
}
fk1 <- function(k1, mi, zi, s, n) {
nonNegativeInd <- which(mi > s) # this setting will allow to shift the profile without considering the small part of the trailing points
log(k1) - digamma(k1) - log(sum(zi[nonNegativeInd]*(mi[nonNegativeInd]-s)/n)) + sum(zi[nonNegativeInd]*log(mi[nonNegativeInd]-s)/n)
# mi[mi < s] <- s + eps
# log(k1) - digamma(k1) - log(sum(zi*(mi-s)/n)) + sum(zi*log(mi-s)/n)
}
fk2 <- function(k2, mi, zi, s, n) {
nonNegativeInd <- which(mi < s) # this setting will allow to shift the profile without considering the small part of the trailing points
log(k2) - digamma(k2) - log(sum(zi[nonNegativeInd]*(s-mi[nonNegativeInd])/n)) + sum(zi[nonNegativeInd]*log(s-mi[nonNegativeInd])/n)
# mi[mi > s] <- s - eps
# log(k2) - digamma(k2) - log(sum(zi*(s-mi)/n)) + sum(zi*log(s-mi)/n)
}
if (is.matrix(M)) {
if (ncol(M) > 1) cat("Only the first column of the matrix was processed!\n")
x <- M[,1]
} else {
x <- M
}
# Initial value estimation got from grid search
initialFit <- .initialGammaEstimation(x, initialFit=initialFit)
k <- initialFit$k
theta <- initialFit$theta
s <- initialFit$shift
p <- initialFit$proportion
Mode <- initialFit$mode
k[k < 2] <- 2
theta[theta < 0.2] <- 0.2
if (!is.null(fix.k)) k[!is.na(fix.k)] <- fix.k[!is.na(fix.k)]
if (!is.null(fix.theta)) theta[!is.na(fix.theta)] <- fix.theta[!is.na(fix.theta)]
if (!is.null(fix.k) || !is.null(fix.theta)) s <- c(Mode[1] - (k[1]-1)*theta[1], Mode[2] + (k[2]-1)*theta[2])
if (verbose) {
if (plotMode) plotGammaFit(x, k=k, theta=theta, shift=s, proportion=p)
cat("\nInitial estimation:\n")
cat("k:", k, "\n")
cat("s:", s, "\n")
cat("theta:", theta, "\n")
cat("p:", p, "\n")
f1 <- dgamma(x-s[1], shape=k[1], scale=theta[1])
f2 <- dgamma(s[2]-x, shape=k[2], scale=theta[2])
logLikelihood <- sum(log(p[1] * f1 + p[2]*f2))
cat("logLikelihood:", logLikelihood, "\n")
}
iter <- 1
N <- length(x)
while (iter < maxIteration) {
if (any(theta < eps) || any(k < 2)) {
cat('It does not converge based on the current initial values!\n')
logLikelihood <- -Inf
return(list(logLikelihood=logLikelihood, k=k, theta=theta, shift=s, proportion=p, mode=Mode, probability=NULL))
}
Mode <- c(s[1] + (k[1]-1)*theta[1], s[2] - (k[2]-1)*theta[2])
## E-step
f1 <- dgamma(x-s[1], shape=k[1], scale=theta[1])
f2 <- dgamma(s[2]-x, shape=k[2], scale=theta[2])
## TO DO: add weighted function instead of truncate 10/25/2010
if (weighted) {
# down weight the long tails of two component densities beyond their modes
w1 <- rep(1, length(f1)); w2 <- rep(1, length(f2))
w1[x > Mode[2]] <- f2[x > Mode[2]] / max(f2)
w2[x < Mode[1]] <- f1[x < Mode[1]] / max(f1)
f1 <- f1 * w1
f2 <- f2 * w2
# adjust the density values because of the trunction
# f1 <- f1 / (pgamma(Mode[2]-s[1], shape=k[1], scale=theta[1], lower.tail = TRUE))
# f2 <- f2 / (pgamma(s[2]-Mode[1], shape=k[2], scale=theta[2], lower.tail = TRUE))
# z1 <- p[1] * f1 / (p[1] * f1 + p[2] * f2) # + eps * 10)
# z1[x > Mode[2]] <- 0
# z1[x < Mode[1]] <- 1
# f1[x > Mode[2]] <- 0
# f2[x < Mode[1]] <- 0
}
z1 <- p[1] * f1 / (p[1] * f1 + p[2] * f2 + eps * 10) # posterior probability of unmethylated
z2 <- 1 - z1
if (verbose && iter > 1) {
# logLikelihood <- sum((z1*log(p[1] * f1))[z1 > 0]) + sum((z2* log(p[2]*f2))[z2 > 0])
ll <- p[1] * f1 + p[2]*f2
ll[ll < eps] <- eps
logLikelihood <- sum(log(ll))
cat("logLikelihood:", logLikelihood, "\n")
}
## M-step (estimate parameters: s, theta and k)
# update the proportion of each class
n1 <- sum(z1, na.rm=TRUE); n2 <- N - n1
p.new <- n1 / N
p.new <- c(p.new, 1 - p.new)
if (abs(p.new[1]-p[1]) < tol) break
p <- p.new
## Estimation of s, theta and k
iter.inter <- 1
while (iter.inter <= 10) {
# set the range of s parameter fss(s1, mi, zi, theta, k, n)
s1.new <- nleqslv(s[1], fs1, mi=x, zi=z1, theta=theta[1], k=k[1], n=n1)$x
s2.new <- nleqslv(s[2], fs2, mi=x, zi=z2, theta=theta[2], k=k[2], n=n2)$x
s.new <- c(s1.new, s2.new)
## Estimation of k # fk1(k1, mi, zi, s, n)
if (is.null(fix.k)) {
k1.new <- nleqslv(k[1], fk1, mi=x, zi=z1, s=s.new[1], n=n1)$x
k2.new <- nleqslv(k[2], fk2, mi=x, zi=z2, s=s.new[2], n=n2)$x
k.new <- c(k1.new, k2.new)
} else {
k.new <- fix.k
if (any(is.na(k.new))) {
if (is.na(fix.k[1])) k.new[1] <- nleqslv(k[1], fk1, mi=x, zi=z1, s=s.new[1], n=n1)$x
if (is.na(fix.k[2])) k.new[2] <- nleqslv(k[2], fk2, mi=x, zi=z2, s=s.new[2], n=n2)$x
}
}
## Estimation of theta
if (is.null(fix.theta)) {
theta.new <- c(sum(z1*(x - s.new[1]))/(k.new[1]*n1), sum(z2*(s.new[2] - x))/(k.new[2]*n2))
} else {
theta.new <- fix.theta
if (any(is.na(theta.new))) {
if (is.na(fix.theta[1])) theta.new[1] <- sum(z1*(x - s.new[1]))/(k.new[1]*n1)
if (is.na(fix.theta[2])) theta.new[2] <- sum(z2*(s.new[2] - x))/(k.new[2]*n2)
}
}
if (min(abs(s.new - s), na.rm=TRUE) < tol && min(abs(k.new - k), na.rm=TRUE) < tol && min(abs(theta.new - theta), na.rm=TRUE) < tol) break
# if (verbose) cat("s:", s, " k:", k, " theta:", theta, "\n")
s <- s.new; k <- k.new; theta <- theta.new
# if (any(theta < 0.2) || any(k < 2)) {
if (any(theta < eps) || any(k < 2)) {
cat('It does not converge based on the current initial values!\n')
logLikelihood <- -Inf
return(list(logLikelihood=logLikelihood, k=k, theta=theta, shift=s, proportion=p, mode=Mode, probability=NULL))
}
iter.inter <- iter.inter + 1
}
if (verbose) {
cat("\nIteration ", iter, "\n")
cat("k:", k, "\n")
cat("s:", s, "\n")
cat("theta:", theta, "\n")
cat("p:", p, "\n")
cat("Mode:", Mode, "\n")
if (plotMode) plotGammaFit(x, k=k, theta=theta, shift=s, proportion=p)
}
iter <- iter + 1
}
if (iter == maxIteration) {
Mode <- c(s[1] + (k[1]-1)*theta[1], s[2] - (k[2]-1)*theta[2])
f1 <- dgamma(x-s[1], shape=k[1], scale=theta[1])
f2 <- dgamma(s[2]-x, shape=k[2], scale=theta[2])
if (truncate) {
f1 <- f1 / (pgamma(Mode[2]-s[1], shape=k[1], scale=theta[1], lower.tail = TRUE))
f2 <- f2 / (pgamma(s[2]-Mode[1], shape=k[2], scale=theta[2], lower.tail = TRUE))
z1 <- p[1] * f1 / (p[1] * f1 + p[2] * f2) # + eps)
z1[x > Mode[2]] <- 0
z1[x < Mode[1]] <- 1
f1[x > Mode[2]] <- 0
f2[x < Mode[1]] <- 0
} else {
z1 <- p[1] * f1 / (p[1] * f1 + p[2] * f2) # + eps) # posterior probability of unmethylated
}
z2 <- 1 - z1
}
# estimate log-likelihood
ll <- p[1] * f1 + p[2]*f2
ll[ll < eps] <- eps
logLikelihood <- sum(log(ll))
if (iter == maxIteration && verbose) cat("logLikelihood:", logLikelihood, "\n")
# return the methylation/unmethylation estimation of each probe
## check class index to make sure those extreme probe belonging to the corresponding group
probability <- cbind(z1, z2)
colnames(probability) <- c('unmethylated', 'methylated')
Mode <- c(s[1] + (k[1]-1)*theta[1], s[2] - (k[2]-1)*theta[2])
if (plotMode) plotGammaFit(x, k=k, theta=theta, shift=s, proportion=p)
fitResult <- list(logLikelihood=logLikelihood, k=k, theta=theta, shift=s, proportion=p, mode=Mode, probability=probability)
class(fitResult) <- 'gammaFit'
return(fitResult)
}
# initial gamma parameters estimation
.initialGammaEstimation <- function(x, initialFit=NULL) {
k <- theta <- s <- p <- Mode <- NULL
if (!is.null(initialFit)) {
k <- initialFit$k
theta <- initialFit$theta
s <- initialFit$shift
p <- initialFit$proportion
Mode <- initialFit$mode
}
if (!is.null(k) && !is.null(theta) &&!is.null(s) && !is.null(p) && !is.null(Mode)) return(initialFit)
# mode positions
if (is.null(Mode)) {
dd <- density(x)
density.m <- dd$y
density.x <- dd$x # x-axis of density plot (methylation ratio)
midpoint <- mean(quantile(x, c(0.01, 0.99)))
unmethy.ind <- density.x <= midpoint
P1 <- density.x[unmethy.ind][which.max(density.m[unmethy.ind])]
if (abs(P1 - midpoint) < 0.1) P1 <- (midpoint + quantile(x, 0.01))/2
methy.ind <- density.x >= midpoint
P2 <- density.x[methy.ind][which.max(density.m[methy.ind])]
if (abs(P2 - midpoint) < 0.1) P2 <- (midpoint + quantile(x, 0.99))/2
Mode <- c(P1, P2)
}
# update the class index
unmethy.ind <- which(x < (Mode[1] + Mode[2])/2)
methy.ind <- which(x > (Mode[1] + Mode[2])/2)
# percentage of two classes
if (is.null(p)) {
p <- c(length(unmethy.ind)/length(x), length(methy.ind)/length(x))
}
mean.unmethy <- mean(x[unmethy.ind])
mean.methy <- mean(x[methy.ind])
var.unmethy <- var(x[unmethy.ind])
var.methy <- var(x[methy.ind])
if (is.null(theta)) {
theta <- c(mean.unmethy - Mode[1], Mode[2] - mean.methy)
theta[theta < 0.2] <- 0.2
}
if (is.null(k)) {
k <- c(round(var.unmethy/theta[1]^2), round(var.methy/theta[2]^2))
k[k < 2] <- 2
}
if (is.null(s)) s <- c(mean.unmethy - k[1]*theta[1], mean.methy + k[2]*theta[2])
fitResult <- list(k=k, theta=theta, shift=s, proportion=p, mode=Mode)
class(fitResult) <- 'gammaFit'
return(fitResult)
}
# plot gammFit results
plotGammaFit <- function(x, gammaFit=NULL, k=NULL, theta=NULL, shift=NULL, proportion=NULL, plotType=c('histogram', 'density'), ...) {
plotType <- match.arg(plotType)
if (!is.null(gammaFit)) {
if (class(gammaFit) != 'gammaFit') stop("gammaFit should be an object of 'gammaFit' class!")
k <- gammaFit$k
theta <- gammaFit$theta
shift <- gammaFit$shift
proportion <- gammaFit$proportion
}
x <- sort(x)
if (is.null(k) || is.null(theta) || is.null(shift) || is.null(proportion)) stop("Information of parameters k, theta, shift and proportion is required!\n")
y1 = dgamma(x-shift[1], shape=k[1], scale=theta[1])
y2 = dgamma(shift[2]-x, shape=k[2], scale=theta[2])
if (plotType == 'histogram') {
hist(x, 50, probability=TRUE, main='Compare data histogram and fitted distribution', xlab='M value', ...)
} else {
plot(density(x), type='l', lwd=1.5, main='Compare data density and fitted distribution', xlab="M value", ...)
}
lines(x, y1*proportion[1] + y2 * proportion[2], col=2, lty=3, lwd=1.5)
lines(x, y1*proportion[1], col=3, lty=2)
lines(x, y2*proportion[2], col=4, lty=2)
return(invisible(TRUE))
}
# estimate methylation call probability based on gamma fit parameters
methylationCall <- function(x, threshold=0.95, ...) {
if (length(threshold) == 1) threshold <- rep(threshold, 2)
probability <- NULL
if (class(x) != 'gammaFit') {
fit <- gammaFitEM(x, ...)
} else {
fit <- x
}
probability <- fit$probability
# methyCall <- probability > threshold
methyCall <- apply(probability, 1, function(x) {
cc <- "Margin"
if (x[1] > threshold[1]) {
cc <- "Unmethy"
} else if (x[2] > threshold[2]) {
cc <- "Methy"
}
return(cc)
})
attr(methyCall, "probability") <- probability[,"methylated"]
return(methyCall)
}
# methylation status
lumiMethyStatus <- function(methyLumiM, ...)
{
if (!is(methyLumiM, 'MethyLumiM')) {
stop('The object should be class "MethyLumiM" inherited!')
}
history.submitted <- as.character(Sys.time())
M <- exprs(methyLumiM)
M.status <- M.prob <- NULL
for (i in 1:ncol(M)) {
status.i <- methylationCall(M[,i], ...)
prob.i <- attr(status.i, "probability")
M.status <- cbind(M.status, status.i)
M.prob <- cbind(M.prob, prob.i)
}
rownames(M.status) <- rownames(M.prob) <- rownames(M)
colnames(M.status) <- colnames(M.prob) <- colnames(M)
history.finished <- as.character(Sys.time())
# history.command <- capture.output(print(match.call(lumiMethyStatus)))
history.command <- paste(deparse(match.call(lumiMethyStatus)), collapse='')
lumiVersion <- packageDescription('lumi')$Version
attr(M.status, "history") <- rbind(methyLumiM@history, data.frame(submitted=history.submitted,
finished=history.finished, command=history.command, lumiVersion=lumiVersion))
attr(M.status, "probability") <- M.prob
return(M.status)
}
## Get chromosome information of a MethyLumiM or MethyGenoSet object
# chrInfo <- getChrInfo(methyLumiM, lib=lib)
getChrInfo <- function(methyData, lib=NULL, ...) {
if (is(methyData, 'MethyLumiM')) {
methyData <- addAnnotationInfo(methyData, lib=lib, ...)
ff <- fData(methyData)
probeList <- featureNames(methyData)
} else if (is.character(methyData)) {
ff <- addAnnotationInfo(methyData, lib=lib, ...)
probeList <- rownames(methyData)
} else {
stop('methyData should be a MethyLumiM object or a character vector')
}
chrInfo <- data.frame(PROBEID=probeList, ff[,c('CHROMOSOME', 'POSITION')])
if (!is.null(ff$END))
chrInfo <- data.frame(chrInfo, END=ff$END)
return(chrInfo)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.