R/lumiMethyNorm.R

Defines functions lumiMethyNorm

Documented in lumiMethyNorm

#' Infinium Normalization & M-value computation
#' 
#' Normalize the Infinium signal intensitty data and compute M-value.
#' Input data should be unnormalized data containing;
#' TargetID: Illumina ID
#' Sample1.Signal_A: Signal of Unmethylated probe of sample 1
#' Sample1.Signal_B: Signal of Methylated probe of sample 1
#' Sample1.Detection Pval: detection P value of sample 1
#' (sample_names: vector of sample names)
#' 
#' @param fileName unnourmalized intensity data
#' @param idatpath directroy of idata files
#' @param inputtype type of input. "signal": a row signal txt file generated by genomeStudio or "idat": idat files
#' @param sample_names a vetor of sample names
#' 
#' @import FDb.InfiniumMethylation.hg19
#' @importFrom lumi lumiMethyR addAnnotationInfo plotSampleRelation lumiMethyC plotColorBias1D boxplotColorBias lumiMethyN density detectionCall
#' @importFrom grDevices pdf dev.off
#' @importFrom utils write.table
#' @importFrom Biobase write.exprs exprs sampleNames sampleNames<-
#' @importFrom wateRmelon readEPIC
#' @importFrom GEOquery getGEO Meta
#' 
#' 
#' @return a object of normalized M-value
#' @keywords Normalization, lumi, M-value
#' @export

lumiMethyNorm <- function(fileName = "TableControl.txt", idatpath = getwd(), inputtype, sample_names=FALSE, force = TRUE){	
	if (missing(inputtype)) stop("inputtype notfound! 'signal' or 'idata'")

	############### data input #############################
	cat("Reading data ...\n")

	if(toupper(inputtype) == "SIGNAL"){
		data.lumiMethy <- lumiMethyR(fileName)
	}else if(toupper(inputtype) == "IDAT"){
		data.mls <- readEPIC(idatPath=idatpath, force = force)
		data.lumiMethy <- as(data.mls, 'MethyLumiM')
	}
	addAnnotationInfo(data.lumiMethy, lib = 'FDb.InfiniumMethylation.hg19', annotationColumn=c('COLOR_CHANNEL', 'CHROMOSOME', 'POSITION'))

	if(sample_names != FALSE){
		sampleNames(data.lumiMethy) <- sample_names    #convert sampleID to sample name
	}else if(toupper(inputtype) == "IDAT" && all(grepl("^GSM", sampleNames(data.lumiMethy)))){
		cat("\nsample names are converted from GEO IDs to GEO sample titles.\n")
		idat_name <- sampleNames(data.lumiMethy)
		idat_GEOid <- sapply(strsplit(idat_name, "_"), function(x){x[1]})
		info_GEOList <- lapply(idat_GEOid, getGEO)
		sample_names <- sapply(info_GEOList, function(x){Meta(x)$title})
		sampleNames(data.lumiMethy) <- sample_names    #convert idat file it to sample title
	}else if(toupper(inputtype) == "IDAT" && any(!grepl("^GSM", sampleNames(data.lumiMethy)))){
		cat("\nAt least one idat file name is not GEOID_barcode_location... format.\n")
		cat("GEO sample title can not be downloaded.\n")
	}

	############### PCA plot ###################################
	dir.create("Process_Result")    #make a directory to store the processing results
	##PCA plot
	pdf("Process_Result/pca.pdf")
	plotSampleRelation(data.lumiMethy, method='mds', cv.Th=0)
	dev.off()
	################# Color balance adjustment between two color channels ###########################
	lumiMethy.c.adj <- lumiMethyC(data.lumiMethy)
	## Check color balance after color balance adjustment
	pdf("Process_Result/col.adj.pdf")
	par(mfrow=c(2,2))
	plotColorBias1D(data.lumiMethy, channel='sum')    #density plot of raw
	boxplotColorBias(data.lumiMethy, channel='sum')    #boxplot of raw
	plotColorBias1D(lumiMethy.c.adj, channel='sum')    #density plot of adjusted
	boxplotColorBias(lumiMethy.c.adj, channel='sum')    #box plot of adjusted
	dev.off()
	############################### Normalization###############################
	## Perform SSN normalization based on color balance adjusted data
	## perform quantile normalization based on color balance adjusted data
	lumiMethy.c.q <- lumiMethyN(lumiMethy.c.adj, method='quantile')
	## plot the density of M-values before and after quantile normalization
	pdf("Process_Result/normalize.pdf")
	par(mfrow=c(2,2))
	density(data.lumiMethy, main="Density plot of M-value after quantile normalization")
	plotSampleRelation(data.lumiMethy, method='cluster', cv.Th=0)
	density(lumiMethy.c.q, main="Density plot of M-value after quantile normalization")
	plotSampleRelation(lumiMethy.c.q, method='cluster', cv.Th=0)
	dev.off()
	###################### output the normlized M-value as a Tab-separated text file ###########################
	dataMatrix <- exprs(lumiMethy.c.q)
	write.exprs(lumiMethy.c.q, file='processed_Mval.txt')
	#################### Remove the probes which is low detection p-value among all sample.##########################
	presentCount <- detectionCall(data.lumiMethy, Th = 0.01)	#Sample number of "Present" of a probe.
	selDataMatrix <- dataMatrix[presentCount > 0,]
	write.table(selDataMatrix, file="sel_processed_Mval.txt", sep="\t", quote=F)
	cat("Normalization & M-value computation succeeded!\n")
	return(selDataMatrix)
}
takahirosuzuki1980/test documentation built on April 2, 2021, 11 a.m.