
# File: NormiR.R
# 
# Author: Sylvain Gubian, Alain Sewer, PMP SA
# Aim: Set of functions for Exiqon data normalization

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

#########################################################################################

NormiR <- function(
		abatch,
		# background correction
		bg.correct = TRUE,
		bgcorrect.method = 'auto',
		bgcorrect.param = list(),
		# normalize
		normalize = TRUE,
		normalize.method = 'spikein',
		normalize.param = list(),
		# pm correction
		pmcorrect.method = 'pmonly',
		pmcorrect.param = list(),
		# expression values
		summary.method = 'medianpolish',
		summary.param = list(),
		summary.subset = NULL,
		# misc.
		verbose = FALSE,
		...
) {
	if (is.null(summary.method)) {
		stop("Please specify a summarization method, for example: 'medianpolish'")
	}
	
	if (is.null(pmcorrect.method)) {
		if (!is.from.createAB(abatch)) {
			stop("Please specify a pmcorrect method, for example: 'pmonly'")
		}
	}
	# For backward compatibility
	#dot.args <- match.call(expand.dots=TRUE)
	dot.args <- list(...)
	if (!is.null(dot.args$background.correct)) {
		bg.correct = dot.args$background.correct
		if (bg.correct) {
			bgcorrect.method = "auto" ;
		}
	}
	if (is.null(bgcorrect.method)) {
		bgcorrect.method = "auto" ;
	}
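	# Example of a remapped legacy call (hypothetical): NormiR(ab, background.correct=TRUE)
	# sets bg.correct above; the old 'method' argument is picked up later in norm.miR() via '...'.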
	
	# End of backward compatibility section
	# Call background correction if asked
	if (isTRUE(bg.correct)) {
		if(isTRUE(verbose)) {
			cat("Background correction...\n")
		}
		ab.bg.corrected <- bg.correct.miR(abatch = abatch, bgcorrect.method=bgcorrect.method, bgcorrect.param=bgcorrect.param, verbose=verbose)
	} else {
		ab.bg.corrected <- abatch
	}
	if (isTRUE(normalize)) {
		if(isTRUE(verbose)) {
			cat("Normalization...\n")
		}
		ab.normalized <- norm.miR(abatch = ab.bg.corrected, normalize.method=normalize.method, normalize.param=normalize.param, verbose = verbose,...)
	} else {
		ab.normalized <- ab.bg.corrected
	}
	
	if(isTRUE(verbose)) {
		cat("Summarization...\n")
	}
	ab.summarized <- summarize.miR(abatch = ab.normalized, pmcorrect.method = pmcorrect.method, pmcorrect.param = pmcorrect.param, summary.method = summary.method, summary.param = summary.param, summary.subset = summary.subset)
	
	return(ab.summarized)
}
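
# A minimal usage sketch (not run): 'abatch' is assumed to be an AffyBatch
# coming from affy::ReadAffy() or from the ExiMiR createAB() importer.
if (FALSE) {
	eset <- NormiR(abatch,
			bg.correct = TRUE,
			normalize.method = 'spikein',
			summary.method = 'medianpolish',
			verbose = TRUE)
	head(exprs(eset))  # summarized expression values
}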

bg.correct.miR <- function(abatch, bgcorrect.method="auto", bgcorrect.param = list(), verbose=FALSE) {	
	if (is.from.createAB(abatch)) {
		if (is.null(bgcorrect.param$offset)) {
			bgcorrect.param$offset = 0
		}
		# Creating the limma object for backgroundCorrect function:
		if (is.dual(abatch)) {
			if (bgcorrect.method=="auto")
				bgcorrect.method="normexp"
			if (is.null(bgcorrect.param$normexp.method)) {
				bgcorrect.param$normexp.method = "saddle"
			}
			
			lim.obj <- new("RGList")
			lim.obj$G <- exprs(abatch)[,1:(ncol(exprs(abatch))/2)]
			lim.obj$R <- exprs(abatch)[,((ncol(exprs(abatch))/2)+1):(ncol(exprs(abatch)))]
			if (has.bg(abatch)) {
				lim.obj$Gb <- se.exprs(abatch)[,1:(ncol(exprs(abatch))/2)]
				lim.obj$Rb <- se.exprs(abatch)[,((ncol(exprs(abatch))/2)+1):(ncol(exprs(abatch)))]
			} else {
				lim.obj$Gb <- NULL
				lim.obj$Rb <- NULL
			}
			lim.obj$R <- backgroundCorrect.matrix(lim.obj$R, lim.obj$Rb, method = bgcorrect.method, 
					offset = bgcorrect.param$offset, normexp.method = bgcorrect.param$normexp.method, 
					verbose = verbose)
			lim.obj$G <- backgroundCorrect.matrix(lim.obj$G, lim.obj$Gb, method = bgcorrect.method, 
					offset = bgcorrect.param$offset, normexp.method = bgcorrect.param$normexp.method, 
					verbose = verbose)
			abatch.bg.corrected <- abatch
			exprs(abatch.bg.corrected) <- cbind(lim.obj$G, lim.obj$R)
			if (has.bg(abatch))
				se.exprs(abatch.bg.corrected) <- cbind(lim.obj$Gb, lim.obj$Rb)
		} else {
#			E <- exprs(abatch)[,1:(ncol(exprs(abatch)))]
#			Eb <- NULL
#			if (has.bg(abatch)) {
#				Eb <- se.exprs(abatch)[,1:(ncol(exprs(abatch)))]
#			}
#			E <-  backgroundCorrect.matrix(E=E, Eb=Eb, method=bgcorrect.method,
#					normexp.method= bgcorrect.param$normexp.method,
#					offset=bgcorrect.param$offset,
#					verbose=verbose)
#			abatch.bg.corrected <- abatch
#			exprs(abatch.bg.corrected) <- E
#			if (has.bg(abatch))
#				se.exprs(abatch.bg.corrected) <- Eb
			if (bgcorrect.method=="auto")
				bgcorrect.method="rma"
			if ((bgcorrect.method%in%bgcorrect.methods()))
				abatch.bg.corrected <- bg.correct(abatch, method=bgcorrect.method)
			else {
				stop("Background method not supported for the AffyBatch object provided")
			}
		}
	} else {
		# affy bg.correct
		if (bgcorrect.method=="auto")
			bgcorrect.method="rma"
		if ((bgcorrect.method%in%bgcorrect.methods()))
			abatch.bg.corrected <- bg.correct(abatch, method=bgcorrect.method)
		else {
			stop("Background method not supported for the AffyBatch object provided")
		}
	}
	return(abatch.bg.corrected)
}
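
# Direct-call sketch (not run): with the default 'auto', a plain Affymetrix
# AffyBatch is corrected via affy::bg.correct() with method "rma".
if (FALSE) {
	ab.bg <- bg.correct.miR(abatch, bgcorrect.method = "auto")
}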


norm.miR <- function(abatch, normalize.method ="spikein", normalize.param = list(), verbose=TRUE, ...)
{
	# For backward compatibility
	#dot.args <- match.call(expand.dots=TRUE)
	dot.args <- list(...)
	normalize.method.params <- c(
			"min.corr",
			"loess.span",
			"extrap.points",
			"extrap.method",
			"force.zero",
			"cover.ext",
			"cover.int",
			"max.log2span",
			"figures.show",
			"figures.output")
	if (!is.null(dot.args$method)) {
		normalize.method = dot.args$method;
	}
	for(param in normalize.method.params) {
		if (!is.null(unlist(as.list(dot.args[param])))) {
			normalize.param[param] = dot.args[param];
		}
	}
	if (is.null(normalize.method)) {
		normalize.method = "spikein"
	}
	
	if (normalize.method =="spikein") {
		# Check parameters
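		# The defaults below double as quality-control thresholds for the
		# spike-in approach (see the checks in spikeinnorm): min.corr is the
		# minimal mean correlation between spike-in probe sets, max.log2span
		# bounds their within-set intensity spread, cover.ext/cover.int bound
		# the intensity coverage of the spikes, and loess.span/extrap.* tune
		# the correction-function smoothing and tail extrapolation.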
		control.params <- list(
				min.corr = 0.5,
				figures.show=TRUE,
				figures.output="display",
				loess.span=-1,
				extrap.points=2,
				extrap.method="mean",
				force.zero=FALSE,
				cover.ext=0.5,
				cover.int=1/3,
				max.log2span=1,
				probeset.list=NULL
		)
		nmsC <- names(control.params)
		control.params[(namc <- names(normalize.param))] <-  normalize.param
		if(length(noNms <- namc[!namc %in% nmsC]))
			warning("Not supported spikein normalization parameters: ", paste(noNms,collapse=", "))
		
		galenv <- getCdfInfo(abatch)
		l <- mget(ls(galenv),galenv)
		
		have.probe.control <- FALSE
		if (!is.null(normalize.param$probeset.list)) {
			if (length(normalize.param$probeset.list)==0) {
				warning("No probe names given as control, using spikes-in controls if any.")	
			}  else {
				# Check if the list matches the probes names we have
				have.probe.control <- TRUE
				#print(normalize.param$probeset.list)
				matches <- match(normalize.param$probeset.list,names(l))
				if (any(is.na(matches))) {
					if (all(is.na(matches))) {
						have.probe.control <- FALSE
						stop("No probe set names given as control match probes names in the AffyBatch object.")
					} else {
						warning("Some probe set names given as control do not match the ones in the Affybatch object.")
					}
				}
				spike.list <- matches[!is.na(matches)]
			}
		}
		
		if (!is.from.createAB(abatch)) {
			# This does not come from createAB; it's an original AffyBatch
			if (!have.probe.control)
				spike.list <-  which(regexpr("^spike_in-control", names(l)) != -1)
			
			if (length(spike.list)==0)
				stop("Can not find any control probes. Please use another normalization method")
			
			# Call the low level spikeinnorm function
			m <- spikeinnorm(abatch, spike.list=spike.list, control.params=control.params, verbose=verbose, channel=0)
		} else {
			if (!have.probe.control)
				spike.list <-  which(regexpr("^spike_control", names(l)) != -1)
			if (length(spike.list)==0)
				stop("Can not find any control probes. Please use another normalization method")
			# If it is dual channel
			if (is.dual(abatch)) {
				if (verbose) {
					cat("Processing first channel...\n")
				}
				m1 <- spikeinnorm(object=abatch, spike.list=spike.list, control.params=control.params, verbose=verbose, channel = 1)
				if (verbose) {
					cat("Processing second channel...\n")
				}
				m2 <- spikeinnorm(object=abatch, spike.list=spike.list, control.params=control.params, verbose=verbose, channel = 2)
				m <- cbind(m1, m2)
			} else {
				# if it is single channel
				m <- spikeinnorm(object=abatch, spike.list=spike.list, control.params=control.params, verbose=verbose, channel = 0)
			}
		}
	} else {
		if (is.from.createAB(abatch)) {
			if (!normalize.method%in%c("quantile","median","mean")) {
				stop("Method not supported for the AffyBatch object provided")
			}
			if (normalize.method=="quantile") {
				if (is.dual(abatch)) {
					lim.obj <- new("RGList")
					lim.obj$G <- exprs(abatch)[,1:(ncol(exprs(abatch))/2)]
					lim.obj$R <- exprs(abatch)[,((ncol(exprs(abatch))/2)+1):(ncol(exprs(abatch)))]
#				if (has.bg(abatch)) {
#					lim.obj$Gb <- se.exprs(abatch)[,1:(ncol(exprs(abatch))/2)]
#					lim.obj$Rb <- se.exprs(abatch)[,((ncol(exprs(abatch))/2)+1):(ncol(exprs(abatch)))]
#				}
					#lim.MA <- normalizeWithinArrays(object = lim.obj, method = "loess")
					#lim.obj$R <- normalizeQuantiles(lim.MA$A + lim.MA$M/2)
					#lim.obj$G <- normalizeQuantiles(lim.MA$A - lim.MA$M/2)
					lim.obj$R <- 2 ^ normalizeQuantiles(log2(lim.obj$R))
					lim.obj$G <- 2 ^ normalizeQuantiles(log2(lim.obj$G))
					m <- cbind(lim.obj$G, lim.obj$R)		
				} else {
					# Single channel
					lim.obj <- new("EListRaw")
					lim.obj$E <- exprs(abatch)[,1:(ncol(exprs(abatch)))]
#				lim.obj$Eb <- NULL
#				if (has.bg(abatch)) {
#					lim.obj$Eb <- se.exprs(abatch)[,1:(ncol(exprs(abatch)))]
#				}
					m <- 2 ^ normalizeQuantiles(log2(lim.obj$E))
				}
			} else {
				if (normalize.method=="median") {
					m <- mediannorm(abatch)
					
				} else if (normalize.method=="mean") {
					m <- meannorm(abatch)
				}
			}
		} else {
#			if (!normalize.method%in%normalize.AffyBatch.methods()) {
#				stop("Method not supported for the AffyBatch object provided")
#			}
			# The abatch comes from Affy pipeline
			m <- exprs(normalize(abatch, method="quantiles"))
		}
	}
	ab <- abatch
	exprs(ab) <- m
	return(ab)
}
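
# Usage sketch (not run): 'ab.bg' is assumed to be a background-corrected
# AffyBatch; spike-in normalization with a custom correlation threshold and
# figures written to PDF files instead of the display.
if (FALSE) {
	ab.norm <- norm.miR(ab.bg,
			normalize.method = "spikein",
			normalize.param = list(min.corr = 0.6, figures.output = "file"))
}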


spikeinnorm <- function(	object,
		spike.list,
		control.params,
		channel,
		verbose=TRUE) {
	# Spike-in normalization: derive the correction from the spike-in control
	# probe sets only
	galenv <- getCdfInfo(object)
	
	# Work on the log2 scale
	M <- log2(exprs(object))
	
	if (channel==0) {
		sample.start <- 1
		sample.end <- length(sampleNames(object))
		channel.name <- "1"
	} else if (channel==1) {
		sample.start <- 1
		sample.end <- length(sampleNames(object)) / 2
		channel.name <- "Green"
	} else if (channel==2) {
		sample.start <- length(sampleNames(object)) / 2 + 1
		sample.end <- length(sampleNames(object))
		channel.name <- "Red"
	} else {
		stop("channel must be 0 (single channel), 1 (Green) or 2 (Red)")
	}
	
	nbsamples <- (sample.end - sample.start) + 1
	if (nbsamples==1) {
		stop("Cannot perform the spike-in calibration with only one sample.")
	}
	l <- mget(ls(galenv),galenv)
	
	# Build the median/max/min spike-in control intensity matrices (probe sets x arrays)
	mss <- matrix(0, length(spike.list), nbsamples)
	mss.max <- matrix(0, length(spike.list), nbsamples)
	mss.min <- matrix(0, length(spike.list), nbsamples)
	
	spikeCounts <- vector()
	for(i in 1:length(spike.list)) {
		mat <- l[spike.list[i]]
		mat <- mat[[1]]
		
		for(j in 1:nbsamples) {
			if (j==1) {
				spikeCounts[i] <- length(M[mat[,1],(j+sample.start-1)])
			}
			mss[i,j] <- median(M[mat[,1],(j+sample.start-1)])
			mss.max[i,j] <- max(M[mat[,1],(j+sample.start-1)])
			mss.min[i,j] <- min(M[mat[,1],(j+sample.start-1)])
		}
	}
	# Shared-variance check: mean of the pairwise correlation coefficients,
	# excluding the diagonal
	cmat <- cor(t(mss))
	mean.corr <- mean(cmat[row(cmat) != col(cmat)])
	if (mean.corr < control.params$min.corr) {
		if (verbose) {
			mean.display <- floor(mean.corr * 100) / 100
			message("Not enough common variance to guarantee good performance of spike-in normalization (", mean.display, " < ", control.params$min.corr, ").\nUsing median normalization...")
		}
		# Fall back to median normalization
		return(mediannorm(object, channel=channel))
	}
	
	# Checking specificity of spike intensity
	#	vs <- vector()
	#	for(i in 1:nbsamples) {
	#		vs[i] <- (max(M[,i+sample.start-1])-min(M[,i+sample.start-1])) / length(spikeCounts)
	#	}
	if ( median(mss.max-mss.min) > control.params$max.log2span) {
		if (verbose) {
			v.max <- floor(control.params$max.log2span * 100) / 100 
			v.observed <- floor(median(mss.max-mss.min) * 100) / 100
			message("The intensity resolution of the spike-in probe sets is too coarse (",  v.observed, " > ", v.max, ") to guarantee a good performance of spike-in normalization\nUsing median normalization...")
		}
		# Call mediannorm
		return(mediannorm(object, channel=channel))
	}
	
	# Checking external coverage
	ve <- vector()
	for(i in 1:nbsamples) {
		ve[i] <- (max(mss[,i])-min(mss[,i])) / (max(M[,i+sample.start-1])-min(M[,i+sample.start-1]))
	}
	if (median(ve) < control.params$cover.ext) {
		if (verbose) {
			ve.display <- floor(median(ve) * 100) / 100 
			message("The ratio between the intensity range covered by the spike-in probes and the one covered by all probes on the array is too low (",  ve.display, " < ", control.params$cover.ext, ").\nUsing median normalization...")
		}
		# Call mediannorm
		return(mediannorm(object,channel=channel))
	}
	
	# Checking internal coverage
	vi <- vector()
	for(i in 2:nbsamples) {
		vi[i-1] <- max(mss[,i]-mss[,i-1])
	}
	if (median(vi) > control.params$cover.int) {
		if (verbose) {
			vi.display <- floor(median(vi) * 100) / 100 
			message("The size of the largest intensity interval between two consecutive spikes is too large (",  vi.display, " > ", control.params$cover.int, ").\nUsing median normalization...")
		}
		# Call mediannorm
		return(mediannorm(object,channel=channel))
	}
	
	# Use short names for the spike-in controls
	if (is.from.createAB(object)) {
		tab <- strsplit(names(l[spike.list]), '_')
	}
	else {
		tab <- strsplit(names(l[spike.list]), '-')
	}
	spikeNames <- list()
	for(i in 1:length(tab)) { spikeNames[i] = tab[[i]][length(tab[[i]])] }
	rownames(mss) <- spikeNames
	colnames(mss) <- (sample.start):(sample.end)
	
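	# Center-and-scale correction: each spike-in probe set (row of mss) is
	# standardized across arrays (nss), the residual array-wise offsets ns
	# are removed (ncss), and the result is mapped back to the original
	# location and scale (mcss); mcss - mss gives the corrections propagated
	# to the probe level below (dmsp).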
	mu <- rowMeans(mss)
	sigma <- apply(mss, 1, sd)
	sigma <- sqrt((ncol(mss)-1)/ncol(mss)) * sigma # We want denominator n not n-1
	
	nss <- (mss - mu) / (sqrt(ncol(mss))*sigma)
	ns <-  colMeans(nss)
	ncss <- matrix(NA, nrow(mss), ncol(mss))
	for(i in 1:ncol(mss)) {ncss[,i] <- nss[,i] - ns[i]}
	mcss <- mu + sqrt(ncol(mss))*sigma * ncss
	
	msp <- vector()
	dmsp <- vector()
	for(s in 1:nbsamples) {
		tmp <- vector()
		dtmp <- vector()
		for(i in 1:length(spike.list)) {
			mat <- l[spike.list[i]]
			mat <- mat[[1]]
			if (i==1) {
				tmp <- as.vector(M[mat[,1],s + sample.start-1])
				dtmp <-as.vector(replicate(length(M[mat[,1],s + sample.start-1]), mcss[i,s]-mss[i,s]))
			}
			else {
				tmp <- append(tmp, as.vector(M[mat[,1],s + sample.start-1]))
				dtmp <- append(dtmp, as.vector(replicate(length(M[mat[,1],s + sample.start-1]), mcss[i,s]-mss[i,s])))
			}
		}
		msp <- cbind(msp, tmp)
		dmsp <- cbind(dmsp, dtmp)
	}
	colnames(msp) <- (sample.start):(sample.end)
	colnames(dmsp) <- (sample.start):(sample.end)
	
	mcsp <- msp + dmsp
	dM <- matrix(NA, nrow(M), nbsamples)
	Mout <- matrix(NA, nrow(M), nbsamples)
	
	if (control.params$figures.show) {
		ordered <- order(mss[,1])
		txt <- paste("Fig.1: Correction of spike-in probe set intensities (", channel.name, " channel)", sep="")
		if (control.params$figures.output=="display") {
			dev.new(title=txt)
		} else {
			filename <- paste("Spike-in_Probe_sets_Intensity_Correction_", channel.name, "_channel.pdf", sep="")
			pdf(file=filename, title=txt)
		}
		
		figure1 <- dev.cur()
		xLegend <- "Array labels"
		yLegend <- "Intensities (log2)"
		#layout(matrix(c(1,2,3,4,5,5), 3, 2, byrow = TRUE),c(3,3,3), c(3,3,1), TRUE)
		layout(matrix(c(1,2,3,4,5,3), 2, 3, byrow = TRUE),c(5,5,3), c(5,5,5), TRUE)
		par(xpd = NA)
		par(oma = c(0, 0, 0, 0))
		
		# Start: Uncorrected
		m <- t(mss[ordered,])
		matplot(m, 
				type="o", col = rainbow(nrow(mss)),
				pch=1:nrow(mss),
				lty=1,
				ylab = yLegend,
				xlab = xLegend,
				main = "Start: Uncorrected",
				axes = FALSE,
				cex = 0.8)
		axis(1, at = seq(1, nrow(m), by = 1))
		axis(2, at = seq(round(min(m)), round(max(m))))
		
		# Step #1: Centered-Normed
		m <- t(nss[ordered,])
		matplot(m, 
				type="o", col = rainbow(nrow(mss)),
				pch=1:nrow(mss),
				lty=1,
				ylab = yLegend,
				xlab = xLegend,
				main = "Step #1: Centered-Normed",
				axes = FALSE,
				cex = 0.8)
		axis(1, at = seq(1, nrow(m), by = 1))
		axis(2, at = seq(-0.75, 0.75, by=0.1))
		
		# Legend
		cols <- rainbow(nrow(mss))
		for(i in 1:nrow(mss)) {
			y <- c(i, i)
			if(i==1) {
				plot(0, i, axes=FALSE, ylim=c(-1.2, nrow(mss)+1), xlim=c(0,1.5), xlab="", ylab="", col=cols[i], pch=i)
			}
			else {
				points(0,i, col=cols[i], pch=i, cex=1.2)
			}
			text(1, y, labels=(spikeNames[ordered])[i]) 
		}
		legend("top", "Spike-in probe sets", bty="n")
		
		# Step #2: Corrected Center-Normed
		m <- t(ncss[ordered,])
		matplot(m, 
				type="o", col = rainbow(nrow(mss)),
				pch=1:nrow(mss),
				lty=1,
				ylab = yLegend,
				xlab = xLegend,
				main = "Step #2: Corrected Center-Normed",
				axes = FALSE,
				cex = 0.8)
		axis(1, at = seq(1, nrow(m), by = 1))
		axis(2, at = seq(round(min(m),1), round(max(m),1), by=0.05))
		
		# Final step: Corrected
		m <- t(mcss[ordered,])
		matplot(m, 
				type="o", col = rainbow(nrow(mss)),
				pch=1:nrow(mss),
				lty=1,	
				ylab = yLegend,
				xlab = xLegend,
				main = "Final step: Corrected",
				axes = FALSE,
				cex = 0.8)
		axis(1, at = seq(1, nrow(m), by = 1))
		axis(2, at = seq(round(min(m)), round(max(m))))
		
		#legend(-3, 3.5, spikeNames[ordered], col=rainbow(nrow(m)), lty=1:nrow(m) ,ncol = 3, cex = 1.0, pch="*")
		layout(1)
		if (!dev.interactive()) dev.off()
		
		txt <- paste("Fig.2: Performance of the spike-in probe set intensity correction (", channel.name, " channel)", sep="")
		if (control.params$figures.output=="display") {
			dev.new(title=txt)
		} else {
			filename <- paste("Performance_of_the_spike-in_probe_set_intensity_correction_", channel.name, "_channel.pdf", sep="")
			pdf(file=filename, title=txt)
		}
		figure2 <- dev.cur()
		nf <- layout(matrix(c(1,2,3,0), 2, 2, byrow=TRUE), c(4,1))
		cmat <- cor(t(mss))
		cmin <- 0
		cmax <- 1
		csize <- ncol(cmat)
		
		ColorRamp <- rgb( seq(0,1,length=256),
				seq(1,0,length=256),
				seq(0,0,length=256))
		ColorLevels <- seq(cmin, cmax, length=length(ColorRamp))
		
		# Data Map
		nb.sources <- 10000
		heat.source <- heat.colors(nb.sources)
		index.map <- vector()
		for(i in 1:nb.sources)
		{
			index.map[i] = 2/pi * asin(i/nb.sources)
		}
		
		heat.panel <-  round(index.map * 128)
		res <- which(!duplicated(heat.panel))
		heat.panel <- heat.source[res]
		
		image(1:csize, 1:csize, t(cmat)[ordered,ordered], col=rev(heat.panel), xlab="Spike-in probe sets", ylab="Spike-in probe sets", zlim=c(cmin,cmax), main="Pearson correlation between \nuncorrected spike-in probe sets", axes=FALSE)
		axis(1, at = seq(1, csize, by = 1),(spikeNames[ordered])[1:csize])
		axis(2, at = seq(1, csize, by = 1),(spikeNames[ordered])[1:csize])
		
		# Color Scale
		image(1, ColorLevels,
				matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
				col=rev(heat.panel),
				xlab="",ylab="",
				xaxt="n")
		
		ratio <- (apply(t(mcss),2, var) / apply(t(mss),2, var))[ordered]
		#rownames(ratio) <- spikeNames
		max.ratio <- max(ratio)
		if ( max.ratio <= 1) max.ratio <- 1 
		barplot(ratio,main="Variance ratio after/before correction\nfor spike-in probe set intensities", axes=FALSE, ylim=c(0,max.ratio), col=rainbow(nrow(mcss)), xlab="Spike-in probe sets", ylab="Variance ratio")
		axis(2, at = seq(0,max.ratio,by=0.1))
		# ylab="Corrected variance/Uncorrected variance", xlab="Spike probesets"
		layout(1)
		if (!dev.interactive()) dev.off()
	}
	
	# Set the limit of samples viewed per figure
	max.samplesviewed <- 10
	
	# Set the default loess span value
	if (control.params$loess.span == -1) control.params$loess.span <- 5.0/nrow(mss)
	
	# Compute the number of probe-level points used for tail extrapolation, based on the probe counts of the first extrap.points spike-in probe sets
	spikeset.order <- order(mss[,1])
	nbpoints <- 0
	
	for(p in 1:(control.params$extrap.points)) {
		nbpoints <- nbpoints + (spikeCounts[spikeset.order])[p] 
	}
	
	points.all <- list()
	for(s in 1:nbsamples) {
		# Sort spike probe signal and correction matrices by intensity
		o <- order(msp[,s])
		x <- msp[o,s]
		y <- dmsp[o,s]
		# Integer grid of intensities that must be extrapolated to cover the whole signal range
		xa <- seq(from=ceiling(max(x)), to=ceiling(max(M)))
		# Check if number of points needed to be extrapoled are more than 50% of the number of spikes
		#if (length(xa) / length(spikeCounts) > 0.5) {
		#	if (verbose) {
		#		message("The number of points that have to be extrapoled is too high to perform spike-in normalization.\nUsing median normalization...")
		#	}
		#	return(mediannorm(object,channel=channel))
		#}
		
		lenx <- length(x)
		leny <- length(y)
		
		# Compute the extrapolation for the upper tail by regressing on the last extrap.points probe sets
		nbrep <- median(spikeCounts)
		xf <- vector()
		for(i in 1:length(xa)) {
			xf <- c(xf, rep(xa[i], nbrep))
		}
		
		if (control.params$force.zero) {
			# Bug fix, if there are some zeros in the expression matrix, log2 will give -Inf
			# Get the minimum intensity which is not zero
			xb <- rep(min(M[which(M>0)]), (spikeCounts[spikeset.order])[1])
			xe <- c(xb, x, xf)
		}
		else {
			# Bug fix, if there are some zeros in the expression matrix, log2 will give -Inf
			# Get the minimum intensity which is not zero
			xe <- c(min(M[which(M>0)]), x, xf)
		}
		
		if (control.params$extrap.method == "mean") {
			meany <- mean(y[(leny-nbpoints):(leny)])
			prediction <- rep(meany, length(xf))
		}
		else {
			if (control.params$extrap.method == "linear") {
				qdata <- data.frame(x=x[(lenx-nbpoints-1):(lenx)], y=y[(leny-nbpoints-1):(leny)])
				fit <- lm(y ~ x, data=qdata )
				prediction <- predict(fit, data.frame(x=xf))
			}
			else {
				stop("extrap.method not supported")
			}
		}
		
		if (control.params$force.zero) {
			ye <- c(rep(0, (spikeCounts[spikeset.order])[1]), y, prediction)	
		}
		else {
			ye <- c(median(y[1:(spikeCounts[spikeset.order])[1]]), y, prediction)
			#ye <- c(y, prediction)
		}
		
		lo.res <- loess.smooth(xe, ye, span=control.params$loess.span)
		if (control.params$figures.show) {
			points.all[[s]] <- list()
			points.all[[s]]$x <- xe
			points.all[[s]]$y <- ye
			points.all[[s]]$lo <- lo.res
		}
		dM[,s] <- (approx(x=lo.res$x, y=lo.res$y, xout=M[,s]))$y
	}
	
	if (control.params$figures.show) {
		by.samples.figures(points.all, channel.name,figures.output=control.params$figures.output)
	}
	
	Mout <- M[,(sample.start):(sample.end)] + dM
	Mout <- 2 ^ Mout
	return(Mout)
}
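
# A toy sketch (not run) of the smoothing/interpolation step above: the
# per-spike corrections are smoothed with loess.smooth() and mapped onto all
# probe intensities with approx(), as done for dM[,s]. All data are made up.
if (FALSE) {
	set.seed(1)
	x <- sort(runif(50, 4, 14))              # toy log2 spike intensities
	y <- 0.2 * sin(x) + rnorm(50, 0, 0.05)   # toy per-probe corrections
	lo <- loess.smooth(x, y, span = 0.5)
	M.toy <- runif(1000, 4, 14)              # toy intensities for all probes
	dM.toy <- approx(x = lo$x, y = lo$y, xout = M.toy)$y
	plot(x, y); lines(lo, col = "red")
}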

by.samples.figures <- function(data, channel, figures.output=c("display","file")) {
	figures.output <- match.arg(figures.output)
	max.samples <- 20
	nbsamples <- length(data)
	nb.figures <- ceiling(nbsamples / max.samples)
	for(i in 1:nb.figures) {
		if ((i*max.samples) > nbsamples) {
			to <- nbsamples
		}
		else {
			to <- i * max.samples
		}
		from <- (i-1) * max.samples + 1		
		txt <- paste("Correction functions (channel ", channel, ") arrays ", from, " to ", to, sep="")
		if (figures.output=="display") {
			dev.new(title=txt)
		} else {
			filename <- paste("Correction_functions_channel_", channel, "_arrays_", from, "_to_", to,".pdf",sep="")
			pdf(file=filename, title=txt)
		}
		def.par <- par(no.readonly = TRUE)
		par(xpd = NA)
		par(oma = c(0, 0, 0, 0))
		nf <- layout(matrix(c(1,2), 1, 2, byrow=TRUE), c(3,1), c(3,3), TRUE)
		by.samples.curve(data, from, to)
		by.samples.legend(from, to)
		layout(1)
		if (!dev.interactive()) dev.off()
	}
}


by.samples.curve <- function(data, sample.from, sample.to) {
	nbsamples <-  sample.to - sample.from + 1
	title <- paste("Correction functions for raw intensity normalization\n(arrays ",sample.from, " to ", sample.to,")", sep="")
	
	dataloy.vect <- vector()
	for(s in 1:nbsamples) {
		index <- s + sample.from - 1
		dataloy.vect <- c(dataloy.vect, data[[index]]$lo$y)
		dataloy.vect <- c(dataloy.vect, data[[index]]$y)
	}
	min.value <- min(dataloy.vect)
	max.value <- max(dataloy.vect)
	
	for(s in 1:nbsamples) {
		index <- s + sample.from - 1
		if (s  == 1) {
			# this is a new figure on the layout
			plot(data[[index]]$x,data[[index]]$y, ylim=c(min.value, max.value), col=s, cex=0.72, pch=s, main=title, xlab="Raw intensities", ylab="Correction functions")
		} else {
			points(data[[index]]$x, data[[index]]$y, col=s, pch=s, cex=0.72)
		}
		lines(data[[index]]$lo,col=s,pch=s)
	}
}

by.samples.legend <- function(sample.from, sample.to) {
	
	nbsamples <-  sample.to - sample.from + 1
	for(s in 1:nbsamples) {
		index <- s + sample.from - 1
		if (s == 1) {
			plot(-0.5, s, axes=FALSE, ylim=c(0, nbsamples+2), xlim=c(0,1.5), xlab="", ylab="", col=s, pch=s)
		}
		else {
			points(-0.5,s, col=s, pch=s, cex=1.2)
		}
		text(1.5, s, labels=(s+sample.from-1), cex=0.6)
	}
	legend(x=-0.5, y=nbsamples+2, "Array labels", bty="n", cex=0.7)
}

meannorm <- function(object, channel=0) {
	if (channel==0) {
		m <- exprs(object)
	} else if (channel==1) {
		m <- exprs(object)[,1:(length(sampleNames(object)) / 2)]
	} else {
		m <- exprs(object)[,(length(sampleNames(object)) / 2 + 1):(length(sampleNames(object)))]
	}
	m <- log2(m)
	# Subtract the column means so that every array has mean zero
	m <- sweep(m, 2, colMeans(m))
	# Shift to non-negative values and return to the raw scale
	m <- m - min(m)
	m <- 2 ^ m
	return(m)
}


mediannorm <- function(object, channel=0) {
	if (channel==0) {
		m <- exprs(object)
	} else if (channel==1) {
		m <- exprs(object)[,1:(length(sampleNames(object)) / 2)]
	} else {
		m <- exprs(object)[,(length(sampleNames(object)) / 2 + 1):(length(sampleNames(object)))]
	}
	m <- log2(m)
	# Subtract the column medians so that every array has median zero
	m <- sweep(m, 2, apply(m, 2, median))
	# Shift to non-negative values and return to the raw scale
	m <- m - min(m)
	m <- 2 ^ m
	return(m)
}
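
# Equivalent computation on a plain matrix (toy sketch, not run):
if (FALSE) {
	m.raw <- matrix(2 ^ rnorm(40, mean = 8), 10, 4)     # made-up intensities
	m.log <- log2(m.raw)
	m.log <- sweep(m.log, 2, apply(m.log, 2, median))   # median-center columns
	m.out <- 2 ^ (m.log - min(m.log))                   # shift, back-transform
}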

summarize.miR <- function(
		abatch,
		pmcorrect.method = 'pmonly',
		pmcorrect.param = list(),
		summary.method = 'medianpolish',
		summary.param = list(),
		summary.subset = NULL) {
	if (is.null(summary.method)) {
		stop("Please specify a summarization method, for example: 'medianpolish'")
	}
	
	if (is.null(pmcorrect.method)) {
		if (!is.from.createAB(abatch)) {
			stop("Please specify a pmcorrect method, for example: 'pmonly'")
		} else {
			pmcorrect.method = "pmonly"
		}
	}
	
	
	#pm correct
	if (pmcorrect.method != "pmonly") {
		if (!is.from.createAB(abatch)) {
			if(all(is.na(mm(abatch)))) {
				warning("No mismatch probes specified in the CDF file, using pmcorrect.method='pmonly'")
				pmcorrect.method = "pmonly"
			}
		} else {
			pmcorrect.method = "pmonly"
		}
	}
	return(computeExprSet(abatch, pmcorrect.method=pmcorrect.method,
					summary.method=summary.method, summary.param=summary.param,
					ids=summary.subset, pmcorrect.param=pmcorrect.param ))	
}
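
# Usage sketch (not run): summarization alone on an already-normalized
# AffyBatch; 'ids.subset' is a hypothetical character vector of probe set IDs.
if (FALSE) {
	eset <- summarize.miR(ab.norm,
			summary.method = "medianpolish",
			summary.subset = ids.subset)
}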


is.from.createAB <- function(object) {
	"ExiMiR.channel" %in% names(notes(object))
}

is.dual <- function(object) {
	is.from.createAB(object) &&
			identical(notes(object)$ExiMiR.channel, "Dual channel")
}

has.bg <- function(object) {
	is.from.createAB(object) &&
			identical(notes(object)$ExiMiR.bg, "Contains background")
}
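
# Note: the notes() entries tested above are set by the ExiMiR createAB()
# importer; 'ExiMiR.channel' is "Dual channel" for two-color data and
# 'ExiMiR.bg' is "Contains background" when background intensities were read.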
