R/weightM.R

Defines functions weightM

Documented in weightM

#' weightM 
#'
#' Builds a c (number of compounds) by m (number of masses) matrix of 
#' compound likelihoods with the gaussian error function erf, based 
#' on mass accuracy and optionally on isotopic patterns.
#' @param isoPatt is the likelihood data.frame generated by incorporate.isotopes function, 
#' or reactionM the data.frame of compound's information. 
#' @param useIso logical indicating whether to use or not isotope information in the likelihood. 
#' @param intervals a vector of SNR numerical intervals, to which different
#' carbon offset should be added to predicted C-number. 
#' @param offset vector of empirically estimated carbon offset to be added
#' to predicted C-number. 
#' @param massWeigth is the contribution parameter of the probabilistic model. 
#' @param likelihood which noise model to use, "erfc" to complementary error function,
#' or "gaussian" to standard gaussian with two sd corresponding to the given p.p.m precision.
#' @param precision equipment mass accuracy, usually the same used in exact mass search. 
#' @return A matrix wm of likelihood weights. 
#'
#' @export

weightM <-
function(isoPatt, useIso=TRUE, intervals=NULL, offset=NULL, massWeigth=1, likelihood="erfc", precision=1) {
 
	if(sum(grepl("unknown", isoPatt[,"id"]))) {
		 isoPatt <- isoPatt[isoPatt[,"id"]!="unknown",] 
		 isoPatt <- as.data.frame(isoPatt)
		 isoPatt[,"massObs"] <- as.numeric(as.matrix(isoPatt[,"massObs"]))
		 isoPatt[,"massDB"] <- as.numeric(as.matrix(isoPatt[,"massDB"]))
	}
        vm1a <- abs(isoPatt[,"massObs"]-isoPatt[,"massDB"])/isoPatt[,"massDB"]
	m1 <- mean(vm1a)
	s1 <- sd(vm1a)	

        erfc <- function(x, m, s) massWeigth * pnorm(((x-m)/s) * sqrt(2), lower.tail = FALSE) # standard normal, remember the lower parameter
	if(likelihood=="erfc"){
		getLik <- function(isoPatt) {
				 vm1 <- abs(isoPatt[,"massObs"]-isoPatt[,"massDB"])/isoPatt[,"massDB"]
				erfc(vm1,m1,s1)
		}
	}
	if(likelihood=="gaussian"){
		precision <- 1/((1e-6*precision/2)^2)	
		getLik <- function(isoPatt) {
		x <- isoPatt[,"massObs"]
		y <- isoPatt[,"massDB"]
		 return(exp((-0.5*precision)*(x/y-1)^2))
		}
	}
	

	if(!useIso) {
		isoPatt$lik <- getLik(isoPatt)

		coords <- tapply(1:nrow(isoPatt), isoPatt[,"molIonID"], function(x) x)
		coords2 <- unlist(lapply(coords, function(x) rep(x[1], length(x)))) 
		coords <- coords[order(unique(coords2))] 
		wm <- matrix(0, nrow=nrow(isoPatt), ncol=length(unique(coords))) 
		for(i in 1:ncol(wm)) wm[coords[[i]], i] <- isoPatt[coords[[i]], "lik"]
		return(list(wm=wm, isoPatt=isoPatt))
	}
	else {
		isoPatt$lik <- getLik(isoPatt)
		getInterval <- function(intervals, snr) {
			if(is.na(snr) | snr==0) return(0)
			int <- c()
			for(i in 1:length(intervals)) {
				if(i==length(intervals)) int <- c(int, snr> intervals[i]) 
				else int <- c(int, snr> intervals[i] & snr < intervals[i+1])
			}
			return(which(int))
		}

	isoPatt$id_filter <- sapply(isoPatt[, "snr"], function(x) getInterval(intervals, x))  
	cm <- offset

	isoPatt$lim1 <- apply(isoPatt[, c("predcnun", "id_filter", "sd")], 1, function(x) if(sum(is.na(x)) | sum(x==0)) 0 else x[1]+cm[x[2]]-3*x[3]) 
	isoPatt$lim2 <- apply(isoPatt[, c("predcnun", "id_filter", "sd")], 1, function(x) if(sum(is.na(x)) | sum(x==0)) 0 else x[1]+cm[x[2]]+3*x[3])   
	coords <- tapply(1:nrow(isoPatt), isoPatt[,"massObs"], function(x) x)
	coords <- unlist(lapply(coords, function(x) rep(x[1], length(x)))) 
	coords <- coords[order(coords)] 

	subNeg <- function(x) {
		if(sum(is.na(x[, c("lim1", "lim2")])) | sum(x[, c("lim1", "lim2")]<0)) {
			x[, c("lim1", "lim2")] <- 0 
			x
		}
		else{
			x
		}
	}

	isoPatt <- do.call("rbind", by(isoPatt, coords, subNeg)) 

	isoPatt$filter <- apply(isoPatt,1, 
					function(x) { 
						# for debugging 
						#options(warn = i); cat("\n warn =", w, "\n")
						#x <- isoPatt[i,]
						if(length(is.na(x))) x[is.na(x)] <- 0 
						x <- as.numeric(x[3:length(x)]); 
						return(ifelse((x[10]==0 & x[11]==0) | (x[1]>=round(x[10]) & x[1]<=round(x[11])),1,0.1))
					}
	) 

	coords <- tapply(1:nrow(isoPatt), isoPatt[,"massObs"], function(x) x)
	coords2 <- unlist(lapply(coords, function(x) rep(x[1], length(x)))) 
	coords <- coords[order(unique(coords2))] 

	wm <- matrix(0, nrow=nrow(isoPatt), ncol=length(unique(coords))) 
	for(i in 1:ncol(wm)) wm[coords[[i]], i] <- isoPatt[coords[[i]], "lik"]*isoPatt[coords[[i]], "filter"] 
	return(list(wm=wm, isoPatt=isoPatt))
	}
}
rsilvabioinfo/ProbMetab documentation built on May 28, 2019, 3:32 a.m.