library(qvalue)
#' Analyze simulation reslts
#'
#' Analyze results derived from simulated data.
#'
#' @param Sim Simulation output from \code{SimulateDataPaper}
#' @param Forward Target search results from \code{PerformSearch}
#' @param Reverse Decoy search results from \code{PerformSearch}
#' @param pi0 If known, the fraction of simulated spectra whose generating analyte does not lie in the target database. Detault is \code{NULL}, i.e. unknown
#' @param psm.fdr.bin PSM-fdr bins to determine PSM-fdr calibration
#'
#' @return A list \item{S.F}{Bayes factor score for target database} \item{S.R}{Bayes factor score for reverse database} \item{SEE.F}{Score-ordering error rate} \item{Sfdr}{DI-fdr} \item{PSMfdr}{PSM-fdr} \item{pi0.hat}{Estimate for pi0} \item{pvalues}{p-values for the null hypothesis that the spectrum was not generated by an analyte in the target database} \item{Pep.F}{Inferred target peptide} \item{Correct}{Is \code{Pep.F} correct} \item{PSMfdr.bins}{PSM-fdr calibration results}
#' @export
AnalyzeSimulatedData <- function(Sim, Forward, Reverse, pi0=NULL, psm.fdr.bin=c(0,0.01,0.05,seq(0.1,1,by=0.1))) {
out <- AnalyzeSimulatedData.0(Sim = Sim, Forward = Forward, Reverse = Reverse, pi0 = pi0, add.1 = T, psm.fdr.bin = psm.fdr.bin)
out <- Change.pi0(Results = list(out))
return(out[[1]])
}
AnalyzeSimulatedData.0 <- function(Sim, Forward, Reverse, pi0=NULL, add.1=T, psm.fdr.bin=c(0,0.01,0.05,seq(0.1,1,by=0.1))) {
Peptide <- rep(NA,length(Sim$Pep))
Peptide[!is.na(Sim$Pep)] <- paste0(Sim$Pep[!is.na(Sim$Pep)],".",Sim$Charge[!is.na(Sim$Pep)])
Charge <- Sim$Headers.sim$precursorCharge
out <- list()
tmp <- lapply(Forward$Results,function(x){
x <- x$FM
if (is.na(x[1])) {return(list(c(NA,NA),NA))}
max.x <- max(rowSums(rbind(x[,1:4])))
pep.x <- rownames(x)[which.max(rowSums(rbind(x[,1:4])))]
see.x <- 1 - 1/(sum(exp(rowSums(rbind(x[,1:4])) - max.x)))
return(list(c(max.x,see.x),pep.x))
})
tmp2 <- sapply(tmp,function(x){x[[1]]})
out$S.F <- tmp2[1,]
out$SEE.F <- tmp2[2,]; rm(tmp2)
out$Pep.F <- sapply(tmp,function(x){x[[2]]})
out$Correct <- rep(NA,length(out$Pep.F))
ind.correct <- which(Peptide==out$Pep.F)
ind.use <- which(!is.na(out$S.F)); ind.incorrect <- ind.use[!ind.use%in%ind.correct]
out$Correct[ind.correct] <- 1; out$Correct[ind.incorrect] <- 0
out$S.R <- sapply(Reverse$Results,function(x){
x <- x$FM
if (is.na(x[1])) {return(NA)}
return(max(rowSums(rbind(x[,1:4]))))
})
p.values <- Pvalue(S.F = out$S.F, S.R = out$S.R, charge.bin = list(2,3,4:10), charge = Charge, add.1 = add.1, pi0 = pi0)
out$pvalue <- p.values$pvalue
out$Sfdr <- p.values$fdr
out$pi0.hat <- p.values$pi0.hat; out$pi0 <- pi0
out$PSMfdr <- 1 - (1-p.values$fdr)*(1-out$SEE.F)
out$PSMfdr.bins <- t(sapply(1:(length(psm.fdr.bin)-1),function(i){
ind.i <- which(out$PSMfdr>=psm.fdr.bin[i] & out$PSMfdr<psm.fdr.bin[i+1])
c(length(ind.i),mean(out$PSMfdr[ind.i]),1-mean(out$Correct[ind.i],na.rm=T))
}))
colnames(out$PSMfdr.bins) <- c("n","Est.PSMfdr","True.PSMfdr")
out$Sfdr.bins <- t(sapply(1:(length(psm.fdr.bin)-1),function(i){
ind.i <- which(out$Sfdr>=psm.fdr.bin[i] & out$Sfdr<psm.fdr.bin[i+1])
c(length(ind.i),mean(out$Sfdr[ind.i]),1-mean(out$Correct[ind.i],na.rm=T))
}))
colnames(out$Sfdr.bins) <- c("n","Est.Sfdr","True.PSMfdr")
return(out)
}
Pvalue <- function(S.F, S.R, charge, charge.bin=list(2,3,4:10), add.1=T, pi0=NULL) {
out <- rep(NA,length(S.F))
out.fdr <- rep(NA,length(S.F))
ind.use <- !is.na(S.F)
pi0.hat <- rep(NA,length(charge.bin))
for (i in 1:length(charge.bin)) {
ind.c <- which(ind.use & charge%in%charge.bin[[i]])
S.R.c <- S.R[ind.c]
n.c <- length(S.R.c)
if (add.1) {
out[ind.c] <- sapply(S.F[ind.c],function(x){(sum(S.R.c>x)+1)/(n.c+1)})
} else {
out[ind.c] <- sapply(S.F[ind.c],function(x){mean(S.R.c>x)})
}
out.fdr[ind.c] <- qvalue::lfdr(out[ind.c], pi0 = pi0)
pi0.hat[i] <- qvalue::pi0est(p = out[ind.c])$pi0
}
return(list(pvalue=out,fdr=out.fdr,pi0.hat=pi0.hat,pi0=pi0))
}
Bin.prob <- function(Results, type=c("PSMfdr","Sfdr","SOE"), psm.fdr.bin=c(0,0.01,0.05,seq(0.1,1,by=0.1)), all=T, include.1=T) {
type <- match.arg(type,c("PSMfdr","Sfdr","SOE"))
if (all) {
ind <- which(sapply(Results,function(x){!is.null(x)}))
Correct <- unlist(lapply(Results[ind],function(x){x$Correct}))
if (type=="PSMfdr") {prob <- unlist(lapply(Results[ind],function(x){x$PSMfdr}))}
if (type=="Sfdr") {prob <- unlist(lapply(Results[ind],function(x){x$Sfdr}))}
if (type=="SOE") {prob <- unlist(lapply(Results[ind],function(x){x$SEE.F}))}
} else {
Correct <- Results$Correct
if (type == "PSMfdr") {prob <- Results$PSMfdr}
if (type == "Sfdr") {prob <- Results$Sfdr}
if (type == "SOE") {prob <- Results$SEE.F}
}
max.i <- length(psm.fdr.bin)-1
out <- t(sapply(1:max.i,function(i){
ind.i <- which(prob>=psm.fdr.bin[i] & prob<ifelse(i==max.i,ifelse(include.1,1.01,psm.fdr.bin[i+1]),psm.fdr.bin[i+1]))
c(length(ind.i),mean(prob[ind.i]),1-mean(Correct[ind.i],na.rm=T))
}))
colnames(out) <- c("n",paste0("Est.",type),"True.PSMfdr")
return(out)
}
Change.pi0 <- function(Results, pool.charge=T) {
ind <- which(sapply(Results,function(x){!is.null(x)}))
Results[ind] <- lapply(Results[ind],function(x){
if (pool.charge) {
x$pi0.hat <- qvalue::pi0est(p = x$pvalue, pi0.method = "bootstrap")$pi0
x$Sfdr <- qvalue::lfdr(p = x$pvalue, pi0 = x$pi0.hat)
x$PSMfdr <- 1 - (1-x$Sfdr)*(1-x$SEE.F)
}
return(x)
})
return(Results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.