R/AnalyzeSimulatedData.R

Defines functions Change.pi0 Bin.prob Pvalue AnalyzeSimulatedData.0 AnalyzeSimulatedData

Documented in AnalyzeSimulatedData

library(qvalue)
#' Analyze simulation reslts
#' 
#' Analyze results derived from simulated data.
#' 
#' @param Sim Simulation output from \code{SimulateDataPaper}
#' @param Forward Target search results from \code{PerformSearch}
#' @param Reverse Decoy search results from \code{PerformSearch}
#' @param pi0 If known, the fraction of simulated spectra whose generating analyte does not lie in the target database. Detault is \code{NULL}, i.e. unknown
#' @param psm.fdr.bin PSM-fdr bins to determine PSM-fdr calibration
#' 
#' @return A list \item{S.F}{Bayes factor score for target database} \item{S.R}{Bayes factor score for reverse database} \item{SEE.F}{Score-ordering error rate} \item{Sfdr}{DI-fdr} \item{PSMfdr}{PSM-fdr} \item{pi0.hat}{Estimate for pi0} \item{pvalues}{p-values for the null hypothesis that the spectrum was not generated by an analyte in the target database} \item{Pep.F}{Inferred target peptide} \item{Correct}{Is \code{Pep.F} correct} \item{PSMfdr.bins}{PSM-fdr calibration results}
#' @export
AnalyzeSimulatedData <- function(Sim, Forward, Reverse, pi0=NULL, psm.fdr.bin=c(0,0.01,0.05,seq(0.1,1,by=0.1))) {
  out <- AnalyzeSimulatedData.0(Sim = Sim, Forward = Forward, Reverse = Reverse, pi0 = pi0, add.1 = T, psm.fdr.bin = psm.fdr.bin)
  out <- Change.pi0(Results = list(out))
  return(out[[1]])
}

AnalyzeSimulatedData.0 <- function(Sim, Forward, Reverse, pi0=NULL, add.1=T, psm.fdr.bin=c(0,0.01,0.05,seq(0.1,1,by=0.1))) {
  Peptide <- rep(NA,length(Sim$Pep))
  Peptide[!is.na(Sim$Pep)] <- paste0(Sim$Pep[!is.na(Sim$Pep)],".",Sim$Charge[!is.na(Sim$Pep)])
  Charge <- Sim$Headers.sim$precursorCharge
  out <- list()
  tmp <- lapply(Forward$Results,function(x){
    x <- x$FM
    if (is.na(x[1])) {return(list(c(NA,NA),NA))}
    max.x <- max(rowSums(rbind(x[,1:4])))
    pep.x <- rownames(x)[which.max(rowSums(rbind(x[,1:4])))]
    see.x <- 1 - 1/(sum(exp(rowSums(rbind(x[,1:4])) - max.x)))
    return(list(c(max.x,see.x),pep.x))
  })
  tmp2 <- sapply(tmp,function(x){x[[1]]})
  out$S.F <- tmp2[1,]
  out$SEE.F <- tmp2[2,]; rm(tmp2)
  out$Pep.F <- sapply(tmp,function(x){x[[2]]})
  out$Correct <- rep(NA,length(out$Pep.F))
  ind.correct <- which(Peptide==out$Pep.F)
  ind.use <- which(!is.na(out$S.F)); ind.incorrect <- ind.use[!ind.use%in%ind.correct]
  out$Correct[ind.correct] <- 1; out$Correct[ind.incorrect] <- 0
  out$S.R <- sapply(Reverse$Results,function(x){
    x <- x$FM
    if (is.na(x[1])) {return(NA)}
    return(max(rowSums(rbind(x[,1:4]))))
  })
  p.values <- Pvalue(S.F = out$S.F, S.R = out$S.R, charge.bin = list(2,3,4:10), charge = Charge, add.1 = add.1, pi0 = pi0)
  out$pvalue <- p.values$pvalue
  out$Sfdr <- p.values$fdr
  out$pi0.hat <- p.values$pi0.hat; out$pi0 <- pi0
  out$PSMfdr <- 1 - (1-p.values$fdr)*(1-out$SEE.F)
  out$PSMfdr.bins <- t(sapply(1:(length(psm.fdr.bin)-1),function(i){
    ind.i <- which(out$PSMfdr>=psm.fdr.bin[i] & out$PSMfdr<psm.fdr.bin[i+1])
    c(length(ind.i),mean(out$PSMfdr[ind.i]),1-mean(out$Correct[ind.i],na.rm=T))
  }))
  colnames(out$PSMfdr.bins) <- c("n","Est.PSMfdr","True.PSMfdr")
  out$Sfdr.bins <- t(sapply(1:(length(psm.fdr.bin)-1),function(i){
    ind.i <- which(out$Sfdr>=psm.fdr.bin[i] & out$Sfdr<psm.fdr.bin[i+1])
    c(length(ind.i),mean(out$Sfdr[ind.i]),1-mean(out$Correct[ind.i],na.rm=T))
  }))
  colnames(out$Sfdr.bins) <- c("n","Est.Sfdr","True.PSMfdr")
  return(out)
}

Pvalue <- function(S.F, S.R, charge, charge.bin=list(2,3,4:10), add.1=T, pi0=NULL) {
  out <- rep(NA,length(S.F))
  out.fdr <- rep(NA,length(S.F))
  ind.use <- !is.na(S.F)
  pi0.hat <- rep(NA,length(charge.bin))
  for (i in 1:length(charge.bin)) {
    ind.c <- which(ind.use & charge%in%charge.bin[[i]])
    S.R.c <- S.R[ind.c]
    n.c <- length(S.R.c)
    if (add.1) {
      out[ind.c] <- sapply(S.F[ind.c],function(x){(sum(S.R.c>x)+1)/(n.c+1)})
    } else {
      out[ind.c] <- sapply(S.F[ind.c],function(x){mean(S.R.c>x)})
    }
    out.fdr[ind.c] <- qvalue::lfdr(out[ind.c], pi0 = pi0)
    pi0.hat[i] <- qvalue::pi0est(p = out[ind.c])$pi0
  }
  return(list(pvalue=out,fdr=out.fdr,pi0.hat=pi0.hat,pi0=pi0))
}

Bin.prob <- function(Results, type=c("PSMfdr","Sfdr","SOE"), psm.fdr.bin=c(0,0.01,0.05,seq(0.1,1,by=0.1)), all=T, include.1=T) {
  type <- match.arg(type,c("PSMfdr","Sfdr","SOE"))
  if (all) {
    ind <- which(sapply(Results,function(x){!is.null(x)}))
    Correct <- unlist(lapply(Results[ind],function(x){x$Correct}))
    if (type=="PSMfdr") {prob <- unlist(lapply(Results[ind],function(x){x$PSMfdr}))}
    if (type=="Sfdr") {prob <- unlist(lapply(Results[ind],function(x){x$Sfdr}))}
    if (type=="SOE") {prob <- unlist(lapply(Results[ind],function(x){x$SEE.F}))}
  } else {
    Correct <- Results$Correct
    if (type == "PSMfdr") {prob <- Results$PSMfdr}
    if (type == "Sfdr") {prob <- Results$Sfdr}
    if (type == "SOE") {prob <- Results$SEE.F}
  }
  max.i <- length(psm.fdr.bin)-1
  out <- t(sapply(1:max.i,function(i){
    ind.i <- which(prob>=psm.fdr.bin[i] & prob<ifelse(i==max.i,ifelse(include.1,1.01,psm.fdr.bin[i+1]),psm.fdr.bin[i+1]))
    c(length(ind.i),mean(prob[ind.i]),1-mean(Correct[ind.i],na.rm=T))
  }))
  colnames(out) <- c("n",paste0("Est.",type),"True.PSMfdr")
  return(out)
}

Change.pi0 <- function(Results, pool.charge=T) {
  ind <- which(sapply(Results,function(x){!is.null(x)}))
  Results[ind] <- lapply(Results[ind],function(x){
    if (pool.charge) {
      x$pi0.hat <- qvalue::pi0est(p = x$pvalue, pi0.method = "bootstrap")$pi0
      x$Sfdr <- qvalue::lfdr(p = x$pvalue, pi0 = x$pi0.hat)
      x$PSMfdr <- 1 - (1-x$Sfdr)*(1-x$SEE.F)
    }
    return(x)
  })
  return(Results)
}
chrismckennan/MSeQUiP documentation built on Dec. 19, 2021, 4:01 p.m.