R/computeFDR.R
In prozor: Minimal Protein Set Explaining Peptide Spectrum Matches

Documented in computeFDR computeFDRwithID plotFDR predictScoreFDR

#' Compute FDR given a score
#'
#' For more details and references see package vignette
#' \code{vignette("TargetDecoyFDR_Example", package = "prozor")}
#'
#' @param score a vector with scores
#' @param ID - list with protein id's
#' @param decoy decoy pattern, default "REV_"
#' @param larger_better if larger score better than small (default TRUE), If small score better set FALSE
#' @return list with ID, decoy_hit (indicates if decoy), score the search engine score,
#' FDR1 false discovery rate estimated using the method of Elias and Gygi; FDR2 - estimated using the method of Kell.
#' @export
#' @examples
#' data(fdrSample)
#' # call constructor
#' #nrow(fdrSample)
#' #fdrSample <- dplyr::slice_sample(fdrSample, n = 40000)
#'
#' #usethis::use_data(fdrSample, overwrite = TRUE)
#' fdr1 <- computeFDRwithID(fdrSample$score, fdrSample$proteinID, larger_better = FALSE)
#' names(fdr1)
#' plot(fdr1$score, fdr1$FPR,type="l",xlim=c(0,0.001), ylim=c(0,0.0002))
#' lines(fdr1$score, fdr1$qValue_FPR, col=2)
#' lines(fdr1$score, fdr1$SimpleFDR,type="l",col=4)
#' lines(fdr1$score, fdr1$qValue_SimpleFDR, col=5)
#'
#'
#' fdr1 <- computeFDRwithID(fdrSample$score2, fdrSample$proteinID, larger_better = TRUE)
#' names(fdr1)
#' plot(fdr1$score, fdr1$FPR,type="l", xlim=c(2.5,5),ylim=c(0,0.001))
#' lines(fdr1$score, fdr1$qValue_FPR, col=2)
#' lines(fdr1$score, fdr1$SimpleFDR,type="l",col=4)
#' lines(fdr1$score, fdr1$qValue_SimpleFDR, col=5)
#'
computeFDRwithID <- function(score, ID, decoy = "REV_", larger_better = TRUE){
    decoy_hit <- grepl(decoy, ID)
    res <- computeFDR(score, decoy_hit, larger_better = larger_better)
    ID <- ID[res$order]
    res$ID <- ID
    return(res)
}

#' Compute FDR given a score
#'
#' Same as computeFDRwithID but works with decoy_hit boolean vector.
#' For more details and references see package vignette
#' \code{vignette("TargetDecoyFDR_Example", package = "prozor")}
#'
#' @param score score
#' @param decoy_hit indicates if decoy hit
#' @param larger_better is larger score the better one (default TRUE)
#' @export
#' @return list with decoy_hit (indicates if decoy), score the search engine score,
#' FDR1 false discovery rate estimated using the method of Gygi, SimpleFDR - estimated using the method of Kaell.
#'
#' @examples
#' data(fdrSample)
#'
#' fdr1 <- computeFDR(fdrSample$score, grepl("REV_",fdrSample$proteinID), larger_better = FALSE)
#' head(as.data.frame(fdr1))
#'
computeFDR <- function(score, decoy_hit , larger_better = TRUE){
    ord <- order(score, decreasing = larger_better)
    score <- score[ord]
    decoy_hit <- decoy_hit[ord]

    FP <- cumsum(decoy_hit)
    TP <- seq_len(length(ord)) - FP

    FPR <- (2 * FP) / (TP + FP)
    SimpleFDR <- FP / TP
    return(list(larger_better = larger_better,
                order = ord,
                decoy_hit = decoy_hit,
                score = score,
                FPR = FPR,
                SimpleFDR = SimpleFDR,
                qValue_FPR = rev( cummin( rev( FPR ) ) ),
                qValue_SimpleFDR = rev(cummin( rev( SimpleFDR ) ) )
    )
    )
}


#' plot FDR
#'
#' For more details and references see package vignette
#' \code{vignette("TargetDecoyFDR_Example", package = "prozor")}
#'
#' @param data data returned by computeFDR function
#' @export
#' @return creates a plot
#' @examples
#' #library(prozor)
#' data(fdrSample)
#' fdr1 <- computeFDRwithID(fdrSample$score, fdrSample$proteinID, larger_better = FALSE)
#' fdr2 <- computeFDRwithID(fdrSample$score2, fdrSample$proteinID, larger_better = TRUE)
#' plotFDR(fdr1)
#' plotFDR(fdr2)
#' data<-fdr1
#'
plotFDR <- function(data){
    graphics::par(mar = c(4,4,2,4))
    tx <- with(data,graphics::hist(score,plot = FALSE, breaks = 100))
    t1 <- with(data,graphics::hist(score[!decoy_hit],breaks = tx$breaks, xlab = "score",col = 1, border = 1,
                                   main = ifelse(data$larger_better,"larger score better","smaller score better")))
    with(data,graphics::hist(score[decoy_hit],add = TRUE,breaks = t1$breaks, col = 2, border = 2))
    graphics::par(new = TRUE)
    with(data,graphics::plot(score,qValue_FPR * 100, type = "l",col = 4,lwd = 2,xlab = NA, ylab = NA , axes = FALSE))
    with(data,graphics::lines(score,qValue_SimpleFDR  * 100, type = "l", col = 3,lwd = 2, xlab = NA, ylab = NA))

    graphics::axis(side = 4)
    graphics::mtext(side = 4, line = 3, 'FDR %')
}


#' Predict score given FDR
#'
#' For more details and references see package vignette
#' \code{vignette("TargetDecoyFDR_Example", package = "prozor")}
#'
#' @param fdrObj object generated by computeFDR
#' @param qValue false discovery rate in percent, default 1 percent
#' @param method either FPR or SimpleFDR, default is SimpleFDR
#' @return score for a given FDR
#' @export
#' @examples
#' data(fdrSample)
#' fdr1 <- computeFDRwithID(fdrSample$score, fdrSample$proteinID, larger_better = FALSE)
#'
#' predictScoreFDR(fdr1,qValue=5)
#' fdr2<-computeFDRwithID(fdrSample$score2, fdrSample$proteinID, larger_better = TRUE)
#' predictScoreFDR(fdr2,qValue=5)
#'
predictScoreFDR <- function(fdrObj,
                            qValue=1,
                            method="SimpleFDR"){
    if (method == "FPR") {
        validFDR <- fdrObj$qValue_FPR < qValue/100
    } else if (method == "SimpleFDR") {
        validFDR <- fdrObj$qValue_SimpleFDR < qValue/100
    } else {
        stop("no such method: ", method , "\n")
    }

    validScores <- fdrObj$score[validFDR]
    if (!fdrObj$larger_better) {
        max(validScores)
    } else {
        min(validScores)
    }
}