R/estimatePPIErrorRates.R

Defines functions getFNrateFromFPrate getFPrateFromFNrate getx123 useMethodOfMoments getFPRateFromGSNeg getFNRateFromGSPos estimatePPIErrorRates

Documented in estimatePPIErrorRates

estimatePPIErrorRates <- function(matList, GSPos=NULL, GSNeg=NULL){
  printStatement <- "There was no Positive nor Negative Gold Standard Given, so
                     the error rates could not be estimated."

  ##If there is neither a positive nor negative GS, then this function
  ##is irrelevant
  if(is.null(GSPos) && is.null(GSNeg)){
    return(printStatement)
  }

  pfnList = NULL
  pfpList = NULL

  ##if we are given a positive gold standard, we will use it
  ##to estimate the pfn rates for each experiment
  if(!(is.null(GSPos))){
    pfnList <- getFNRateFromGSPos(matList=matList, GSPos=GSPos)
  }

  ##if we are give a negative gold standard, we will use it
  ##to estimate the pfp rates for each experiment
  if(!(is.null(GSNeg))){
    pfpList <- getFPRateFromGSNeg(matList=matList, GSNeg=GSNeg)
  }

  ##if both GS's are given, we will use the pfp and pfn estimates
  ##derived above
  if(!is.null(pfnList) && !is.null(pfpList)){
    errorMat <- cbind(pfnList, pfpList)
    colnames(errorMat) <- c("pfn","pfp")
    rownames(errorMat) <- names(matList)
    return(errorMat)
  }

  ##if the positive GS is given but not the negative, then we use
  ##the methods of moments to estimate the pfp rate
  if(is.null(GSNeg) && !(is.null(GSPos))){
    errorMat <- useMethodOfMoments(matList, pfnList, estFP=TRUE)
    return(errorMat)
  }

  ##if the negative GS is given but not the negative, then we use
  ##the methods of moments to estimate the pfn rate
  if(!(is.null(GSNeg)) && is.null(GSPos)){
    errorMat <- useMethodOfMoments(matList, pfnList, estFP=FALSE)
    return(errorMat)
  }
}

getFNRateFromGSPos <- function(matList, GSPos){

  pfnList <- sapply(matList, function(x) {
      dnames1 <- lapply(dimnames(x[[1]]), sort)
      dnames2 <- lapply(dimnames(x[[2]]), sort)
      if(!identical(dnames1,dnames2))
        {stop("The data matrix and tested matrix do not have the
                 same dimension names")}
      dnGS = dimnames(GSPos)
      ##first we need to be in the same universe to get anything of interest
      ##so we restrict the Gold Standard and the data/tested matrices to the
      ##common proteins 
      restrictedGS <- GSPos[intersect(dnGS[[1]], dnames1[[1]]),
                         intersect(dnGS[[2]], dnames1[[2]])]
      restrictedTested <- x[[2]][intersect(dnGS[[1]], dnames1[[1]]),
                                 intersect(dnGS[[2]], dnames1[[2]])]
      
      restrictedData <- x[[1]][intersect(dnGS[[1]], dnames1[[1]]),
                               intersect(dnGS[[2]], dnames1[[2]])]
      
      ##in order to estimate the false negative rate, we need to know the number
      ##of postive interactions in the GS which been tested in the experiment but
      ##not found..this is computed and recorded in testedInGSNotFound. then we
      ##quotient this by all the positive interactions in the GS which have been
      ##tested in the experiment
      testedInGSNotFound <- sum((!restrictedData) * restrictedTested * restrictedGS)
                      
      testedInGS <- sum(restrictedTested * restrictedGS)
                      
      pfnEst <- testedInGSNotFound/testedInGS
      pfnEst
    })

  return(pfnList)

}

getFPRateFromGSNeg <- function(matList, GSNeg){

  pfpList <- sapply(matList, function(x) {
    dnames1 <- lapply(dimnames(x[[1]]), sort)
    dnames2 <- lapply(dimnames(x[[2]]), sort)
    if(!identical(dnames1,dnames2))
      {stop("The data matrix and tested matrix do not have the
                 same dimension names")}
    dnGS = dimnames(GSNeg)
    ##first we need to be in the same universe to get anything of interest
    ##so we restrict the Gold Standard and the data/tested matrices to the
    ##common proteins 
    restrictedGS <- GSNeg[intersect(dnGS[[1]], dnames1[[1]]),
                          intersect(dnGS[[2]], dnames1[[2]])]
    restrictedTested <- x[[2]][intersect(dnGS[[1]], dnames1[[1]]),
                               intersect(dnGS[[2]], dnames1[[2]])]
    restrictedData <- x[[1]][intersect(dnGS[[1]], dnames1[[1]]),
                             intersect(dnGS[[2]], dnames1[[2]])]
    ##in order to estimate the false positive rate, we need to know the number
    ##of negative interactions in the GS which been tested in the experiment but
    ##were found..this is computed and recorded in notInGSFound. then we
    ##quotient this by all the positive interactions in the GS which have been
    ##tested in the experiment
    notInGSFound <- restrictedData*restrictedTested*restrictedGS
    notInGSTested <- restrictedTested*restrictedGS
    
    pfpEst <- notInGSFound/notInGSTested
    pfpEst
  })

  return(pfpList)
  
}


useMethodOfMoments <- function(matList, errorList, estFP=TRUE){

  ##x123 computes the statistics needed to estimate the pfp rate given the pfn rate
  x123 <- lapply(matList, getx123)
  
  if(estFP){
    errorList2 <- mapply(getFPrateFromFNrate, x123, errorList)
    errorMat <- cbind(errorList2, errorList)
    colnames(errorMat) <- c("pFP","pFN")
  }
  else{
    errorList2 <- mapply(getFNrateFromFPrate, x123, errorList)
    errorMat <- cbind(errorList2, errorList)
    colnames(errorMat) <- c("pFN","pFP")
    errorMat <- errorMat[,2:1]
    
  }

  unlist(errorList2)

  rownames(errorMat) <- names(matList)
  return(errorMat)
  
}
##      input: a list of two matrices,pos and test, in ppMatrix format.
  ##      output:
  ##          x1: the number of reciprocally observed edges.
  ##          x2: the number of reciprocally unobserved edges.
  ##          x3: the number of unreciprocally observed edges.
  
getx123 = function(ppMatrix) {
  x1<-sum(ppMatrix[["test"]]==2 & ppMatrix[["pos"]]==2)
  x2<-sum(ppMatrix[["test"]]==2 & ppMatrix[["pos"]]==0)
  x3<-sum(ppMatrix[["test"]]==2 & ppMatrix[["pos"]]==1)
  c(x1=x1,x2=x2,x3=x3)
}


## this function calculate FPrate from FNrate for an experiment.
## input: x1,x2,x3 are from the output of getx123()
##        FNrate is the FNrate for that experiment.
## output: the FPrate.

getFPrateFromFNrate <- function(y, FNrate) {
  ##TC needs to check this...
  ##the roles of FP and FN are interchangable by symmetry, so if
  ##FNrate is really FPrate, the function can also double as
  ##getFNrateFromFPrate
  x1 <- y[[1]]
  x2 <- y[[2]]
  x3 <- y[[3]]
  tot <- sum(y) 
  u<-x1-tot*(1-FNrate)^2
  v<-x2-tot*(FNrate)^2
  a<-u-v
  b<-(-2)*u
  c<-u-u*FNrate^2+v*(1-FNrate)^2
  d<-b^2-4*a*c
  if(d>=0)
    {
      FP1<-(-b+sqrt(d))/(2*a)
      FP2<-(-b-sqrt(d))/(2*a)
    } else{
      FP1<-NA
      FP2<-NA
    }
  if(!is.na(FP1) && FP1!=(1-FNrate)){ return(FP1)}
  else{ return(FP2)}
}

getFNrateFromFPrate <- function(y, FPrate){

  x1 <- y[[1]]
  x2 <- y[[2]]
  x3 <- y[[3]]
  tot <- sum(y)
  u<-x1-tot*FPrate^2
  v<-x2-tot*(1-FPrate)^2
  a<-u-v
  b<-2*v
  c<-(-v)-u*(1-FPrate)^2+v*FPrate^2

  d<-b^2-4*a*c
  if(d>=0)
    {
      FN1<-(-b+sqrt(d))/(2*a)
      FN2<-(-b-sqrt(d))/(2*a)
    } else{
      FN1<-NA
      FN2<-NA
    }
  if(!is.na(FN1) && FN1!=(1-FPrate)){ return(FN1)}
  else{ return(FN2)}

}

Try the ppiStats package in your browser

Any scripts or data that you put into this service are public.

ppiStats documentation built on Nov. 8, 2020, 8:18 p.m.