PeCorA: Peptide correlation analysis

Documented in PeCorA

#' @title PeCorA
#' @description core function of PeCorA workflow
#' @param t scaled data
#' @author Maria Dermit <maria.dermit@qmul.ac.uk>
#' @return dataframe output of PeCorA analysis containing values for proteins/peptides adj_pval for peptide interaction across condition and peptide groups.
#' @details PeCorA function relies on linear model to assess whether the slope of a peptide’s change across treatment groups differs from the slope of all other peptides assigned to the same protein.
#' @examples
#' \dontrun{
#' if(interactive()){
#'  disagree_peptides <- PeCorA (scaled_peptides)
#'  }
#' }
#' @rdname PeCorA
#' @importFrom stats lm
#' @importFrom car Anova
#' @export
PeCorA <- function(t){
    # get all unique protein groups
      print("checking which proteins still have at least 2 peptides")
    pgs<-levels(as.factor(t$Protein))
    pgs_morethan2<-c()
    for(x in pgs){
        if(length(unique(t[t$Protein %in% x, "modpep_z"]))>1){
            pgs_morethan2<-c(pgs_morethan2, x)
          }
      }

      ### loop through proteins with >2 pep, get subset df
      ###### look through peptide precursors in that protein
      ######### check linear model, record p value
      ############ adjust pvals within each protein ### could do that differently?
      allp<-list() # empty list to store pvalues for each protein
      j=1 # progress bar iterator
      t0<-Sys.time()  # initial system time
      print("computing the interaction p-values")
      pb <- txtProgressBar(min = 0, max = length(pgs_morethan2), style = 3)
      for( x in pgs_morethan2){ #loop through protein groups
          tmpdf <- t[t$Protein ==x,]  ## get the subset dataframe
          tmpdf["allothers"]<-rep("allothers", times=nrow(tmpdf))
          pvalues<-c(rep(0, length(unique(tmpdf$modpep_z))))
          i=1
          for( y in unique(tmpdf$modpep_z)){
              subtmpdf <- tmpdf
              subtmpdf[which(tmpdf$modpep_z ==y),"allothers"] <- y
              tmplm<-lm(subtmpdf$ms1adj ~ subtmpdf$Condition*subtmpdf$allothers)
              tmpanova<-car::Anova(tmplm)
              pvalues[i]<-tmpanova$`Pr(>F)`[3]
              i=i+1
            }
          allp[[x]]<-pvalues # record p-values
          setTxtProgressBar(pb, j)
          j=j+1
        }

        print(" ")
      print(paste("PeCorA finished in ", round(Sys.time()-t0,2), " minutes", sep=""))


        ### post testing analysis
        print(  paste("number of proteins tested =", length(allp), sep=" ")  )
      print(  paste("number of peptides tested =", length(unlist(allp)), sep=" ")  )



        ######## make table of the peptides that disagree   ##########
      ### dataframe with all values
        print("started making data table")
      alldf= data.frame()
      x<-names(allp)[1]
      for(x in names(allp)){
          #print(x)
            tmpdf <- t[t$Protein ==x,]
            #tmpdf["allothers"]<-rep("allothers", times=nrow(tmpdf))
              tmp_peps <- unique(tmpdf$modpep_z)
              if(length(tmp_peps)>0){
                  tmp_pval <- allp[[x]]
                  tmpout= cbind.data.frame(protein=rep(x, length(allp[[x]])), tmp_peps, tmp_pval=as.numeric(tmp_pval))
                  alldf=rbind( alldf,  tmpout)
                }
            }
      print("correcting p-values")
      alldf$adj_pval<-p.adjust(alldf$tmp_pval, method="BH")     ## adjust p-values
      alldf_ordered<-alldf[order(alldf$adj_pval),]              ## sort

        ## print summary results
        print(paste("number of uncorrelated peptides =", nrow(alldf[alldf$adj_pval<=0.01,]),  sep=" "))
      print(paste("number of proteins with uncorrelated peptides =",
                                  length(unique(alldf[alldf$adj_pval<=0.01,]$protein)),
                                  sep=" "
                      ))

        colnames(alldf_ordered)[2]<-"peptide"
        colnames(alldf_ordered)[3]<-"pvalue"
        alldf_ordered
      }