#' @title PeCorA
#' @description core function of PeCorA workflow
#' @param t scaled data
#' @author Maria Dermit <maria.dermit@qmul.ac.uk>
#' @return dataframe output of PeCorA analysis containing values for proteins/peptides adj_pval for peptide interaction across condition and peptide groups.
#' @details PeCorA function relies on linear model to assess whether the slope of a peptide’s change across treatment groups differs from the slope of all other peptides assigned to the same protein.
#' @examples
#' \dontrun{
#' if(interactive()){
#' disagree_peptides <- PeCorA (scaled_peptides)
#' }
#' }
#' @rdname PeCorA
#' @importFrom stats lm
#' @importFrom car Anova
#' @export
PeCorA <- function(t){
# get all unique protein groups
print("checking which proteins still have at least 2 peptides")
pgs<-levels(as.factor(t$Protein))
pgs_morethan2<-c()
for(x in pgs){
if(length(unique(t[t$Protein %in% x, "modpep_z"]))>1){
pgs_morethan2<-c(pgs_morethan2, x)
}
}
### loop through proteins with >2 pep, get subset df
###### look through peptide precursors in that protein
######### check linear model, record p value
############ adjust pvals within each protein ### could do that differently?
allp<-list() # empty list to store pvalues for each protein
j=1 # progress bar iterator
t0<-Sys.time() # initial system time
print("computing the interaction p-values")
pb <- txtProgressBar(min = 0, max = length(pgs_morethan2), style = 3)
for( x in pgs_morethan2){ #loop through protein groups
tmpdf <- t[t$Protein ==x,] ## get the subset dataframe
tmpdf["allothers"]<-rep("allothers", times=nrow(tmpdf))
pvalues<-c(rep(0, length(unique(tmpdf$modpep_z))))
i=1
for( y in unique(tmpdf$modpep_z)){
subtmpdf <- tmpdf
subtmpdf[which(tmpdf$modpep_z ==y),"allothers"] <- y
tmplm<-lm(subtmpdf$ms1adj ~ subtmpdf$Condition*subtmpdf$allothers)
tmpanova<-car::Anova(tmplm)
pvalues[i]<-tmpanova$`Pr(>F)`[3]
i=i+1
}
allp[[x]]<-pvalues # record p-values
setTxtProgressBar(pb, j)
j=j+1
}
print(" ")
print(paste("PeCorA finished in ", round(Sys.time()-t0,2), " minutes", sep=""))
### post testing analysis
print( paste("number of proteins tested =", length(allp), sep=" ") )
print( paste("number of peptides tested =", length(unlist(allp)), sep=" ") )
######## make table of the peptides that disagree ##########
### dataframe with all values
print("started making data table")
alldf= data.frame()
x<-names(allp)[1]
for(x in names(allp)){
#print(x)
tmpdf <- t[t$Protein ==x,]
#tmpdf["allothers"]<-rep("allothers", times=nrow(tmpdf))
tmp_peps <- unique(tmpdf$modpep_z)
if(length(tmp_peps)>0){
tmp_pval <- allp[[x]]
tmpout= cbind.data.frame(protein=rep(x, length(allp[[x]])), tmp_peps, tmp_pval=as.numeric(tmp_pval))
alldf=rbind( alldf, tmpout)
}
}
print("correcting p-values")
alldf$adj_pval<-p.adjust(alldf$tmp_pval, method="BH") ## adjust p-values
alldf_ordered<-alldf[order(alldf$adj_pval),] ## sort
## print summary results
print(paste("number of uncorrelated peptides =", nrow(alldf[alldf$adj_pval<=0.01,]), sep=" "))
print(paste("number of proteins with uncorrelated peptides =",
length(unique(alldf[alldf$adj_pval<=0.01,]$protein)),
sep=" "
))
colnames(alldf_ordered)[2]<-"peptide"
colnames(alldf_ordered)[3]<-"pvalue"
alldf_ordered
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.