Nothing
qc.prepare <-
function(Data, SpeciesTable,placeholder,templateFasta,path = "./", filename = NULL,BSAID = "P02769",RESettings = RESettings,Peptides = NULL,AllPeptides = NULL,MSMS = NULL,msSC = NULL,msmsScans = NULL,selectedFile = 1,ProtFDR = 0.01,rfn = "",case_sensitive_matching= F){
score <- list()
sumDat <- function(){
temp <- list.files(path,pattern = "summary",full.name = T)
if(length(temp) > 0){
temp <- read.csv(temp[1],sep = "\t")
colnames(temp) <- tolower(colnames(temp))
return(cbind(as.character(temp$raw.file),temp$ms.ms.identified....))
}else{return(NA)}
}
get.env2 <- environment()
ls.null <- function(get.env = get.env2){
ls.temp <- ls(envir = get.env)
for(i in 1:length(ls.temp)){
temp.i <- get(ls.temp[i])
if(length(temp.i) == 0){
assign(ls.temp[i],0,envir = get.env)
}
}
}
#### Define thresholds
# needs to be modular
####
thresholds <- list()
thresholds$LC.Peak.rel.Symmetry <- c(1,0.7,1.3)
thresholds$LC.PeakWidth.s <- c(20,0,60)
thresholds$LC.ProfileSymmetry <- 0.8
thresholds$MS.Intensities.log10 <- c(8,6.5,9.5) # log10 int
thresholds$MS.Precision.ppm <- c(4,0,8)
thresholds$MS.IsotopicPatternsPerMin <- 2000
thresholds$MSMS.MatchedFragmentsPerMSMS <- 4000
thresholds$MSMS.Intensities.log10 <- c(4.5,4,5) # log10 Int
thresholds$MSMS.PSM.per.min <- 200
thresholds$MSMS.ProteinCoverage <- 30
# old thresholds:
thresholds$quan.duplicates.msms <- 0.05
thresholds$score <- 100
thresholds$msmsEff <- 30
thresholds$quanRetRSD <- 0.3
thresholds$quanRet50ratio <- 1.2
thresholds$quanRetSlope <- 1
thresholds$MSMS.MatchedFragmentsPerMSMSs <- c(25,15,35)
thresholdsBA <- thresholds
colnames(Data) <- tolower(colnames(Data))
#SPeciesTable---------
if(SpeciesTable){
species <- read.csv(paste(path.package("mqqc"),"data/MQQCspecies.txt",sep = "/"),sep = "\t",stringsAsFactors = F)
if(length(species) == 1){
species <- read.csv(paste(path.package("mqqc"),"data/MQQCspecies.txt",sep = "/"),sep = "\t",stringsAsFactors = F)
}
species$Protein[is.na(species$Protein)] <- ""
RawFile <- unique(Data$raw.file)[1]
# RawFile <<- RawFile
# templateFasta <<- templateFasta
regEx <- sapply(species$Abbreviation,function(x){gsub(placeholder,x, templateFasta,fixed = T)})
# regEx <<- regEx
temp <- as.logical(sapply(regEx,grep, x = RawFile,ignore.case = !case_sensitive_matching))
temp[is.na(temp)] <- FALSE
if(any(temp)){
# Testing using MachineSpecific thredsholds based on distribution
# DisPath <- paste(folder,"_RmqqcFile_Quantiles",paste(species$Abbreviation[temp],"_quan.rda",sep = ""),sep = "/")
# if(file.exists(DisPath)){
# load(DisPath)
#
# NewThresholds<- sapply(Quan,function(x){
# x <<- x
# names(x)
# fufi <- gregexpr(RESettings$REmac,RawFile)[[1]]
# mac <- substr(RawFile,fufi,(fufi+attributes(fufi)$match.length-2))
# return(x[,colnames(x)==mac][c(1,2,3)])
# })
#
# }
speciesUsed <- species[temp,]
thresholds <- as.list(speciesUsed[1,])
thresholds <- lapply(thresholds,function(x){
x <- as.character(x)
x <- unlist(strsplit(x," "))
x <- as.numeric(x)
})
}else{
speciesUsed <- species[species$Abbreviation == "default",]
thresholds <- as.list(speciesUsed[1,])
thresholds <- lapply(thresholds,function(x){
x <- as.character(x)
x <- unlist(strsplit(x," "))
x <- as.numeric(x)
})
if(dim(speciesUsed)[1] == 0){
thresholds <- thresholdsBA
cat("\n\nWARNING! Wether a parameter ID nor a default method was detected. Jumping to preset thresholds. Scoring might be deficient\n\n")
}
}
}
try(SpecStat <- SpeciesStatistic(Data$leading.razor.protein,F))
if(exists("SpecStat")){
data()
MostProperSpecies = SpecStat
}else{MostProperSpecies = NA; SpecStat = NA}
Data <- as.data.frame(Data)
grep.col <- function(string,Data,fixed = T){
x <- grep(string,colnames(Data),fixed = fixed)
if(length(x) == 0){
x <- 0
}
if(length(x) > 1){
cat("\rwarning, more than one match found",rep(" ",100))
}
return(x)
}
#Initiation-----------
# Initiate output list
summary.Data <- list()
# read.Data
#Data <- temp.i
colnames(Data) <- tolower(colnames(Data))
raw.files <- grep.col("raw.file",Data)
# to make sure that only one file is processed
Data$reverse[is.na(Data$reverse)] <- ""
Data$pep <- unfactor(Data$pep)
Data$intensity <- unfactor(Data$intensity)
Data$retention.time <- unfactor(Data$retention.time)
Data$retention.length <- unfactor(Data$retention.length)
Data$uncalibrated.mass.error..ppm. <- unfactor(Data$uncalibrated.mass.error..ppm.)
Data$m.z <- unfactor(Data$m.z)
Data$charge <- unfactor(Data$charge)
Data$score <- unfactor(Data$score)
Data$missed.cleavages <- unfactor(Data$missed.cleavages)
Data$calibrated.retention.time <- unfactor(Data$calibrated.retention.time)
# Data$ <- unfactor(Data$retention.length)
Data.i <- Data[unique(Data[,raw.files])[selectedFile] == Data[,raw.files],]
Data.iall <- Data.i#[Data.i$reverse == "+",]
Data.i <- Data.i[Data.i$reverse != "+",]
RawFilesUsed <- unique(Data.i$raw.file)
#Undefined-----------
pepPu<- aggregate(as.numeric(Data.iall$pep),list(Data.iall$leading.razor.protein,Data.iall$sequence),min,na.rm = T)
pepPr <- aggregate(pepPu$x,list(pepPu$Group.1),prod)
pepPr <- pepPr[order(pepPr[,2]),]
Rev <- grep("^REV",pepPr[,1])
RevP <- (1:length(Rev))/Rev
if(length(RevP) == 0){RevP <- 0}
hum <- Rev-c(0,Rev[-length(Rev)])
# hum[length(hum)] <- 0
fu <- unlist(apply(cbind(hum,RevP),1,function(x){return(rep(x[2],x[1]))}))
if(length(fu) > 0){
PL <- length(fu[fu < ProtFDR])
# fu <- fu
# pepPr <<- pepPr
sele <- pepPr[fu < 0.05,]
}else{
PL <- length(fu)
sele <- pepPr
}
# UNIREF STUFF ------------
UNIREF <- grep("^UniRef90|^REV_UniRef",Data.iall$proteins)
if(length(UNIREF) > 0){
# sele <<- sele
UNIREFP <- Data.iall[UNIREF,]
UNIREFP <- UNIREFP[!is.na(match(strsplitslot(UNIREFP$proteins),sele[,1])),]
Puniref <- grep("^REV_",UNIREFP$proteins,value = T,invert = T)
PunirefRev <- length(grep("^REV_",UNIREFP$proteins,value = T,invert = F))
Iuniref <- rank(UNIREFP$intensity)
Iuniref <- Iuniref/dim(Data.i)[1]
SCuniref <- UNIREFP$score
Sequniref <- UNIREFP$sequence
Pepuniref <- UNIREFP$pep
UniRef <- list(id = Puniref,intensities = Iuniref,score = SCuniref,sequence = Sequniref,Rev = PunirefRev,PEP = Pepuniref)
}else{
UniRef <- list(id = NA,intensities = NA,score = NA,sequence = NA,Rev = NA,PEP = NA)
}
# MSMS------------
try(rm(msSCH),silent = T)
msSCH <- msSC
# Ret.time
reten.ident <- Data.i[,grep.col("retention.time", Data.i)]
reten.start <- reten.ident[,grep.col("start",reten.ident)]
reten.stop <- reten.ident[,grep.col("finish", reten.ident)]
reten.mid <- reten.ident[,colnames(reten.ident) == "calibrated.retention.time"]
quant.range <- quantile(as.numeric(reten.mid),na.rm = T)[2:4]
ident.peps <- reten.mid > min(quant.range,na.rm = T) & reten.mid < max(quant.range,na.rm = T)
Data.i.quant <- Data.i[ident.peps,] # subset with innerj range
reten.ident <- Data.i.quant[,grep.col("retention.time", Data.i.quant)]
reten.start <- reten.ident[,grep.col("start",reten.ident)]
reten.stop <- reten.ident[,grep.col("finish", reten.ident)]
reten.mid <- reten.ident[,colnames(reten.ident) == "calibrated.retention.time"]
reten.start <- as.numeric(reten.start)
reten.stop <- as.numeric(reten.stop)
reten.mid <- as.numeric(reten.mid)
# ls.null()
ret.peak.ratio <- (reten.stop-reten.mid)/(reten.mid-reten.start)
ret.total <- reten.stop - reten.start
# Starting Scoring
#LC
summary.Data$ret.peak.shape <- quantile(ret.peak.ratio[ident.peps],na.rm = T) # filtered for inner elution time quantile
score$peak.shape <- ThreshCompare(log2(summary.Data$ret.peak.shape[3]),thresholds$LC.Peak.rel.Symmetry,type = "quantile",cat = "low")
summary.Data$ret.peak.shape.abs <- quantile(abs(log10(ret.peak.ratio[ident.peps][ret.peak.ratio[ident.peps]!=0])),na.rm = T) # filtered for inner elution time quantile
score$peak.shape.abs <- ThreshCompare(10^(summary.Data$ret.peak.shape.abs[3]),thresholds$LC.Peak.rel.Symmetry,type = "quantile",cat = "low")
ThreshCompare(10^(summary.Data$ret.peak.shape.abs[3]),thresholds$LC.Peak.rel.Symmetry,type = "quantile",cat = "low")
# Select top 5000 abundant peptides for Peak width estimation
sel <- order(Data.i$intensity[ident.peps],decreasing = T)
if(length(sel) > 5000){
sel <- sel[sel<= 5000]
}else{
sel <- sel == sel
}
summary.Data$ret.width <- quantile(log10(ret.total[ident.peps][sel]*60),na.rm = T)#filtered for inner elution time quantile
score$ret.width <- ThreshCompare((summary.Data$ret.width[c(3)]),log10(thresholds$LC.PeakWidth.s),type = "quantile",cat = "low")
#Other
summary.Data$ret.max <- max(reten.mid[ident.peps],na.rm = T)#filtered for inner elution time quantile
# MSMS-----
type.ident <- Data.i[,grep.col("type", Data.i)]
msms.count <- length(grep("MSMS", type.ident))
summary.Data$msms.count <- msms.count
summary.Data$total.msms.min <- summary.Data$msms.count/summary.Data$ret.max
summary.Data$quan.msms.min <- length(grep("MSMS",Data.i.quant$type))/(max(quant.range,na.rm = T) - min( quant.range,na.rm = T))
score$msms <- ThreshCompare(summary.Data$quan.msms.min,thresholds$MSMS.PSM.per.min )
# mass error ------
# Data.i.quant <<- Data.i.quant
mass.error <- Data.i.quant[,grep.col("mass.error..ppm",Data.i.quant)]
if(median(as.numeric(unlist(mass.error)),na.rm = T) > 100){
# fix for tmt mass error
mass.error <- Data.i.quant[,grep.col("uncalibrated...calibrated.m.z..ppm.",Data.i.quant)]
mass.error.uncal <- mass.error
print("To high mass error detected. Using uncalibrated...calibrated.m.z..ppm.")
}else{
mass.error.uncal <- mass.error[,grep.col("uncalibrated",mass.error)]
mass.error <- mass.error[,-grep.col("uncalibrated",mass.error)]
}
summary.Data$mass.error.cal <- quantile(as.numeric(mass.error),na.rm = T)
summary.Data$mass.error.uncal <- quantile(as.numeric(mass.error.uncal),na.rm = T)
score$mass.error <- ThreshCompare(abs(summary.Data$mass.error.uncal[3]),(thresholds$MS.Precision.ppm),type = "quantile",cat = "fixed")
# Precision ------
summary.Data$precision <- quantile(abs(as.numeric(mass.error.uncal)),na.rm = T)
score$precision <- ThreshCompare(abs(summary.Data$precision[3]),(thresholds$MS.Precision.ppm),type = "quantile",cat = "low")
# duplicates--------
pep.identifier <- paste(round(Data.i.quant$m.z),round(Data.i.quant$charge), Data.i.quant$modified.sequence)
double <- length(pep.identifier)-length(unique(pep.identifier))
summary.Data$quan.duplicates <- double
summary.Data$quan.duplicates.msms <- double/length(grep("MSMS",Data.i.quant$type))
summary.Data$score <- quantile(Data.i$score,na.rm = T)
# sd interquantile----------
if(length(Data.i.quant$calibrated.retention.time[!is.na(Data.i.quant$calibrated.retention.time)])> 1){
tempQuan <- quantile(Data.i.quant$calibrated.retention.time,na.rm = T)
if(length(AllPeptides) > 0){
tempL <- 1:2
}else{tempL <- 1}
for(deci in tempL){
if(deci == 1){
try(temp <- density(Data.i$calibrated.retention.time),silent = T)#;temp$y <- temp$y*temp$n/sum(temp$y)
}
if(deci == 2){
try(temp <- density(AllPeptides$Retention.time),silent = T)#;temp$y <- temp$y*temp$n/sum(temp$y)
}
x <- temp$x
y <- temp$y
ySel <- y[x > tempQuan[2]&x< tempQuan[4]]
xSel <- x[x > tempQuan[2]&x< tempQuan[4]]
y<- y
x<- x
ySel <- ySel/mean(ySel,na.rm = T)
xSel <- xSel-min(xSel,na.rm = T)
xSel <- xSel/max(xSel,na.rm = T)
if(deci == 1){
slope <- NA
rSDquanRet <- NA
try(rSDquanRet <- sd(ySel,na.rm = T)/mean(ySel,na.rm = T),silent = T)
try(slope <- coefficients(lm(scale(ySel)~xSel))[2],silent = T)
}else{
slopeALL <- NA
rSDquanRetALL <- NA
try( rSDquanRetALL <- sd(ySel,na.rm = T)/mean(ySel,na.rm = T),silent = T)
try(slopeALL <- coefficients(lm(scale(ySel)~xSel))[2],silent = T)
}
}
}else{
rSDquanRet <- NA
slope <- NA
}
summary.Data$quanRetRSD <- rSDquanRet
summary.Data$quanRetSlope <- slope
#tempQuan <- tempQuan
RatioIQuan <- diff(tempQuan[c(2,4)])/diff(tempQuan[c(1,5)]) # Ratio between inner and outer quantile distance of retention time, the bigger the better
summary.Data$RatioIQuan <- RatioIQuan
summary.Data$quanRet50ratio <- log2( diff(tempQuan[c(2,3)])/diff(tempQuan[c(3,4)]))
score$quanRetRSD <- ThreshCompare(summary.Data$quanRetRSD,c(0,0,thresholds$quanRetRSD),type = "quantile",cat = "fixed")
s <- summary.Data$quanRetRSD
r <- thresholds$quanRetRSD
score$quanRetSlope <- ThreshCompare(1/abs(summary.Data$quanRetSlope),1/thresholds$quanRetSlope,type = "single")
score$quanRet50ratio <- ThreshCompare(abs(abs(summary.Data$quanRet50ratio)),c(0,abs(abs(log2(thresholds$quanRet50ratio))),abs(abs(log2(thresholds$quanRet50ratio)))),cat = "fixed",type = "quantile",log = T)
# if(abs(summary.Data$quanRet50ratio) > 1){
# score$quanRet50ratio <- ThreshCompare(abs(summary.Data$quanRet50ratio),abs(thresholds$quanRet50ratio))}else{
# score$quanRet50ratio <- ThreshCompare(thresholds$quanRet50ratio,abs(summary.Data$quanRet50ratio))#?
# }
# Intensity Value
vals <- grep("^intensity",colnames(Data),ignore.case = T)
if(length(vals) > 1){
ValExclude <- grep("^intensity$",colnames(Data),ignore.case = T)
vals <- setdiff(vals,ValExclude)
}
Int <- Data.i.quant[, vals]
Int <- as.vector(unfactor(Int))
Int <- Int[Int != 0]
IntQuan <- quantile(Int,na.rm = T)
summary.Data$Intensity <- IntQuan
IntQuan <- IntQuan
thresholds <- thresholds
score$Intensity <- ThreshCompare(log10(signif(IntQuan[3])),thresholds$MS.Intensities.log10,type = "quantile",cat = "high")
# check MSMS
if(file.exists(peppath<- paste(path,"peptides.txt",sep = "/"))|length(Peptides) > 0){
if(length(Peptides) > 0){
if(is.data.frame(Peptides)){
tempPep <- Peptides
}else{
tempPep <- read.csv(peppath,sep = "\t")
}
}else{
tempPep <- read.csv(peppath,sep = "\t")
}
tempPep$Missed.cleavages[tempPep$Missed.cleavages > 0] <- 1
TempMis <- aggregate(tempPep$Missed.cleavages,list(tempPep$Missed.cleavages),length)
val1 <- TempMis[TempMis[,1] == 0,2]
val2 <- TempMis[TempMis[,1] > 0,2]
if(length(val1) == 0){val1 = 0}
if(length(val2) == 0){val2 = 0}
rat = val2 /(val1+ val2)*100
}else{rat = "not available"}
summary.Data$missed.cleavages.percent =as.character(rat)
MSMSsel <- match(Data.i.quant$ms.ms.scan.number,MSMS$Scan.number)
try(msmsInfo <- msmsPlot(path = path, RawFilesUsed= RawFilesUsed,quant.range = range(quant.range),MSMS.Import = MSMS[MSMSsel,]),silent = T)
if(!exists("msmsInfo")){
msmsInfo <- rep(0,5)
msmsInfo <- list(MSMSint = "nodata",MSMSn = "nodata")
}else{
if(!is.list(msmsInfo)){
msmsInfo <- list(MSMSint = "nodata",MSMSn = "nodata")
}
}
msmsInfo$MSMSint[is.na(msmsInfo$MSMSint)] <- 0
if(all(msmsInfo$MSMSint == 0)|all(is.character(unlist(msmsInfo)))){
summary.Data$msmsQuantile <- c(0,0,NA,0,0)
summary.Data$msmsMassCount <- c(0,0,NA,0,0)
score$msmsQuantile <- NA
score$msmsCount <- NA
}else{
summary.Data$msmsQuantile <- msmsInfo$MSMSint
summary.Data$msmsMassCount <- msmsInfo$MSMSn
score$msmsQuantile <- ThreshCompare(log10(summary.Data$msmsQuantile)[3],thresholds$MSMS.Intensities.log10,type = "quantile",cat = "high")
###
score$msmsCount <- ThreshCompare((summary.Data$msmsMassCount)[3],thresholds$MSMS.MatchedFragmentsPerMSMS,type = "quantile",cat = "high")#ThreshCompare(msmsInfo$MSMSn[3], thresholds$MSMS.MatchedFragmentsPerMSMS) #(sig - lo)/(ref[1]-lo)
}
# Check Summary
summaryPath <- list.files(path,pattern = "proteinGroups.txt",full.name = T)
# include Protein entry, in case older sepcies table is used
noCoverage <- F
if(length(speciesUsed$Protein)==0){
if(dim(speciesUsed)[1] == 0){
speciesUsed <- rbind(speciesUsed,"")
noCoverage <- T
}
speciesUsed$Protein <- ""
}
if(nchar(as.character(speciesUsed$Protein)) > 0){
summaryFile <- read.csv(summaryPath,sep = "\t")
BSA <- summaryFile[grep(speciesUsed$Protein,summaryFile$Majority.protein.IDs),]
if(is.data.frame(BSA)){
Coverage <- BSA$Sequence.coverage....
}else{
Coverage <- paste(quantile(summaryFile$Sequence.coverage....,na.rm = T),collapse = " # ")
speciesUsed$Protein <- ""
}
}else{
if(file.exists(summaryPath)&!noCoverage){
summaryFile <- read.csv(summaryPath,sep = "\t")
Coverage <- median(summaryFile$Sequence.coverage....,na.rm = T)#paste(quantile(summaryFile$Sequence.coverage....,na.rm = T),collapse = " # ")
}else{Coverage <- NA}
}
summary.Data$Coverage <- Coverage
if(length(thresholds$MSMS.ProteinCoverage) == 0){thresholds$MSMS.ProteinCoverage <- 50}
score$ProteinCoverage <- ThreshCompare(summary.Data$Coverage,thresholds$MSMS.ProteinCoverage)
#####
#Combined Scores
#####
# 1. nLC shape Combi
nLCvec <- c(score$quanRetRSD,score$quanRet50ratio)
nLCvec[nLCvec > 1] <- 1
score$nLCcombi <- mean(nLCvec)
# msScans Analysis:
# quant.range <- c(8,35)
if(length(msSC) > 0){
msSC$Retention.time <- as.numeric(as.character(msSC$Retention.time))
rs <- msSC$Retention.time*60
msSC$Isotope.patterns <- c(1,diff(rs))*unfactor(msSC$Isotope.patterns...s)
msSCquant <- msSC[msSC$Retention.time >= min(quant.range)&msSC$Retention.time<= max(quant.range,na.rm = T),]
summary.Data$Ion.injection.time <- quantile(as.numeric(as.character(msSCquant$Ion.injection.time)),na.rm = T)
summary.Data$Peaks.s <- quantile(as.numeric(as.character(msSCquant$Peaks...s)),na.rm = T)
summary.Data$MS.MS.identification.rate <- quantile(as.numeric(as.character(msSCquant$MS.MS.identification.rate....)),na.rm = T)
summary.Data$Cycle.time <- quantile(as.numeric(as.character(msSCquant$Cycle.time)),na.rm = T)
summary.Data$Total.ion.current <- quantile(as.numeric(as.character(msSCquant$Total.ion.current)),na.rm = T)
summary.Data$Total.ion.current <- quantile(as.numeric(as.character(msSCquant$Total.ion.current)),na.rm = T)
summary.Data$RawOvFtT <- quantile(unfactor(msSCquant$RawOvFtT),na.rm = T)
rettimePlots <- function(x,feature,logfun = log10,plotfun = plot ,...){
y <- x[,colnames(x) == feature]
if(length(y ) == dim(x)[1]){
y <- logfun(as.numeric(as.character(y)))
a <- round(as.numeric(as.character(x$Retention.time)),1)
b <- y
b <- aggregate(b,list(a),median,na.rm = T)
plotfun(b[,1] ,b[,2],las = 1,...)
points(medwin(b[,1],b[,2],win = dim(b)[1]/30),col = "red",type = "l")
}
}
pdf(paste("RetPlots_",rfn,".pdf",sep = ""),height = 4)
par(mfrow = c(2,3),mai = c(0.6,0.6,0.2,0.1))
unfactor <- function(x){as.numeric(as.character(x))}
retfun <- function(x){return(x)}
try(rettimePlots(msSC,"Ion.injection.time",ylab = "log10 Ion injection time",type = "l",xlab = "Retention time in min"),silent = T)
try(rettimePlots(msSC,"MS.MS.identification.rate....",type = "l",logfun =retfun,ylab = "MS2 identification rate [%]",xlab = "Retention time in min"),silent = T)
try(rettimePlots(msSC,"Cycle.time",type = "l",logfun = retfun,xlab = "Retention time in min",ylab = "Cycle time"),silent = T)
try(rettimePlots(msSC,"Peaks...s",type = "l",xlab = "Retention time in min",logfun = retfun,ylab = "Peaks [1/s]"),silent = T)
try(rettimePlots(msSC,"Single.peaks...s",type = "l",plotfun = points,logfun = retfun,lty = "dashed",xlab = "Retention time in min"),silent = T)
try(rettimePlots(msSC,"Isotope.patterns...s",type = "l",logfun = retfun,plotfun = plot,lty = "solid",xlab = "Retention time in min",ylab = "Isotope patterns / s"),silent = T)
try(rettimePlots(msSC,"Isotope.pattern.length",type = "l",logfun = retfun,plotfun = plot,lty = "dashed",xlab = "Retention time in min",ylab = "Isotope pattern length"),silent = T)
dev.off()
# system("open RetPlots.pdf")
summary.Data$Isotope.patterns.min <- sum(msSCquant$Isotope.patterns,na.rm = T)/diff(range(msSCquant$Retention.time))
# 0.07566 0.50490 0.76764 1.84680 2.98290 4.12050
# rettimePlots(msSC,"Single.isotope.patterns...s",type = "l",plotfun = plot,lty = "dashed")
}else{summary.Data$Isotope.patterns.min <- NA}
if(length(msmsScans) >0){
msmsScans$Retention.time <- as.numeric(as.character(msmsScans$Retention.time))
rs <- msmsScans$Retention.time*60
# msmsScans$Isotope.patterns <- c(1,diff(rs))*unfactor(msmsScans$Isotope.patterns...s)
msmsScansquant <- msmsScans[msmsScans$Retention.time >= min(quant.range)&msmsScans$Retention.time<= max(quant.range,na.rm = T)& msmsScans$Identified == "+",]
summary.Data$Ion.injection.time.MSMS <- quantile(as.numeric(as.character(msmsScansquant$Ion.injection.time)),na.rm = T)
summary.Data$Collision.energy.msms <- quantile(as.numeric(as.character(msmsScansquant$Collision.energy)),na.rm = T)
summary.Data$AGC.Fill <- quantile(as.numeric(as.character(msmsScansquant$AGC.Fill)),na.rm = T)
summary.Data$Cycle.time.MSMS <- quantile(as.numeric(as.character(msmsScansquant$Ion.injection.time)),na.rm = T)
summary.Data$Total.ion.current.MSMS <- quantile(as.numeric(as.character(msmsScansquant$Total.ion.current)),na.rm = T)
}
score$Isotope.patterns.min <- ThreshCompare(summary.Data$Isotope.patterns.min,thresholds$MS.IsotopicPatternsPerMin )
#AllPeptides
msmsEff <- msmsInfo$MSMSEff*100
#msmsEff <- sumDat() # gets msms effecency info from summary table
if(length(msmsEff) == 0){
# try(msmsEff <- length(Data.i.quant$ms.ms.ids[!is.na(Data.i.quant$ms.ms.ids)])/(max(as.numeric(Data.i.quant$ms.ms.ids),na.rm = T)-min(as.numeric(Data.i.quant$ms.ms.ids),na.rm = T))*100)
# try(msmsEff <- length(Data.i.quant$ms.ms.ids[!is.na(Data.i.quant$ms.ms.ids)])/(max(as.numeric(Data.i.quant$ms.ms.ids),na.rm = T)-min(as.numeric(Data.i.quant$ms.ms.ids),na.rm = T))*100)
msmsEff <- NA
}
MSID.min = NA
if(length(AllPeptides) > 0){
allMSMS <- (AllPeptides$Sequence != " ")
allMSMS <- allMSMS[allMSMS]
msmsEffAll <- length(Data.i$reverse[Data.i$reverse != "+"])/dim(AllPeptides)[1]*100
sel <- AllPeptides$Retention.time > min(quant.range) & AllPeptides$Retention.time < max(quant.range)
allMSMS <- (AllPeptides$Sequence[sel] != " ")
allnotIdentifiedMSMS <- (AllPeptides$Sequence[sel] != " ")
Features <- length(sel[sel])
#allMSMST <- allMSMS[allMSMS]
#allMSMSF <- allMSMS[!allMSMS]
msmsEffQuantile <- length(Data.i.quant$reverse[Data.i.quant$reverse != "+"])/length(allMSMS)*100
msmsEff <- msmsEffQuantile
tempMSIDMIN <- unlist(Features/abs(diff(quant.range[c(1,3)])))
names(tempMSIDMIN) <- NULL
summary.Data$MSID.min <- tempMSIDMIN
score$MSID.min <- ThreshCompare(summary.Data$MSID.min,thresholds$MS.IsotopicPatternsPerMin )
summary.Data$AllPeptides <- dim(AllPeptides)[1]
summary.Data$AllPeptidesIdentified <- length(AllPeptides$Score[AllPeptides$Sequence!= " "])
summary.Data$AllPeptides_QuantileWindow <- length(sel[sel])
summary.Data$AllPeptidesIdentifiedMSMS_QuantileWindow <- length(allnotIdentifiedMSMS[allnotIdentifiedMSMS])
}else{score$MSID.min <- NA;summary.Data$MSID.min <- NA;summary.Data$AllPeptidesIdentified <- NA;summary.Data$AllPeptides <- NA}
#else{
# if(dim(msmsEff)[1] == 2){
# msmsEff <- msmsEff[1,2]
# }else{
# if(dim(msmsEff)[1] == 2& filename != 0){
# msmsEff <- msmsEff[msmsEff[,1] == gsub(".raw$","",filename),2]
# }else{msmsEff <- 0}
#
# }
#
# }
msmsEff <- as.numeric(msmsEff)
# }else{msmsEff <- 0}
# }else{
# msmsEff <- as.numeric(msmsEff[dim(msmsEff)[1],2])/thresholds$msmsEff
# }
summary.Data$msmsEff <- msmsEff
summary.Data$msmsEffall <- msmsEffAll
# if(is.vector(msmsEff)){msmsEff <- as.matrix(msmsEff)}
score$msmsEff <- ThreshCompare(msmsEff,thresholds$msmsEff)
summary.Data <- summary.Data
thresholds <- thresholds
# scores
#thresholds$MS.Precision.ppm[1]/max(abs(summary.Data$mass.error.uncal[c(2,4)]))*0.5+ thresholds$MS.Precision.ppm[1]/max(abs(summary.Data$mass.error.uncal[c(3)]))*0.5
score$score <- ThreshCompare(summary.Data$score[3],thresholds$score)
# score nlc
score$quan.duplicates.msms <- ThreshCompare(summary.Data$quan.duplicates.msms,thresholds$quan.duplicates.msms)
# 2. MS combi score
#MSvec <- c(score$Intensity,score$mass.error,score$msms)
if(length(msSC) > 0){
MSfeature <- score$Isotope.patterns.min
}else{
MSfeature <- score$MSID.min
}
MSvec <- c(MSfeature,score$precision,score$Intensity)
MSvec[MSvec > 1] <- 1
score$combiMS <- mean(MSvec,na.rm = T)
# 3. MSMS combi score
MSMSvec <- c(score$msms,score$msmsQuantile,score$msmsCount)
MSMSvec[MSMSvec > 1] <- 1
score$combiMSMS <- mean(MSMSvec,na.rm = T)
# 4. nLC combi score
nLCvec <- c(score$nLCcombi,score$peak.shape.abs,score$ret.width)
nLCvec[nLCvec > 1] <- 1
score$LCcombi <- mean(nLCvec)
summary.Data$LCcombiScore <- mean(nLCvec,na.rm = T)
# efficiency
# if(length(msmsEff) == 1){
# if(any(is.na(msmsEff))){
parID <- speciesUsed$Abbreviation
if(length(parID) == 0){
parID <- "not detected"
}
# Other Metrics
summary.Data$mz <- quantile(Data.i.quant[,grep.col("^m.z$", Data.i.quant,FALSE)],na.rm = T) # filtered for inner elution time quantile
seq <- Data.i$sequence[grep("MSMS",type.ident)]
uniPepCount <- length(unique(seq))
summary.Data$uniPepCount <- uniPepCount
# protein ------
summary.Data$Protein <- PL
# TMT check
TMTFUN <- TMT_LABELEFF(Data.i)
summary.Data$TMT <- TMTFUN$vec
#DependentPeps-----------
summary.Data$DependentPeptides <- NULL
if(all(1)){
if(file.exists(DPfile <- paste(path,"allPeptides.txt",sep = "/"))|length(AllPeptides) > 0){
DepPepFun<- function(x,filename = "DPpie",NormPep = NULL,unknowns = T,cbPalette = rainbow(10),maxShow = 20){
if(!is.data.frame(x)){
DPlines <- read.csv(x,sep = "\t",stringsAsFactors = F)}else{DPlines <- x}
DPlinesSig <- DPlines[as.numeric(DPlines$"DP.PEP") < 0.01,]
DPlinesSig$DP.Modification[DPlinesSig$DP.Modification == ""] <- "unknown"
if(dim(DPlinesSig)[1] > 0){
Val <- aggregate(DPlinesSig$DP.Mass.Difference,list(DPlinesSig$DP.Modification),function(x){c(length(x),median(x,na.rm = T))})
ValR <- Val[[2]]
rownames(ValR) <- Val[,1]
if(!unknowns){
ValR <- ValR[rownames(ValR) != "unknown",]
}
ValRsum <- sum(ValR[,1],na.rm = T)
ValRrest <- ValR[ ExVec<- ValR[,1]/sum(ValR[,1]) < 0.01,]
ValRrest <- sum(ValRrest[,1])
ValR <- ValR[!ExVec,]
ValR <- rbind(ValR,c(ValRrest,NA))
rownames(ValR)[dim(ValR)[1]] <- "Other"
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","tomato3")
ValR <- ValR[order(ValR[,1],decreasing = T),]
}else{
ValRsum = 0
ValR = matrix(0)
rownames(ValR) <- "NA"
}
if(length(NormPep) > 0){
width = 18
}else{width = 8}
if(dim(ValR)[1] > maxShow){
ValR <- ValR[1:maxShow,]
}
if(ValRsum != 0){
pdf(pdfname <- paste(paste("DepPepPie",filename,sep = "_"),".pdf",sep = ""),width =width)
#tempy <- Hui$counts[peaksf]
#peaksf <<- peaksf[tempy/max(tempy)]
if(length(NormPep) > 0){
par(mfrow = c(1,2))
pie(c(NormPep,ValRsum),labels = paste(c("Identified","Dependent Peptides"),"n:",c(NormPep,ValRsum)),border = "transparent",col = c("red","grey"),main = "All Peptides",angle = 30,density = c(NA,20))
}
if(ValRsum == 0){
empty.plot()
legend("top",legend = "No dependent peptides detected.",xpd = NA,bty = "n")
}else{
pie(ValR[,1],labels = paste(rownames(ValR),"n:",round(ValR[,1],2)),border = "transparent",col = colorRampPalette((cbPalette))(dim(ValR)[1]),main = "Dependent Peptides")
}
par(mfrow = c(1,1))
PlotDPHist <- function(x){
Hui <- hist(x,breaks = 100000,plot = F)
plot(Hui$breaks[-1][Hui$counts > 0],Hui$counts[Hui$counts > 0],xlim = c(-50,50),type = "h",frame = F,xlab = "Mass Difference [Da]",ylab = "n",main = "")
abline(h = 0)
if(length(grep("pracma",library())) > 0){
try(library(pracma))
try(peaksf <-findpeaks(Hui$counts))
peaksf <- peaksf[,2]
}
if(!exists("peaksf")){
peaksf <- (1:length(Hui$counts))[Hui$counts >= sort(Hui$counts,decreasing = T)[15]]
}
text(Hui$breaks[peaksf],Hui$counts[peaksf],round(as.numeric(as.character(Hui$breaks[peaksf])),2),col = 2,type = "h",pos = 3,srt = 90,xpd = NA,cex = 0.8)
}
try(PlotDPHist(DPlinesSig$DP.Mass.Difference))
dev.off()
#system(paste("open",pdfname))
return( paste(apply(cbind(rownames(ValR),ValR),1,paste,collapse = "##"),sep = " ",collapse = "_#_"))
}else{return("No DP Peptides found.")}
}
summary.Data$DependentPeptides <- "No DP Peptides found."
if(length(AllPeptides) > 0){
if(is.matrix(AllPeptides)|is.data.frame(AllPeptides)){
DPfile <- AllPeptides
}
}
try(summary.Data$DependentPeptides <- DepPepFun(DPfile,NormPep = dim(Data.i)[1],cbPalette = cbPalette,filename = filename))
}
}
save(UniRef,file = "UniRef.rda")
return(list(th = thresholds,sc = score,sd = summary.Data,diq = Data.i.quant,IdentifiedProteins = speciesUsed$Protein,parID = parID,SpecStat = MostProperSpecies,UniRef = UniRef,TMTplot=TMTFUN$forplotting))
}
# tryError1 <- class(try(qc.prepare.data <- qc.prepare(Data = temp.DataEvidence,msmsScans = msmsScans,msSC = msScans, SpeciesTable = SpeciesTable,placeholder = placeholder,templateFasta = RESettings$REpar,path = .path,filename = i, BSAID = BSAID,RESettings = RESettings,Peptides = Peptides, AllPeptides =tempAllPeptides,MSMS = tempMSMS,rfn = i)))
# start.qc("/Users/henno/Documents/Skripte/R-Functions/Selbach-Functions/DataAnalysis/MQQC_Manuscript/tmt/txt/evidence.txt")
# tryError1 <- class(try(qc.prepare.data <<- qc.prepare(Data = temp.DataEvidence,msSC = msScans, SpeciesTable = SpeciesTable,placeholder = placeholder,templateFasta = RESettings$REpar,path = .path,filename = i, BSAID = BSAID,RESettings = RESettings,Peptides = Peptides, AllPeptides =tempAllPeptides,MSMS = tempMSMS)))
#tryError1 <- class(try(qc.prepare.data <- qc.prepare(Data = temp.DataEvidence, SpeciesTable = SpeciesTable,placeholder = placeholder,templateFasta = RESettings$REpar,path = .path,filename = i, BSAID = BSAID)))
#tryError1 <- class(try(qc.prepare.data <- qc.prepare(Data = temp.DataEvidence, SpeciesTable = SpeciesTable,placeholder = placeholder,templateFasta = RESettings$REpar,path = .path,filename = i, BSAID = BSAID,RESettings = RESettings,Peptides = Peptides, AllPeptides =AllPeptides,MSMS = MSMS)))
# start.qc()
#tryError1 <- class(try(qc.prepare.data <- qc.prepare(Data = temp.DataEvidence, SpeciesTable = SpeciesTable,placeholder = placeholder,templateFasta = RESettings$REpar,path = .path,filename = i, BSAID = BSAID,RESettings = RESettings,Peptides = Peptides, AllPeptides =AllPeptides,MSMS = MSMS)))
#tryError1 <- class(try(qc.prepare.data <- qc.prepare(Data = temp.DataEvidence, SpeciesTable = SpeciesTable,placeholder = placeholder,templateFasta = RESettings$REpar,path = .path,filename = i, BSAID = BSAID,RESettings = RESettings,Peptides = Peptides, AllPeptides =AllPeptides,MSMS = MSMS)))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.