#' @title Prepare input file in standard formats of quantification tools
#' @description Process the output files obtained from popular proteomic
#' quantification software (>=18) into a table with one row per sample, a
#' class-label column and one column per protein. The producing software is
#' recognised from the column layout of the raw file and reported with
#' \code{message()}.
#' @param acquisitionmethods Input the corresponding "number" of acquisition techniques as follows:
#' If set 1, the user chooses to process the data based on SWATH-MS.
#' If set 2, the user chooses to process the data based on Peak Intensity.
#' If set 3, the user chooses to process the data based on Spectral Counting.
#' @param rawdataset Path of the raw dataset directly obtained from the
#' software. Comma-, semicolon- and tab-separated files are accepted; the
#' separator is detected from the first lines of the file.
#' @param lable Path of the label file of your dataset. Its first column must
#' hold the sample names exactly as they appear in the raw dataset and its
#' second column the class label of each sample.
#' @return For the long-format SWATH-MS inputs (Spectronaut, OpenSWATH,
#' Skyline) a data frame whose first column \code{Lable} holds the class labels
#' and whose remaining columns hold one protein each. For every other supported
#' format a character matrix with one row per sample, a first column
#' \code{label} and one further column per protein. An error is raised when the
#' file layout is not recognised.
#' @importFrom tidyr spread
#' @usage PrepareInuputFiles(acquisitionmethods, rawdataset, lable)
#' @export PrepareInuputFiles
#' @examples
#' library(EVALFQ)
#' \donttest{data_s <- PrepareInuputFiles(acquisitionmethods=2,
#' rawdataset = "MaxQuant_proteinGroups_LFQ.txt", lable = "MaxQuant_LFQ_Label.txt")}
PrepareInuputFiles <- function(acquisitionmethods, rawdataset, lable) {
  if (length(acquisitionmethods) != 1L || is.na(acquisitionmethods)) {
    stop("'acquisitionmethods' must be a single value: 1, 2 or 3.",
         call. = FALSE)
  }
  # Numeric 1 and character "1" are both accepted, as before.
  method <- as.character(acquisitionmethods)
  if (!method %in% c("1", "2", "3")) {
    stop("'acquisitionmethods' must be 1 (SWATH-MS), 2 (Peak Intensity) ",
         "or 3 (Spectral Counting).", call. = FALSE)
  }

  # The separator is guessed from the first two lines of the raw file and
  # from the first line of the label file.
  raw <- .pif_read_table(rawdataset, probe_lines = 2L)
  label_tbl <- .pif_read_table(lable, probe_lines = 1L)
  if (ncol(label_tbl) < 2L) {
    stop("The label file must have at least two columns: ",
         "sample name and class label.", call. = FALSE)
  }

  res <- switch(
    method,
    "1" = .pif_from_swath(raw, label_tbl),
    "2" = .pif_from_peak_intensity(raw, label_tbl),
    "3" = .pif_from_spectral_counting(raw, label_tbl)
  )
  if (is.null(res)) {
    stop("The column layout of 'rawdataset' does not match any supported ",
         "quantification software for the chosen acquisition method.",
         call. = FALSE)
  }
  res
}

# Read a delimited text file, choosing among ",", ";" and tab the separator
# that splits the first `probe_lines` lines into the most pieces.
.pif_read_table <- function(path, probe_lines) {
  probe <- readLines(path, n = probe_lines)
  separators <- c(",", ";", "\t")
  piece_counts <- vapply(
    separators,
    function(s) length(unlist(strsplit(probe, s, fixed = TRUE))),
    integer(1)
  )
  utils::read.csv(path, header = TRUE, sep = separators[which.max(piece_counts)],
                  stringsAsFactors = FALSE, check.names = FALSE)
}

# TRUE when every regular expression in `patterns` matches at least one
# element of `x`.
.pif_has <- function(x, patterns) {
  all(vapply(patterns, function(p) any(grepl(p, x)), logical(1)))
}

# Report the recognised software and hand the prepared table back.
.pif_announce <- function(res, software) {
  force(res)  # build the table before announcing, so a failure prints nothing
  message("the Resulting Data File Generated by the Quantification Software: ",
          software)
  res
}

# Turn a protein-by-sample table into the sample-by-protein output matrix.
# `values` is a data frame whose columns include the samples named in the first
# column of `label_tbl`; `protein_ids` names its rows. The class labels (second
# column of `label_tbl`) become the first output column, "label".
.pif_label_matrix <- function(values, protein_ids, label_tbl) {
  sample_names <- label_tbl[, 1]
  at <- match(sample_names, colnames(values))
  if (anyNA(at)) {
    stop("Sample(s) listed in the label file are missing from the data ",
         "columns: ", paste(sample_names[is.na(at)], collapse = ", "),
         call. = FALSE)
  }
  labelled <- rbind(label = as.character(label_tbl[, 2]),
                    values[, at, drop = FALSE])
  # Work on a matrix: unlike data-frame row names, matrix row names may repeat,
  # so duplicated protein identifiers do not abort the conversion.
  labelled <- as.matrix(labelled)
  rownames(labelled) <- c("label", as.character(protein_ids))
  t(labelled)
}

# Layouts in which the protein column and the sample columns are recognised by
# a pattern in their names: keep those columns, then label and transpose.
.pif_from_tagged_columns <- function(raw, protein_pattern, sample_pattern,
                                     label_tbl, ignore_case = FALSE) {
  cols <- colnames(raw)
  keep <- c(grep(protein_pattern, cols),
            grep(sample_pattern, cols, ignore.case = ignore_case))
  picked <- raw[, keep, drop = FALSE]
  .pif_label_matrix(picked, picked[[1L]], label_tbl)
}

# Finish a sample-by-protein table produced by spread(): the first column
# (`id_col`) becomes the row names, rows are put in label-file order and the
# class labels are prepended as column "Lable".
.pif_label_wide <- function(wide, id_col, label_tbl) {
  rownames(wide) <- wide[, id_col]
  wide <- wide[, -1, drop = FALSE]
  at <- match(label_tbl[, 1], rownames(wide))
  if (anyNA(at)) {
    warning("Sample(s) listed in the label file are missing from the data: ",
            paste(label_tbl[is.na(at), 1], collapse = ", "), call. = FALSE)
  }
  cbind(Lable = label_tbl[, 2], wide[at, , drop = FALSE])
}

# Cut the protein table out of a Scaffold samples report. The table starts two
# rows above the row whose first cell is "#"; its first row carries the
# quantity headers and its third row the column titles.
.pif_scaffold_block <- function(raw, quantity_header) {
  hash_row <- match("#", raw[, 1])
  if (is.na(hash_row)) {
    stop("The Scaffold report has no '#' header row.", call. = FALSE)
  }
  block <- raw[(hash_row - 2):(nrow(raw) - 2), ]
  accession_col <- which("Accession Number" == block[3, ])
  quantity_cols <- which(quantity_header == block[1, ])
  block <- block[, c(accession_col, quantity_cols), drop = FALSE]
  colnames(block) <- as.character(unlist(as.list(block[3, ])))
  block[-(1:3), , drop = FALSE]
}

# SWATH-MS inputs (acquisitionmethods = 1). Returns NULL when no layout matches.
.pif_from_swath <- function(raw, label_tbl) {
  cols <- colnames(raw)

  if (.pif_has(cols, c("R.Condition", "R.Replicate", "R.FileName",
                       "EG.MinProfileQvalue", "FG.TotalPeakArea",
                       "FG.Charge", "EG.ProteinId"))) {
    long <- raw[, c("R.FileName", "EG.ProteinId", "FG.NormalizedTotalPeakArea")]
    wide <- spread(data = long, key = EG.ProteinId,
                   value = FG.NormalizedTotalPeakArea)
    return(.pif_announce(.pif_label_wide(wide, "R.FileName", label_tbl),
                         "Spectronaut"))
  }

  if (.pif_has(cols, c("transition_group_id", "run_id", "filename", "decoy",
                       "Intensity", "ProteinName", "total_xic"))) {
    long <- raw[, c("filename", "ProteinName", "Intensity")]
    wide <- spread(data = long, key = ProteinName, value = Intensity)
    return(.pif_announce(.pif_label_wide(wide, "filename", label_tbl),
                         "OpenSWATH"))
  }

  if (.pif_has(cols, c("ReplicateName", "FileName", "ProteinName",
                       "PrecursorCharge", "IsDecoy", "TotalArea",
                       "annotation_QValue"))) {
    long <- raw[, c("ReplicateName", "ProteinName", "TotalArea")]
    wide <- spread(data = long, key = ProteinName, value = TotalArea)
    return(.pif_announce(.pif_label_wide(wide, "ReplicateName", label_tbl),
                         "Skyline"))
  }

  if (.pif_has(cols, c("rotein Key", "_Prob", "_Peptides", "_PSMs",
                       "_Top6Top6Freq"))) {
    res <- .pif_from_tagged_columns(raw, "rotein Key", "_Top6Top6Freq",
                                    label_tbl)
    return(.pif_announce(res, "DIA-UMPIRE"))
  }

  if (.pif_has(cols, c("Protein", "Peptide", "Precursor MZ",
                       "Precursor Charge", "Fragment MZ", "Fragment Charge",
                       "Ion Type"))) {
    # Several columns may contain "Protein"; the first one names the proteins.
    protein_at <- grep("Protein", cols)[1L]
    res <- .pif_label_matrix(raw, raw[[protein_at]], label_tbl)
    return(.pif_announce(res, "PeakView"))
  }

  NULL
}

# Peak-intensity inputs (acquisitionmethods = 2). Returns NULL when no layout
# matches.
.pif_from_peak_intensity <- function(raw, label_tbl) {
  cols <- colnames(raw)
  norm_at <- match("Normalized abundance", cols)
  raw_at <- match("Raw abundance", cols)

  if (all(c("Protein IDs", "Majority protein IDs") %in% cols) &&
      any(grepl("LFQ intensity.", cols))) {
    res <- .pif_label_matrix(raw, raw[[1L]], label_tbl)
    return(.pif_announce(res, "MaxQuant"))
  }

  # isTRUE() guards against NA when the file has fewer than two data rows.
  if (!is.na(norm_at) && !is.na(raw_at) &&
      isTRUE(raw[2, 1] == "Accession") &&
      isTRUE(raw[2, 2] == "Peptide count")) {
    # Row 1 is dropped; row 2 holds the real column titles.
    body <- raw[-1, c(1, raw_at:ncol(raw)), drop = FALSE]
    colnames(body) <- as.character(unlist(as.list(body[1, ])))
    body <- body[-1, , drop = FALSE]
    res <- .pif_label_matrix(body, body[[1L]], label_tbl)
    return(.pif_announce(res, "Progenesis QI"))
  }

  if (grepl("Protein Group", cols[1]) && grepl("Protein ID", cols[2]) &&
      grepl("Accession", cols[3]) &&
      all(c("#Peptides", "#Unique") %in% cols) &&
      any(grepl("Area ", cols))) {
    res <- .pif_label_matrix(raw, raw[, "Accession"], label_tbl)
    return(.pif_announce(res, "PEAKS"))
  }

  if (grepl("Samples report created on", cols[1]) &&
      any(grepl("Scaffold: Version: Scaffold", raw[, 2]))) {
    block <- .pif_scaffold_block(
      raw, "Quantitative Value (Total Precursor Intensity)"
    )
    res <- .pif_label_matrix(block, block[[1L]], label_tbl)
    return(.pif_announce(res, "Scaffold"))
  }

  if (.pif_has(cols, c("e-Value", "Score Type", "Protein Id",
                       "Total Intensity"))) {
    res <- .pif_from_tagged_columns(raw, "Protein Id", "Total Intensity",
                                    label_tbl)
    return(.pif_announce(res, "Proteios SE"))
  }

  if (.pif_has(cols, c("Accession", "Description", "# Peptides", "# PSMs",
                       "# Unique Peptides", "# Protein Groups", "# AAs",
                       "Abundances"))) {
    res <- .pif_from_tagged_columns(raw, "Accession", "Abundances:", label_tbl)
    return(.pif_announce(res, "Thermo Proteome Discoverer"))
  }

  if (.pif_has(cols, c("#QPep", "#IPep", "Coverage ")) &&
      (any(grepl("accession", cols)) || any(grepl("Accession", cols)))) {
    protein_at <- c(grep("accession", cols), grep("Accession", cols))[1L]
    res <- .pif_label_matrix(raw, raw[[protein_at]], label_tbl)
    return(.pif_announce(res, "MFPaQ"))
  }

  if (.pif_has(cols, c("Protein", "Peptide", "Charge", "rt", "mz", "width",
                       "intensity"))) {
    protein_at <- grep("Protein", cols)[1L]
    res <- .pif_label_matrix(raw, raw[[protein_at]], label_tbl)
    return(.pif_announce(res, "OpenMS"))
  }

  NULL
}

# Spectral-counting inputs (acquisitionmethods = 3). Returns NULL when no
# layout matches.
.pif_from_spectral_counting <- function(raw, label_tbl) {
  cols <- colnames(raw)

  # "Razor + unique peptides" is matched literally: as a regular expression
  # the "+" would be a quantifier and never match the real column name.
  if (.pif_has(cols, c("Protein IDs", "Majority protein IDs",
                       "Unique peptides")) &&
      any(grepl("Razor + unique peptides", cols, fixed = TRUE)) &&
      any(grepl("MS/MS count ", cols, ignore.case = TRUE))) {
    res <- .pif_from_tagged_columns(raw, "Protein IDs", "MS/MS count ",
                                    label_tbl, ignore_case = TRUE)
    return(.pif_announce(res, "Maxquant"))
  }

  if (grepl("Samples report", cols[1]) &&
      any(grepl("Scaffold: Version: Scaffold", raw[, 2])) &&
      any(vapply(raw, function(column) {
        any(grepl("Total Spectrum Count", column))
      }, logical(1)))) {
    block <- .pif_scaffold_block(raw, "Total Spectrum Count")
    res <- .pif_label_matrix(block, block[[1L]], label_tbl)
    return(.pif_announce(res, "Scaffold"))
  }

  if (.pif_has(cols, c("PROTID", "_NUMPEPSUNIQ", "_NUMSPECSTOT", "_PW"))) {
    res <- .pif_from_tagged_columns(raw, "PROTID", "_NUMSPECSTOT", label_tbl)
    return(.pif_announce(res, "Abacus"))
  }

  if (grepl("Census version", cols[2]) &&
      .pif_has(raw[, 1], c("H", "P", "S"))) {
    # The second row containing "PLINE" in column 2 is used as the title row
    # from which the SPEC_COUNT columns and their names are taken.
    title_rows <- grep("PLINE", raw[, 2])
    count_cols <- grep("SPEC_COUNT", raw[title_rows[2], ])
    protein_rows <- grep("P", raw[, 1])
    counts <- raw[protein_rows, c(2, count_cols), drop = FALSE]
    colnames(counts) <- c("PLINE",
                          as.character(raw[title_rows[2], count_cols]))
    res <- .pif_label_matrix(counts, counts[[1L]], label_tbl)
    return(.pif_announce(res, "Census"))
  }

  if (.pif_has(cols, c("accession", "SPEC_COUNT:"))) {
    res <- .pif_from_tagged_columns(raw, "accession", "SPEC_COUNT:", label_tbl)
    return(.pif_announce(res, "MFPaQ"))
  }

  if (.pif_has(cols, c("total # pepptides", "ensembl", "swissprot"))) {
    res <- .pif_from_tagged_columns(raw, "swissprot", "total # pepptides",
                                    label_tbl)
    return(.pif_announce(res, "ProteinProphet"))
  }

  if (.pif_has(cols, c("ACCESSION", "PEPTIDES_COUNT"))) {
    res <- .pif_from_tagged_columns(raw, "ACCESSION", "PEPTIDES_COUNT",
                                    label_tbl)
    return(.pif_announce(res, "IRMa-hEIDI"))
  }

  if (.pif_has(cols, c("Proteins", "Num of Spectra"))) {
    res <- .pif_from_tagged_columns(raw, "Proteins", "Num of Spectra",
                                    label_tbl)
    return(.pif_announce(res, "DTASelect"))
  }

  NULL
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.