#' @title Prepare input file in standard formats of quantification tools
#' @description Reads a quantification table from disk and returns it in a
#' sample-by-feature layout. With \code{dataformat = 1} the file is read as-is.
#' With \code{dataformat = 2} the column names are inspected to recognise the
#' exporting software (XCMS, OpenMS, MZmine, MassProfiler Professional, LIMSA,
#' MAVEN, MestReNova, Progenesis QI, Compound Discoverer or MarkerLynx), the
#' annotation columns are removed, the table is transposed so that samples are
#' rows, and the class label of every sample is prepended as the first column.
#' The field separator (comma, semicolon or tab) is detected automatically
#' from the first line(s) of each file.
#' @param dataformat Input the corresponding "number" of the file format:
#' If set 1, the file already follows the format unified by NOREVAR (see the
#' sample data in the help page).
#' If set 2, the file was generated by one of the supported software tools
#' (see the sample data in the help page).
#' @param rawdata Path of the data file.
#' @param lable Path of the label file; its first column holds the sample
#' names and its second column the class labels. Required when
#' \code{dataformat = 2}, ignored when \code{dataformat = 1}.
#' @return For \code{dataformat = 1}, the data frame read from \code{rawdata}.
#' For \code{dataformat = 2}, a matrix with one row per sample listed in the
#' label file, the label in the first column and one column per feature.
#' An error is raised when the layout of the file is not recognised.
#' @usage PrepareInuputFiles(dataformat, rawdata, lable = NULL)
#' @export PrepareInuputFiles
PrepareInuputFiles <- function(dataformat, rawdata, lable = NULL) {

  # Read a delimited text file after guessing its separator: the candidate
  # that splits the first `probe_lines` lines into the most fields wins.
  read_delimited <- function(path, probe_lines, check_names) {
    separators <- c(",", ";", "\t")
    probe <- readLines(path, n = probe_lines)
    field_counts <- vapply(
      separators,
      function(sep) length(unlist(strsplit(probe, sep, fixed = TRUE))),
      integer(1),
      USE.NAMES = FALSE
    )
    utils::read.csv(
      path,
      header = TRUE,
      sep = separators[which.max(field_counts)],
      stringsAsFactors = FALSE,
      check.names = check_names
    )
  }

  # Column positions of `tbl` matching each regex, concatenated in the order
  # of `patterns` (a column matching several patterns appears several times).
  find_cols <- function(tbl, patterns) {
    unlist(lapply(patterns, function(p) grep(p, colnames(tbl))))
  }

  # Remove every column whose name matches one of `patterns`.
  drop_cols <- function(tbl, patterns) {
    idx <- find_cols(tbl, patterns)
    if (length(idx) == 0) {
      return(tbl)
    }
    tbl[, -idx, drop = FALSE]
  }

  # Keep only the columns whose names match `patterns`, in pattern order.
  keep_cols <- function(tbl, patterns) {
    tbl[, find_cols(tbl, patterns), drop = FALSE]
  }

  # Turn a feature table (first column = feature id, remaining columns = one
  # per sample) into a sample-by-feature matrix restricted to, and ordered
  # like, the samples of `labels`, with the class label as first column.
  attach_labels <- function(feature_table, labels) {
    row.names(feature_table) <- feature_table[, 1]
    # drop = FALSE keeps the sample names when only one sample column exists.
    by_sample <- t(feature_table[, -1, drop = FALSE])
    sample_rows <- match(labels[, 1], row.names(by_sample))
    if (anyNA(sample_rows)) {
      warning(
        "Samples listed in the label file but not found in the data: ",
        paste(labels[is.na(sample_rows), 1], collapse = ", "),
        call. = FALSE
      )
    }
    # drop = FALSE keeps a one-sample selection as a 1-row matrix so that
    # cbind() does not recycle the label across features.
    cbind(Lable = labels[, 2], by_sample[sample_rows, , drop = FALSE])
  }

  if (length(dataformat) != 1 || is.na(dataformat) ||
      !(dataformat %in% c(1, 2))) {
    stop("'dataformat' must be 1 or 2.", call. = FALSE)
  }

  if (dataformat == 1) {
    return(read_delimited(rawdata, probe_lines = 2, check_names = TRUE))
  }

  if (is.null(lable)) {
    stop("'lable' (path of the label file) is required when dataformat = 2.",
         call. = FALSE)
  }

  dataa <- read_delimited(rawdata, probe_lines = 2, check_names = FALSE)
  data2 <- read_delimited(lable, probe_lines = 1, check_names = FALSE)

  # TRUE when every regex in `patterns` matches at least one column name.
  has_all <- function(patterns) {
    all(vapply(
      patterns,
      function(p) any(grepl(p, colnames(dataa))),
      logical(1),
      USE.NAMES = FALSE
    ))
  }

  # Positions of the two group headers that identify a Progenesis QI export.
  norm_col <- match("Normalised abundance", colnames(dataa))
  raw_col <- match("Raw abundance", colnames(dataa))

  # The order of the tests below matters: several signatures overlap, so the
  # first matching layout wins.
  if (has_all(c("fold", "tstat", "pvalue", "mzmed", "rtmed"))) {
    features <- drop_cols(dataa, c(
      "fold", "tstat", "pvalue", "mzmed", "mzmin", "mzmax",
      "rtmed", "rtmin", "rtmax"
    ))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: XCMS")
  } else if (has_all(c("featureidx", "mzmed", "rtmed", "maxint", "isotopes"))) {
    features <- drop_cols(dataa, c(
      "mzmed", "rtmed", "maxint", "isotopes", "adducts", "peakgroup",
      "mzmin", "mzmax", "rtmin", "rtmax", "pvalue", "fold", "usernotes"
    ))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: XCMS")
  } else if (has_all(c("Charge", "rt", "mz", "intensity"))) {
    features <- keep_cols(dataa, c("Metabolite", "^intensity"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: OpenMS")
  } else if (has_all(c("Average", "Identity", "RT"))) {
    features <- drop_cols(dataa, c("ID", "Average", "RT"))
    # The first data row is discarded, exactly as in the original code.
    features <- features[-1, , drop = FALSE]
    features <- keep_cols(features, c("Identity", "Area"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: MZmine")
  } else if (has_all(c("Retention Time", "Mass"))) {
    features <- drop_cols(dataa, c("Time", "Mass"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: MassProfiler Professional (MPP)")
  } else if (has_all(c("Class", "Mass", "Std Conc"))) {
    features <- keep_cols(dataa, c("Name", "^Area"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: LIMSA")
  } else if (has_all(c("m/z", "rt", "peaks", "Width", "Max"))) {
    # The original pattern was "*Intensity"; a leading "*" matches the empty
    # string, so plain "Intensity" selects the same columns.
    features <- keep_cols(dataa, c("m/z", "Intensity"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: MAVEN")
  } else if (has_all(c("ppm", "Annotation", "Width", "Type", "Intensity"))) {
    features <- keep_cols(dataa, c("ppm", "Intensity"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: MestReNova")
  } else if (!is.na(norm_col) && !is.na(raw_col) &&
             nrow(dataa) >= 2 && ncol(dataa) >= 3 &&
             isTRUE(dataa[2, 1] == "Compound") &&
             isTRUE(dataa[2, 3] == "m/z")) {
    # Progenesis QI: the real column header sits in the second data row, and
    # the raw abundances run from the "Raw abundance" column to the end.
    result_data <- dataa[-1, c(1, raw_col:ncol(dataa)), drop = FALSE]
    colnames(result_data) <- unlist(as.list(result_data[1, ]))
    result_data <- result_data[-1, , drop = FALSE]
    sample_cols <- match(data2[, 1], colnames(result_data))
    if (anyNA(sample_cols)) {
      stop(
        "Samples listed in the label file but not found in the data: ",
        paste(data2[is.na(sample_cols), 1], collapse = ", "),
        call. = FALSE
      )
    }
    result <- rbind(
      label = as.character(data2[, 2]),
      result_data[, sample_cols, drop = FALSE]
    )
    rownames(result) <- c("label", as.character(result_data[, 1]))
    res <- t(result)
    message("the Resulting Data File Generated by the Quantification Software: Progenesis QI")
  } else if (has_all(c("Formula", "Annotation", "Molecular", "RT", "Area"))) {
    # Original pattern "*Area" is equivalent to "Area" (see MAVEN above).
    features <- keep_cols(dataa, c("Name", "Area"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: Compound Discoverer")
  } else if (has_all(c("Ret.Time", "m/z", "Saturated"))) {
    features <- drop_cols(dataa, c("Ret.Time", "m/z", "Included", "Saturated"))
    res <- attach_labels(features, data2)
    print("the Resulting Data File Generated by the Software: MarkerLynx")
  } else {
    # No layout recognised: fail with the guidance text instead of the
    # obscure "object 'res' not found".
    stop(
      "Please confirm the format of your data, and you can refer to the sample data!",
      call. = FALSE
    )
  }

  res
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.