# LOB_lpsolve
LOB_lpsolve <- function(LOBpeaklist,choose_class=NULL,save.files=FALSE,use_ms2=FALSE,plot_data = FALSE) {
#library(lpSolve)
### Check Inputs ###
if (!class(LOBpeaklist)=="data.frame") {
stop("Input 'LOBpeaklist' is not an 'data.frame' object.\n",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
if (is.null(LOBpeaklist$match_ID)) {
stop("Input data.frame does not contain a 'match_ID' column.",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
if (is.null(LOBpeaklist$compound_name)) {
stop("Input data.frame does not contain a 'compound_name' column.",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
if (is.null(LOBpeaklist$LOBdbase_mz)) {
stop("Input data.frame does not contain a 'LOBdbase_mz' column.",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
if (is.null(LOBpeaklist$peakgroup_rt)) {
stop("Input data.frame does not contain a 'peakgroup_rt' column.",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
if (is.null(LOBpeaklist$FA_total_no_C)) {
stop("Input data.frame does not contain a 'FA_total_no_C' column.",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
if (is.null(LOBpeaklist$FA_total_no_DB)) {
stop("Input data.frame does not contain a 'FA_total_no_DB' column.",
"Please use a data.frame generated by 'getLOBpeaklist'.")
}
### Define Helper Functions ###
showplots<-function(X,extra){
#for (i in 1:length(unique(X$species))){
#run <- X[X$species == unique(X$species)[i],]
#run <-run[run$match_ID %in% LOBpeaklist$match_ID,]
print(ggplot(X,aes(x = peakgroup_rt, y = LOBdbase_mz,color=lpSolve)) +
scale_color_manual(values=c("Maybe" = "#e7cd08","No"="#e70808","Yes"="#08e799")) +
geom_point() +
geom_text(label=paste0(as.character(X$FA_total_no_C), ":",
as.character(X$FA_total_no_DB)),
hjust = 1,
vjust = 2,
size=3,
color="black")+
ggtitle(paste0("lpSolve Screened Data - ", as.character(X$species),"-",extra)) +
xlab("Peak Group Retention Time (sec)")+
ylab("Peak Group m/z")
)}
screen<-function(X){
#for (k in 1:length(unique(X$species))) {
run <- X
return <- done
#Binary string for each point
Binary_String <- rep(1, nrow(run))
#Empty String we will build a restrictions from
Empty_String <- rep(0, nrow(run))
#Matrix of our Exclusions
Exclusion_Matrix <- matrix(nrow = 1,ncol = nrow(run))
# Run a loop to find what to exclude for each point
cat("\n")
for (i in 1:nrow(run)) {
#Get our row
subject <- run[i,]
Exclusion_Table <- run
# Make a table to store our exclusion info
Exclusion_Table$Exclude <- rep(FALSE,nrow(run))
#lets add more info about the peaks to our exclusion table based on info in it
Exclusion_Table$C_num_diff <- Exclusion_Table$FA_total_no_C - subject$FA_total_no_C
Exclusion_Table$DB_num_diff <- Exclusion_Table$FA_total_no_DB - subject$FA_total_no_DB
Exclusion_Table$diff_factor <- Exclusion_Table$C_num_diff/Exclusion_Table$DB_num_diff
#Lets sort the compounds run above and below our point in terms of rt
lower_rt <- Exclusion_Table[which(Exclusion_Table$peakgroup_rt < subject$peakgroup_rt),]
higher_rt <- Exclusion_Table[which(Exclusion_Table$peakgroup_rt > subject$peakgroup_rt),]
#Now find ones that break the rules for lower and higher and set Exclude to TRUE in the Exclusion_Table
#Exclude the lower with more carbon and less double bonds
lower_match_ID <- lower_rt[lower_rt$FA_total_no_C>=subject$FA_total_no_C & lower_rt$FA_total_no_DB<=subject$FA_total_no_DB,"match_ID"]
Exclusion_Table[which(Exclusion_Table$match_ID %in% lower_match_ID),"Exclude"] <- TRUE
lower_diff <- lower_rt[which(lower_rt$diff_factor>0 & lower_rt$diff_factor<=1 & lower_rt$C_num_diff<0),]
lower_diff_exclude <- lower_diff[which(lower_diff$C_num_diff==-1 & lower_diff$DB_num_diff==-1),]
if (nrow(lower_diff_exclude)>0){
lower_diff <- lower_diff[!(lower_diff$match_ID %in% lower_diff_exclude$match_ID),]
}
lower_diff_names <- row.names(lower_diff)
Exclusion_Table[lower_diff_names,"Exclude"] <- TRUE
#Exclude the higher
higher_match_ID <- higher_rt[higher_rt$FA_total_no_C<=subject$FA_total_no_C & higher_rt$FA_total_no_DB>=subject$FA_total_no_DB,'match_ID']
Exclusion_Table[higher_match_ID,"Exclude"] <- TRUE
higher_diff <- higher_rt[which(higher_rt$diff_factor>0 & higher_rt$diff_factor<=1 & higher_rt$C_num_diff>0),]
higher_diff_exclude <- higher_diff[which(higher_diff$C_num_diff==1 & higher_diff$DB_num_diff==1),]
if (nrow(higher_diff_exclude)>0){
higher_diff <- higher_diff[!(higher_diff$match_ID %in% higher_diff_exclude$match_ID),]
}
higher_diff_names <- row.names(higher_diff)
Exclusion_Table[higher_diff_names,"Exclude"] <- TRUE
#Exclude the compounds with the same name
Exclusion_Table[which(Exclusion_Table$compound_name==subject$compound_name),"Exclude"] <- TRUE
Exclusion_String <- Empty_String
for (j in 1:nrow(run)) {
if(j!=i){
if(Exclusion_Table[j,"Exclude"]==TRUE){
Exclusion_String <- Empty_String
Exclusion_String[j]<-1
Exclusion_String[i]<-1
Exclusion_Matrix <- rbind(Exclusion_Matrix,Exclusion_String)
rownames(Exclusion_Matrix) <- NULL
Exclusion_Matrix <- unique(Exclusion_Matrix)
}
}
cat("\r")
flush.console()
cat("Writing rules for",as.character(unique(X$species)),"compound number",i,"of",nrow(run),". Number of Rules created:",nrow(Exclusion_Matrix)-1,"...")
}
}
cat("Done")
Final_Exclusion_Matrix <- Exclusion_Matrix[-1,]
if(NROW(Final_Exclusion_Matrix)==0){
cat("\n No rules created. Compound class is to small or any lacks noise to screen. Returning all points as Yes")
return[return$match_ID %in% run$match_ID,"lpSolve"] <- "Yes"
}else{
if (NCOL(Final_Exclusion_Matrix)==1) {
rows <- 1
Final_Exclusion_Matrix <- t(matrix(Final_Exclusion_Matrix))
}else{
rows <- nrow(Final_Exclusion_Matrix)
}
#time to screen
dir <- rep("<=", rows) # all constraints '<='
rhs <- rep(1, rows) # all right hand sides = 1
#all.bin for binary. Set Solution number high to get all solutions.
cat("\nApplying lpSolve algorythm...")
gc()
sol <- lp("max", Binary_String, Final_Exclusion_Matrix, dir, rhs,all.bin = TRUE,num.bin.solns = 100)
gc()
cat(" Done")
numcols <- nrow(run)
numsols <- sol$num.bin.solns
solutions <- matrix(head(sol$solution, numcols*numsols), nrow=numsols, byrow=TRUE)
bar <- data.frame(colSums(solutions))
run$Type<-rep(0,nrow(run))
run[which(bar==0),"Type"] <- 'No'
run[which(bar!=nrow(solutions) & bar!=0 ),"Type"] <- 'Maybe'
run[which(bar==nrow(solutions)),"Type"] <- 'Yes'
return[return$match_ID %in% run$match_ID,"lpSolve"] <- run[order(run$match_ID),"Type"]
}
#}
return(return)
}
### Format our input in a 'run' dataframe
done <- LOBpeaklist
done$lpSolve <- rep(NA,length(done$match_ID))
LOBpeaklist <- LOBpeaklist[which(LOBpeaklist$degree_oxidation==0),]
if (is.null(choose_class)==FALSE) {
if(any(!(choose_class %in% unique(LOBpeaklist$species)))){
stop("Chosen 'choose_class' does not appear in the 'species' column of data.frame.")
}else{
LOBpeaklist <- LOBpeaklist[which(LOBpeaklist$species==choose_class),]}
}else{
LOBpeaklist <- LOBpeaklist[which(is.na(LOBpeaklist$FA_total_no_DB)==FALSE),]
}
for (k in 1:length(unique(LOBpeaklist$species))) {
LOBrun <- LOBpeaklist[which(LOBpeaklist$species== unique(LOBpeaklist$species)[k]),]
# Put what we need in a dataframe, only include RtF data if it exists
if(is.null(LOBrun$Flag)==FALSE){
PRErun <- data.frame(LOBrun$match_ID,
LOBrun$compound_name,
LOBrun$LOBdbase_mz,
LOBrun$peakgroup_rt,
LOBrun$FA_total_no_C,
LOBrun$FA_total_no_DB,
LOBrun$species,
LOBrun$DNPPE_Factor,
LOBrun$Flag,
LOBrun$DBase_DNPPE_RF)
#Re-name our column names
colnames(PRErun) <- c("match_ID",
"compound_name",
"LOBdbase_mz",
"peakgroup_rt",
"FA_total_no_C",
"FA_total_no_DB",
"species",
"DNPPE_Factor",
"Flag",
"DBase_DNPPE_RF")
}else{
PRErun <- data.frame(LOBrun$match_ID,
LOBrun$compound_name,
LOBrun$LOBdbase_mz,
LOBrun$peakgroup_rt,
LOBrun$FA_total_no_C,
LOBrun$FA_total_no_DB,
LOBrun$species)
#Re-name our column names
colnames(PRErun) <- c("match_ID",
"compound_name",
"LOBdbase_mz",
"peakgroup_rt",
"FA_total_no_C",
"FA_total_no_DB",
"species")
}
#If we have elected to use MS2 data, limit our options based on that
Final_Switch <- FALSE
if (use_ms2 == TRUE){
if(any(PRErun$Flag %in% "5%_rtv" | PRErun$Flag %in% "ms2v")){
Final_Switch <- TRUE
cat("\nPerforming Prescreen of MS2 data for use later for",as.character(unique(LOBpeaklist$species)[k]))
#find ms2v and %5v points from the data set
ms2v <- PRErun[which(PRErun$Flag == "ms2v"),]
rtv <- PRErun[which(PRErun$Flag == "5%_rtv"),]
ms2v <- rbind(ms2v,rtv)
#Screen for points within that that dont fit and plot the results
ms2v_screened <- screen(ms2v)
ms2v_screened_noNA <- ms2v_screened[which(is.na(ms2v_screened$lpSolve)==FALSE & ms2v_screened$species == unique(LOBpeaklist$species)[k]),]
if(plot_data == TRUE){
showplots(ms2v_screened_noNA,extra = "RtF Confirmed Only")
}
#Take only the yes points
use2screen <- ms2v_screened_noNA[ms2v_screened_noNA$lpSolve == "Yes",]
cat("\nWill use",nrow(use2screen),"verified points for screening.")
cat("\n")
#For each class eliminate everything that doesn't fit with the yes points
run <- use2screen
final_cut <- integer()
for (i in 1:nrow(run)) {
#Get our row
subject <- run[i,]
Exclusion_Table <- PRErun
# Make a table to store our exclusion info
Exclusion_Table$Exclude <- rep(FALSE,nrow(Exclusion_Table))
#lets add more info about the peaks to our exclusion table based on info in it
Exclusion_Table$C_num_diff <- Exclusion_Table$FA_total_no_C - subject$FA_total_no_C
Exclusion_Table$DB_num_diff <- Exclusion_Table$FA_total_no_DB - subject$FA_total_no_DB
Exclusion_Table$diff_factor <- Exclusion_Table$C_num_diff/Exclusion_Table$DB_num_diff
#Lets sort the compounds run above and below our point in terms of rt
lower_rt <- Exclusion_Table[which(Exclusion_Table$peakgroup_rt < subject$peakgroup_rt),]
higher_rt <- Exclusion_Table[which(Exclusion_Table$peakgroup_rt > subject$peakgroup_rt),]
#Now find ones that break the rules for lower and higher and set Exclude to TRUE in the Exclusion_Table
#Exclude the lower
lower_match_ID <- lower_rt[lower_rt$FA_total_no_C>=subject$FA_total_no_C & lower_rt$FA_total_no_DB<=subject$FA_total_no_DB,"match_ID"]
lower_diff <- lower_rt[which(lower_rt$diff_factor>0 & lower_rt$diff_factor<=1 & lower_rt$C_num_diff<0),]
lower_diff_exclude <- lower_diff[which(lower_diff$C_num_diff==-1 & lower_diff$DB_num_diff==-1),]
if (nrow(lower_diff_exclude)>0){
lower_diff <- lower_diff[!(lower_diff$match_ID %in% lower_diff_exclude$match_ID),]
}
final_cut <- c(lower_diff$match_ID,lower_match_ID,final_cut)
#Exclude the higher
higher_match_ID <- higher_rt[higher_rt$FA_total_no_C<=subject$FA_total_no_C & higher_rt$FA_total_no_DB>=subject$FA_total_no_DB,"match_ID"]
higher_diff <- higher_rt[which(higher_rt$diff_factor>0 & higher_rt$diff_factor<=1 & higher_rt$C_num_diff>0),]
higher_diff_exclude <- higher_diff[which(higher_diff$C_num_diff==1 & higher_diff$DB_num_diff==1),]
if (nrow(higher_diff_exclude)>0){
higher_diff <- higher_diff[!(higher_diff$match_ID %in% higher_diff_exclude$match_ID),]
}
final_cut <- c(higher_diff$match_ID,higher_match_ID,final_cut)
cat("\r")
flush.console()
cat("Eliminating points that do not fit. Point number",i,"of",nrow(run),". Number of points excluded:",length(unique(final_cut)),"...")
}
cat("Done!")
cat("\n")
ms2_excluded_matchids <- PRErun[PRErun$match_ID %in% final_cut,"match_ID"]
PRErun <- PRErun[!(PRErun$match_ID %in% final_cut),]
}else{
cat("\nNo points within 5% of a retention time window. Moving to next class")
next()
}
}
#Screen everything
cat("\nPerforming screening of complete dataset for",as.character(unique(LOBpeaklist$species)[k]))
screened <- screen(PRErun)
if(Final_Switch==TRUE){
screened[screened$match_ID %in% ms2_excluded_matchids,"lpSolve"] <- "No"
}
screened_plot <- screened[which(is.na(screened$lpSolve)==FALSE & screened$species == unique(LOBpeaklist$species)[k]),]
if(plot_data==TRUE){
showplots(screened_plot,extra = "Final")
}
# if (save.files==TRUE){
# ggsave(filename = paste0(as.character(run$species), "_LP_solve.tiff"),
# plot = last_plot(),
# device = "tiff",
# width = 22, height = 17)
# }
cat("\n")
if (use_ms2==TRUE & Final_Switch == TRUE) {
test <- unique(screened[which(screened$match_ID %in% use2screen$match_ID),"lpSolve"])
if("No" %in% test){
cat("\n")
warning("Not all MS2v peaks included in solution.")
done[done$match_ID %in% screened_plot$match_ID,"lpSolve"] <- screened_plot[order(screened_plot$match_ID),"lpSolve"]
}
if("Maybe" %in% test){
cat("\n")
warning("Not all MS2v peaks included in solution.")
done[done$match_ID %in% screened_plot$match_ID,"lpSolve"] <- screened_plot[order(screened_plot$match_ID),"lpSolve"]
}
cat("\n")
cat("All MS2v peaks included in solution!")
done[done$match_ID %in% screened_plot$match_ID,"lpSolve"] <- screened_plot[order(screened_plot$match_ID),"lpSolve"]
}else{
done[done$match_ID %in% screened_plot$match_ID,"lpSolve"] <- screened_plot[order(screened_plot$match_ID),"lpSolve"]
cat("Class Screened without MS2 data.")
}
cat("\n")
}
return(done)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.