Nothing
UFAx_workflow <- function(spreadsheet) {
##
tryCatch(gc(), error = function(e) {NULL}, warning = function(w) {NULL})
tryCatch(closeAllConnections(), error = function(e) {NULL}, warning = function(w) {NULL})
##
exhaustive_chemical_enumeration_annotated_table <- NULL
##
##############################################################################
##
initiation_time <- Sys.time()
IPA_message("Initiated testing the spreadsheet consistency!", failedMessage = FALSE)
##
checkpoint_parameter <- FALSE
##
if (typeof(spreadsheet) == "list") {
if (ncol(spreadsheet) >= 4) {
PARAM_exECS <- cbind(spreadsheet[, 2], spreadsheet[, 4])
checkpoint_parameter <- TRUE
##
} else if (ncol(spreadsheet) == 2) {
PARAM_exECS <- spreadsheet
checkpoint_parameter <- TRUE
##
} else {
IPA_message("The UFAx spreadsheet tab was not produced properly!")
}
} else if (typeof(spreadsheet) == "character") {
if (length(spreadsheet) == 1) {
if (file.exists(spreadsheet)) {
PARAM_exECS <- readxl::read_xlsx(spreadsheet, sheet = "exhaustive_chemical_enumeration")
PARAM_exECS <- cbind(PARAM_exECS[, 2], PARAM_exECS[, 4])
checkpoint_parameter <- TRUE
} else {
IPA_message("The `exhaustive_chemical_enumeration` spreadsheet tab not found! It should be an Excel file with .xlsx extention!")
}
} else {
IPA_message("The UFAx spreadsheet tab was not produced properly!")
}
} else {
IPA_message("The UFAx spreadsheet tab was not produced properly!")
}
##
##############################################################################
##
if (checkpoint_parameter) {
##
x0001 <- which(PARAM_exECS[, 1] == "exECS0001")
inputHRMSfolderPath <- gsub("\\", "/", PARAM_exECS[x0001, 2], fixed = TRUE)
PARAM_exECS[x0001, 2] <- inputHRMSfolderPath
if (dir.exists(inputHRMSfolderPath)) {
MSfileName <- PARAM_exECS[which(PARAM_exECS[, 1] == "exECS0002"), 2]
if (!file.exists(paste0(inputHRMSfolderPath, "/", MSfileName))) {
checkpoint_parameter <- FALSE
IPA_message("ERROR!!! Problem with exECS0002! HRMS is not available!")
}
} else {
checkpoint_parameter <- FALSE
IPA_message("ERROR!!! Problem with exECS0001! Folder of HRMS file is not available!")
}
##
x0003 <- which(PARAM_exECS[, 1] == "exECS0003")
addressPeaklist <- gsub("\\", "/", PARAM_exECS[x0003, 2], fixed = TRUE)
PARAM_exECS[x0003, 2] <- addressPeaklist
if (dir.exists(addressPeaklist)) {
peaklistFileName <- PARAM_exECS[which(PARAM_exECS[, 1] == "exECS0004"), 2]
addressPeaklistFileName <- paste0(addressPeaklist, "/", peaklistFileName)
if (!file.exists(addressPeaklistFileName)) {
checkpoint_parameter <- FALSE
IPA_message("ERROR!!! Problem with exECS0004! peaklist is not available!")
}
} else {
checkpoint_parameter <- FALSE
IPA_message("ERROR!!! Problem with exECS0003! Folder of peaklist is not available!")
}
peaklist <- IDSL.IPA::loadRdata(addressPeaklistFileName)
noPeaks <- dim(peaklist)[1]
##
exECS0005 <- PARAM_exECS[which(PARAM_exECS[, 1] == "exECS0005"), 2]
if (is.na(exECS0005)) {
checkpoint_parameter <- FALSE
IPA_message("ERROR!!! Problem with exECS0005! This parameter should be `All` or a vector of indices!")
} else {
if (gsub(" ", "", tolower(exECS0005)) == "all") {
selectedIDpeaklist <- 1:noPeaks
IPA_message("The enitre 12C m/z values in the peaklist were placed in the processing row! Annotated molecular formulas for peak IDs are kept in the 'log_exECS_annotation_' folder!", failedMessage = TRUE)
} else {
selectedIDpeaklist <- tryCatch(eval(parse(text = paste0("c(", exECS0005, ")"))), error = function(e) {NULL})
if (is.null(selectedIDpeaklist) | (max(selectedIDpeaklist) > noPeaks)) {
checkpoint_parameter <- FALSE
IPA_message("ERROR!!! Problem with exECS0005! The range of indices are out of the peaklist dimension!")
} else {
IPA_message("The following peak IDs were selected for processing: ", failedMessage = FALSE)
for (id in 1:length(selectedIDpeaklist)) {
IPA_message(paste0(selectedIDpeaklist[id], " - ", peaklist[selectedIDpeaklist[id], 3], " - ", peaklist[selectedIDpeaklist[id], 8]), failedMessage = FALSE)
}
}
}
}
##
x0006 <- which(PARAM_exECS[, 1] == "exECS0006")
output_path <- gsub("\\", "/", PARAM_exECS[x0006, 2], fixed = TRUE)
PARAM_exECS[x0006, 2] <- output_path
if (!dir.exists(output_path)) {
tryCatch(dir.create(output_path, recursive = TRUE), warning = function(w) {warning("Problem with exECS0006! R cannot create the folder!")})
if (!dir.exists(output_path)) {
checkpoint_parameter <- FALSE
}
}
##
number_processing_threads <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0007'), 2])
if (length(number_processing_threads) == 0) {
IPA_message("ERROR!!! Problem with exECS0007! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (number_processing_threads >= 1) {
if ((number_processing_threads %% 1) != 0) {
IPA_message("ERROR!!! Problem with exECS0007! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
} else {
IPA_message("ERROR!!! Problem with exECS0007! This parameter should be at least 1 !")
checkpoint_parameter <- FALSE
}
}
##
maxR13C <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0008'), 2])
if (length(maxR13C) == 0) {
IPA_message("ERROR!!! Problem with exECS0008! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (maxR13C <= 0) {
IPA_message("ERROR!!! Problem with exECS0008! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
}
}
############################################################################
########################### Chemical space #################################
############################################################################
B_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0009'), 2])
if (length(B_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0009! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (B_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0009! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
Br_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0010'), 2])
if (length(Br_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0010! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (Br_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0010! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
Cl_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0011'), 2])
if (length(Cl_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0011! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (Cl_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0011! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
S_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0012'), 2])
if (length(S_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0012! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (S_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0012! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
Si_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0013'), 2])
if (length(Si_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0013! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (Si_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0013! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
N_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0014'), 2])
if (length(N_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0014! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (N_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0014! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
As_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0015'), 2])
if (length(As_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0015! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (As_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0015! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
I_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0016'), 2])
if (length(I_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0016! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (I_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0016! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
O_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0017'), 2])
if (length(O_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0017! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (O_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0017! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
P_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0018'), 2])
if (length(P_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0018! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (P_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0018! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
Na_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0019'), 2])
if (length(Na_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0019! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (Na_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0019! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
##
K_MAX <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0020'), 2])
if (length(K_MAX) == 0) {
IPA_message("ERROR!!! Problem with exECS0020! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
} else {
if (K_MAX < 0) {
IPA_message("ERROR!!! Problem with exECS0020! This parameter should be a positive integer!")
checkpoint_parameter <- FALSE
}
}
############################################################################
ipw <- PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0021'), 2]
if (ipw == "[M+H/K/Na]" | ipw == "[M+H]" | ipw == "[M+Na]" | ipw == "[M+K]") {
ipw_n <- -1
} else if (ipw == "[M-H]") {
ipw_n <- +1
} else if (ipw == "[M]") {
ipw_n <- 0
} else {
IPA_message("ERROR!!! Problem with exECS0021! This parameter should be any of '[M+H/K/Na]', '[M-H]', '[M]'!")
checkpoint_parameter <- FALSE
}
##
peak_spacing <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0022'), 2])
if (length(peak_spacing) == 0) {
IPA_message("ERROR!!! Problem with exECS0022! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (peak_spacing < 0) {
IPA_message("ERROR!!! Problem with exECS0022! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
}
}
##
intensity_cutoff_str <- PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0023'), 2]
if (length(intensity_cutoff_str) == 0) {
IPA_message("ERROR!!! Problem with exECS0023!")
checkpoint_parameter <- FALSE
##
c <- 5
b <- 5
br <- 5
cl <- 5
k <- 5
s <- 5
se <- 5
si <- 5
##
checkStrInt <- FALSE
tryCatch(eval(parse(text = intensity_cutoff_str)), error = function(e) {checkStrInt <- TRUE})
if (checkStrInt) {
IPA_message("ERROR!!! Problem with exECS0023!")
checkpoint_parameter <- FALSE
}
}
##
UFA_IP_memeory_variables <- tryCatch(eval(parse(text = paste0("c(", PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0024'), 2], ")"))), error = function(e) {NULL})
if (length(UFA_IP_memeory_variables) != 3) {
IPA_message("ERROR!!! Problem with exECS0024! This parameter should be a vector of three positive numbers!")
checkpoint_parameter <- FALSE
}
##
maxHalogenCounts <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0025'), 2])
if (length(maxHalogenCounts) == 0) {
IPA_message("ERROR!!! Problem with exECS0025! This parameter should be a number!")
checkpoint_parameter <- FALSE
} else {
if (maxHalogenCounts < 0) {
IPA_message("ERROR!!! Problem with exECS0025! This parameter should be a number!")
checkpoint_parameter <- FALSE
}
}
##
Na_K_x <- PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0026'), 2]
if (Na_K_x == "1" | tolower(Na_K_x) == "t" | tolower(Na_K_x) == "true") {
NaKruleCheck <- TRUE
} else if (Na_K_x == "" | Na_K_x == " " | Na_K_x == "0" | tolower(Na_K_x) == "f" | tolower(Na_K_x) == "false") {
NaKruleCheck <- FALSE
} else {
IPA_message("ERROR!!! Problem with exECS0026!")
checkpoint_parameter <- FALSE
}
##
MaxR13C <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0027'), 2]) + maxR13C
if (length(MaxR13C) == 0) {
IPA_message("ERROR!!! Problem with exECS0027! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (MaxR13C <= 0) {
IPA_message("ERROR!!! Problem with exECS0027! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
}
}
############################################################################
################# Molecular formula annotation criteria ####################
############################################################################
massAccuracy <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0028'), 2])
if (length(massAccuracy) == 0) {
IPA_message("ERROR!!! Problem with exECS0028! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (massAccuracy > 0.01) {
IPA_message("ERROR!!! Problem with exECS0028! Mass accuracy must be below `0.01 Da`")
checkpoint_parameter <- FALSE
}
}
##
maxNEME <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0029'), 2])
if (length(maxNEME) == 0) {
IPA_message("ERROR!!! Problem with exECS0029! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (maxNEME <= 0) {
IPA_message("ERROR!!! Problem with exECS0029! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
}
}
##
minPCS <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0030'), 2])
if (length(minPCS) == 0) {
IPA_message("ERROR!!! Problem with exECS0030! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (minPCS <= 0) {
IPA_message("ERROR!!! Problem with exECS0030! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
}
}
##
minNDCS <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0031'), 2])
if (length(minNDCS) == 0) {
IPA_message("ERROR!!! Problem with exECS0031! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (minNDCS <= 0) {
IPA_message("ERROR!!! Problem with exECS0031! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
}
}
##
minRCS <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0032'), 2])
if (length(minRCS) == 0) {
IPA_message("ERROR!!! Problem with exECS0032! This parameter should be between 0-100!")
checkpoint_parameter <- FALSE
} else {
if ((minRCS < 0) | (minRCS > 100)) {
IPA_message("ERROR!!! Problem with exECS0032! This parameter should be between 0-100!")
checkpoint_parameter <- FALSE
}
}
##
scoreCoefficients <- tryCatch(eval(parse(text = PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0033'), 2])), error = function(e) {NULL})
if (is.null(scoreCoefficients)) {
IPA_message("ERROR!!! Problem with exECS0033! This parameter should be a vector of five positive numbers!")
checkpoint_parameter <- FALSE
} else {
if (length(scoreCoefficients) != 5) {
IPA_message("ERROR!!! Problem with exECS0033! This parameter should be a vector of five positive numbers!")
checkpoint_parameter <- FALSE
}
}
##
maxAllowedNumberHits <- as.numeric(PARAM_exECS[which(PARAM_exECS[, 1] == "exECS0034"), 2])
if (length(maxAllowedNumberHits) == 0) {
IPA_message("ERROR!!! Problem with exECS0034! This parameter should be a positive number!")
checkpoint_parameter <- FALSE
} else {
if (maxAllowedNumberHits > 0) {
dev.offCheck <- TRUE
while (dev.offCheck) {
dev.offCheck <- tryCatch(dev.off(), error = function(e) {FALSE})
}
##
exportSpectraCheck <- TRUE
outputProfileSpectra <- paste0(output_path, "/UFAx_spectra")
if (!dir.exists(outputProfileSpectra)) {
dir.create(outputProfileSpectra, recursive = TRUE)
}
IPA_message("UFAx_spectra comparison plots with theoretical isotopic profiles are stored in the `UFAx_spectra` folder!")
} else {
exportSpectraCheck <- FALSE
exportSpectraParameters <- NULL
}
}
##
exECS0035 <- PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0035'), 2]
if (length(exECS0035) == 0) {
IPA_message("ERROR!!! Problem with exECS0035!")
checkpoint_parameter <- FALSE
} else {
if (!(tolower(exECS0035) == "yes" | tolower(exECS0035) == "no")) {
IPA_message("ERROR!!! Problem with exECS0035!")
checkpoint_parameter <- FALSE
}
}
##
if (tolower(exECS0035) == "yes") {
IonPathways <- tryCatch(eval(parse(text = paste0("c(", PARAM_exECS[which(PARAM_exECS[, 1] == 'exECS0036'), 2], ")"))), error = function(e) {NULL})
if (is.null(IonPathways)) {
IPA_message("ERROR!!! Problem with exECS0036!")
checkpoint_parameter <- FALSE
}
##
exECS0037 <- which(PARAM_exECS[, 1] == 'exECS0037')
if (length(exECS0037) == 0) {
IPA_message("ERROR!!! Problem with exECS0037! PubChem library data is not available! You should use the 'molecular_formula_library_generator' module to produce the molecular formula library!")
checkpoint_parameter <- FALSE
} else {
PubChem_library_path <- gsub("\\", "/", PARAM_exECS[exECS0037, 2], fixed = TRUE)
PARAM_exECS[exECS0037, 2] <- PubChem_library_path
if (!file.exists(PubChem_library_path)) {
IPA_message("ERROR!!! Problem with exECS0037! PubChem library data is not available! You should use the 'molecular_formula_library_generator' module to produce the molecular formula library!")
checkpoint_parameter <- FALSE
}
}
}
############################################################################
if (checkpoint_parameter) {
IPA_message("The spreadsheet is consistent with the IDSL.UFAx workflow!", failedMessage = FALSE)
##
Elements <- c("As", "Br", "Cl", "Na", "Si", "B", "C", "F", "H", "I", "K", "N", "O", "P", "S") # DO NOT change this order!
x_c_el <- 7 # = which(Elements == "C") # index number of Carbon
LElements <- 15 # = length(Elements)
elementSorterList <- element_sorter(ElementList = Elements, alphabeticalOrder = FALSE)
Elements_mass_abundance <- elementSorterList[[2]]
valence_vec <- elementSorterList[[3]]
Elements_mass <- do.call('c', lapply(Elements, function(el) {
Elements_mass_abundance[[el]][[1]][1]
}))
Elements_mass[6] <- 11.009305167 # To use the most abundant mass of Boron
##
##########################################################################
##
get_formula_mz <- function(query_mz, qvec, carbon_masses, atom_vec, massAccuracy) {
##
set_1 <- comboGeneral(qvec, 1, FALSE, constraintFun = "sum",
comparisonFun = c(">=", "<="),
limitConstraints = c((query_mz - massAccuracy), (query_mz + massAccuracy)),
keepResults = TRUE)
set_2 <- comboGeneral(qvec, 2, FALSE, constraintFun = "sum",
comparisonFun = c(">=", "<="),
limitConstraints = c((query_mz - massAccuracy), (query_mz + massAccuracy)),
keepResults = TRUE)
set_2_a <- set_2
set_2_a[!set_2_a %in% carbon_masses] <- 0
set_2_a[set_2_a %in% carbon_masses] <- 1
set_2 <- set_2[which(rowSums(set_2_a) > 0),]
set_3 <- comboGeneral(qvec, 3, FALSE, constraintFun = "sum",
comparisonFun = c(">=", "<="),
limitConstraints = c((query_mz - massAccuracy), (query_mz + massAccuracy)),
keepResults = TRUE)
set_3_a <- set_3
set_3_a[!set_3_a %in% carbon_masses] <- 0
set_3_a[set_3_a %in% carbon_masses] <- 1
set_3 <- set_3[which(rowSums(set_3_a) > 0),]
set_4 <- comboGeneral(qvec, 4, FALSE, constraintFun = "sum",
comparisonFun = c(">=", "<="),
limitConstraints = c((query_mz - massAccuracy), (query_mz + massAccuracy)),
keepResults = TRUE)
set_4_a <- set_4
set_4_a[!set_4_a %in% carbon_masses] <- 0
set_4_a[set_4_a %in% carbon_masses] <- 1
set_4 <- set_4[which(rowSums(set_4_a) > 0),]
set_5 <- comboGeneral(qvec, 5, FALSE, constraintFun = "sum",
comparisonFun = c(">=", "<="),
limitConstraints = c((query_mz - massAccuracy), (query_mz + massAccuracy)),
keepResults = TRUE)
set_5_a <- set_5
set_5_a[!set_5_a %in% carbon_masses] <- 0
set_5_a[set_5_a %in% carbon_masses] <- 1
set_5 <- set_5[which(rowSums(set_5_a) > 0),]
set_6 <- comboGeneral(qvec, 6, FALSE, constraintFun = "sum",
comparisonFun = c(">=", "<="),
limitConstraints = c((query_mz - massAccuracy), (query_mz + massAccuracy)),
keepResults = TRUE)
set_6_a <- set_6
set_6_a[!set_6_a %in% carbon_masses] <- 0
set_6_a[set_6_a %in% carbon_masses] <- 1
set_6 <- set_6[which(rowSums(set_6_a) > 0), ]
##
get_formula_text <- function(sety) {
if (!is.matrix(sety)) {
sety <- t(as.matrix(sety))
}
if (nrow(sety) > 0) {
form_mat <- sapply(1:(ncol(sety) - 1), function(z) {
as.character(atom_vec[as.character(sety[, z])])
})
if (!is.matrix(form_mat)) {
form_mat <- t(as.matrix(form_mat))
}
apply(form_mat, 1, paste0, collapse = "")
}
}
c(get_formula_text(set_1), get_formula_text(set_2), get_formula_text(set_3),
get_formula_text(set_4), get_formula_text(set_5), get_formula_text(set_6))
}
##
##########################################################################
##
exhaustive_chemical_enumeration_call <- function(i_mz) {
exECS_annontated_table <- NULL
##
query_mz <- as.numeric(peaklist[i_mz, 8])
R13C_PL <- as.numeric(peaklist[i_mz, 11])
##
c_min <- max(c(1, floor((R13C_PL - maxR13C)/1.0816)))
c_max <- ceiling((R13C_PL + maxR13C)/1.0816)
b_max <- min(c(B_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[6])))))
br_max <- min(c(Br_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[2])))))
cl_max <- min(c(Cl_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[3])))))
k_max <- K_MAX
s_max <- min(c(S_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[15])))))
si_max <- min(c(Si_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[5])))))
n_max <- min(c(N_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[12])))))
as_max <- min(c(As_MAX, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[1])))
f_max <- min(c(2*c_max + 3*n_max + 6, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[8])))))
i_max <- min(c(I_MAX, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[10])))
h_max <- min(c(2*c_max + 3*n_max + 6, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[9])))))
na_max <- Na_MAX
o_max <- min(c(O_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[13])))))
p_max <- min(c(P_MAX, max(c(0, floor((query_mz - c_min*Elements_mass[x_c_el])/Elements_mass[14])))))
##
minfreq <- rep(0, LElements)
minfreq[x_c_el] <- c_min
maxfreq <- c(as_max, br_max, cl_max, na_max, si_max, b_max, c_max, f_max, h_max, i_max, k_max, n_max, o_max, p_max, s_max)
#
qvec <- unlist(lapply(1:LElements, function(x) {
xvec <- round(sapply(c(minfreq[x]:maxfreq[x]), function(yy) {yy*Elements_mass[x]}), digits = 3)
names(xvec) <- sapply(c(minfreq[x]:maxfreq[x]), function(yy) {paste0(Elements[x], yy)})
xvec
}))
qvec <- qvec[which(qvec != 0)]
atom_vec <- names(qvec)
names(atom_vec) <- as.character(qvec)
carbon_masses <- Elements_mass[x_c_el]*(minfreq[x_c_el]:maxfreq[x_c_el])
molecular_formula <- get_formula_mz(query_mz, qvec, carbon_masses, atom_vec, massAccuracy)
########################################################################
if (length(molecular_formula) > 0) {
MoleFormVecMat <- do.call(rbind, lapply(molecular_formula, function(molf) {
molvec <- formula_vector_generator(molf, Elements, LElements, allowedRedundantElements = FALSE) # `allowedRedundantElements` must be `FALSE` for UFAx
if (molvec[x_c_el] > 0) {
molvec
}
}))
######################################################################
LMoleFormVecMat <- length(MoleFormVecMat)/15
if (LMoleFormVecMat > 0) {
####################################################################
## xCondition <- which((h + cl + br + f + i) >= (c/2 - n - 1) & (h + cl + br + f + i) <= (2*c + 3*n + 6))
if (LMoleFormVecMat == 1) {
sumClBrFI <- sum(MoleFormVecMat[c(2, 3, 8, 10)])
sumHClBrFI <- sumClBrFI + MoleFormVecMat[9]
} else {
sumClBrFI <- rowSums(MoleFormVecMat[, c(2, 3, 8, 10)])
sumHClBrFI <- sumClBrFI + MoleFormVecMat[, 9]
}
xCondition <- which(sumHClBrFI >= (1/2*MoleFormVecMat[, x_c_el] - MoleFormVecMat[, 12] - 1) &
sumHClBrFI <= (2*MoleFormVecMat[, x_c_el] + 3*MoleFormVecMat[, 12] + 6))
sumHClBrFI <- NULL
##
if (length(xCondition) > 0) {
if (maxHalogenCounts > 0) {
xC <- which(sumClBrFI[xCondition] <= maxHalogenCounts)
if (length(xC) > 0) {
xCondition <- xCondition[xC]
} else {
xCondition <- NULL
}
}
sumClBrFI <- NULL
##
if (length(xCondition) > 0) {
if (NaKruleCheck) {
xC <- which((MoleFormVecMat[xCondition, 4] + MoleFormVecMat[xCondition, 11]) <= 1)
if (length(xC) > 0) {
xCondition <- xCondition[xC]
} else {
xCondition <- NULL
}
}
##
LMoleFormVecMat <- length(xCondition)
if (LMoleFormVecMat > 0) {
if (LMoleFormVecMat == 1) {
MoleFormVecMat <- matrix(MoleFormVecMat[xCondition, ], nrow = 1)
} else {
MoleFormVecMat <- MoleFormVecMat[xCondition, ]
}
##
seniorRuleMatCheck <- do.call('c', lapply(1:LMoleFormVecMat, function(el) {# Extended SENOIR rule
extendedSENIORrule(MoleFormVecMat[el, ], valence_vec, ionization_correction = ipw_n)
}))
LMoleFormVecMat <- length(which(seniorRuleMatCheck))
if (LMoleFormVecMat > 0) {
if (LMoleFormVecMat == 1) {
MoleFormVecMat <- matrix(MoleFormVecMat[seniorRuleMatCheck, ], nrow = 1)
} else {
MoleFormVecMat <- MoleFormVecMat[seniorRuleMatCheck, ]
MoleFormVecMat <- unique(as.matrix(MoleFormVecMat)) # To remove redundant rows
}
##
molecularFormulaDatabase <- list(Elements, MoleFormVecMat, rep(1, LMoleFormVecMat))
##
MoleFormVecMat <- NULL
##
IPDB_exECS <- molecularFormula2IPdb(molecularFormulaDatabase, retentionTime = NULL, peak_spacing, intensity_cutoff_str, IonPathways = "[M]",
number_processing_threads = 1, UFA_IP_memeory_variables, allowedMustRunCalculation = FALSE, allowedVerbose = FALSE)
##
exECS_annontated_table <- molecular_formula_annotator(IPDB_exECS, spectraList, peaklist, selectedIPApeaks = i_mz, massAccuracy, maxNEME, minPCS,
minNDCS, minRCS, scoreCoefficients, RTtolerance = NA, correctedRTpeaklist = NULL,
exportSpectraParameters, number_processing_threads = 1)
}
}
}
}
}
}
save(exECS_annontated_table, file = paste0(output_path_log, i_mz, '_exECS_annotation.Rdata'))
##
return(exECS_annontated_table)
}
##########################################################################
outputer <- IDSL.IPA::IPA_MSdeconvoluter(inputHRMSfolderPath, MSfileName)
spectraList <- outputer[["spectraList"]]
msPolarity <- outputer[["msPolarity"]]
outputer <- NULL
## Creating the log folder
output_path_log <- paste0(output_path, "/log_exECS_annotation_", MSfileName, "/")
if (!dir.exists(output_path_log)) {
tryCatch(dir.create(output_path_log, recursive = TRUE), warning = function(w){stop("Can't create the log folder!!!")})
}
##
if (exportSpectraCheck) {
exportSpectraParameters <- c(maxAllowedNumberHits, MSfileName, msPolarity, outputProfileSpectra)
} else {
exportSpectraParameters <- NULL
}
##
IPA_message("Initiated the exhaustive chemical enumeration analysis!!!", failedMessage = FALSE)
##
##########################################################################
##
if (number_processing_threads == 1) {
##
exhaustive_chemical_enumeration_annotated_table <- do.call(rbind, lapply(selectedIDpeaklist, function(i_mz) {
exhaustive_chemical_enumeration_call(i_mz)
}))
##
} else {
##
########################################################################
##
osType <- Sys.info()[['sysname']]
if (osType == "Windows") {
##
clust <- makeCluster(number_processing_threads)
clusterExport(clust, setdiff(ls(), c("clust", "selectedIDpeaklist")), envir = environment())
##
exhaustive_chemical_enumeration_annotated_table <- do.call(rbind, parLapply(clust, selectedIDpeaklist, function(i_mz) {
exhaustive_chemical_enumeration_call(i_mz)
}))
##
stopCluster(clust)
##
######################################################################
##
} else {
##
exhaustive_chemical_enumeration_annotated_table <- do.call(rbind, mclapply(selectedIDpeaklist, function(i_mz) {
exhaustive_chemical_enumeration_call(i_mz)
}, mc.cores = number_processing_threads))
closeAllConnections()
}
}
##
##########################################################################
##
rownames(exhaustive_chemical_enumeration_annotated_table) <- NULL
if (tolower(exECS0035) == "yes") {
IPA_message("Initiated searching in the library of molecular formula!!!", failedMessage = FALSE)
MFlibrary <- IDSL.IPA::loadRdata(PubChem_library_path)
exhaustive_chemical_enumeration_annotated_table <- molecular_formula_library_search(exhaustive_chemical_enumeration_annotated_table, MFlibrary, IonPathways, number_processing_threads)
IPA_message("Completed searching in the library of molecular formula!!!", failedMessage = FALSE)
}
##
save(exhaustive_chemical_enumeration_annotated_table, file = paste0(output_path, "/exhaustive_chemical_enumeration_annotated_table_", MSfileName, ".Rdata"))
write.csv(exhaustive_chemical_enumeration_annotated_table, file = paste0(output_path, "/exhaustive_chemical_enumeration_annotated_table_", MSfileName, ".csv"), row.names = TRUE)
IPA_message("Completed the exhaustive chemical enumeration analysis!!!", failedMessage = FALSE)
required_time <- Sys.time() - initiation_time
print(required_time)
}
}
return(exhaustive_chemical_enumeration_annotated_table)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.