#' Import files generated by MSFinder, both formula and structure at the same time.
#'
#' Formula and structure candidates are merged on their shared identification
#' columns (including `Formula`); rows without a structure are discarded.
#'
#' @eval recurrent_params("source", "level")
#' @return MSFinder formula and structure data joined together, or `NULL` when
#'   either import returns `NULL` or when no merged row has a structure.
import_msfinder_data <- function(source, level) {
  # Bail out as soon as one input is missing, so the structure file is not
  # read and reshaped for nothing when the formula file is unavailable.
  msf_formula <- import_msfinder_formula_file(source, level)
  if (is.null(msf_formula)) {
    return(NULL)
  }
  msf_structure <- import_msfinder_structure_file(source, level)
  if (is.null(msf_structure)) {
    return(NULL)
  }

  # Columns present in both tables but absent from `by` (e.g. "rank",
  # "Databases") receive the ".formula" / ".structure" suffixes.
  global <- merge(
    msf_formula,
    msf_structure,
    by = c("Alignment.ID", "Title", "MS1.count", "MSMS.count", "PRECURSORMZ", "PRECURSORTYPE", "Formula"),
    suffixes = c(".formula", ".structure"),
    all.x = TRUE
  )

  # all.x = TRUE keeps formulas that matched no structure; drop those rows.
  global <- global[!is.na(global$Structure), ]
  if (nrow(global) == 0) {
    return(NULL)
  }

  global$source <- source
  global$id <- paste0(global$source, "_", global$Alignment.ID)
  global$level <- level

  if (level == "generic") {
    global$Links <- global$Databases.structure
  } else {
    # Extraction columns from Databases.structure column
    # (MSFinder concatenates all non-necessary columns in the column "Databases")
    other_columns <- extract_concatenated_data_from_column(data.frame(global$Databases.structure))
    global <- cbind(global, other_columns)
    # if Compound_level not found, creation of the column
    if (!"Compound_level" %in% names(global)) {
      global$Compound_level <- NA
    }
  }

  global$Databases.structure <- NULL
  # "-" is treated as a missing-value placeholder in the remaining columns.
  global[global == "-"] <- NA
  global
}
#' Import a formula file generated by MSFinder.
#'
#' Rows whose `Formula` is `NA` are removed and the columns are returned in a
#' fixed order.
#'
#' @eval recurrent_params("source", "level")
#' @return A data.frame containing MSFinder formula data, or `NULL` when
#'   `get_project_file_path()` yields `NA`.
import_msfinder_formula_file <- function(source, level) {
  formula_path <- get_project_file_path(
    "msfinder_data",
    source = source,
    msfinder_info = "Formula",
    msfinder_lvl = level
  )
  if (is.na(formula_path)) {
    return(NULL)
  }

  candidates <- import_msfinder_base_file(
    formula_path,
    c("Theoretical.mass", "Mass.error", "Formula.score", "Databases")
  )

  # Rename the "Formula.rank" column to "Formula".
  names(candidates) <- replace(
    names(candidates),
    names(candidates) == "Formula.rank",
    "Formula"
  )

  kept_columns <- c(
    "Alignment.ID", "Title", "MS1.count", "MSMS.count", "PRECURSORMZ", "PRECURSORTYPE", "rank",
    "Formula", "Theoretical.mass", "Mass.error", "Formula.score", "Databases"
  )
  # Keep only rows that have a formula, with the columns in the expected order.
  candidates[!is.na(candidates$Formula), kept_columns]
}
#' Import a structure file generated by MSFinder.
#'
#' Rows whose `Structure` is `NA` are removed and the columns are returned in a
#' fixed order.
#'
#' @eval recurrent_params("source", "level")
#' @return A data.frame containing MSFinder structure data, or `NULL` when
#'   `get_project_file_path()` yields `NA`.
import_msfinder_structure_file <- function(source, level) {
  structure_path <- get_project_file_path(
    "msfinder_data",
    source = source,
    msfinder_info = "Structure",
    msfinder_lvl = level
  )
  if (is.na(structure_path)) {
    return(NULL)
  }

  candidates <- import_msfinder_base_file(
    structure_path,
    c("Total.score", "Databases", "Formula", "Ontology", "InChIKey", "SMILES")
  )

  # Rename the "Structure.rank" column to "Structure".
  names(candidates) <- replace(
    names(candidates),
    names(candidates) == "Structure.rank",
    "Structure"
  )

  kept_columns <- c(
    "Alignment.ID", "Title", "MS1.count", "MSMS.count", "PRECURSORMZ", "PRECURSORTYPE", "rank",
    "Structure", "Total.score", "Databases", "Formula", "Ontology", "InChIKey", "SMILES"
  )
  # Keep only rows that have a structure, with the columns in the expected order.
  candidates[!is.na(candidates$Structure), kept_columns]
}
#' Generic function for importing a file generated by MSFinder
#'
#' Reads a tab-separated MSFinder export, derives `Alignment.ID` from the
#' `File.name` column, then reshapes the numbered candidate columns so that each
#' output row corresponds to one rank of one alignment.
#'
#' @param filepath The path of the MSFinder file to import.
#' @param columns_to_increment A list of strings indicating which columns needs to have the final number in their name incremented.
#' @return A data.frame containing MSFinder data, with the identification
#'   columns, a numeric `rank` column and one column per candidate attribute.
import_msfinder_base_file <- function(filepath, columns_to_increment) {
  # Empty cells and "-" are both read as NA.
  finder_data <- utils::read.csv(filepath, sep = "\t", na.strings = c("", "-"))
  finder_data$File.path <- NULL

  # Alignment ID extraction: split File.name on "_", then split the first
  # resulting piece on " "; the last column produced is used as the ID.
  finder_data <- splitstackshape::cSplit(finder_data, "File.name", sep = "_", direction = "wide") # returns a data.table
  finder_data <- splitstackshape::cSplit(finder_data,
                                         names(finder_data)[grep("File.name", colnames(finder_data))[1]], # returns 1st column File.name_X
                                         sep = " ",
                                         direction = "wide")
  finder_data$Alignment.ID <- finder_data[, ncol(finder_data), with = FALSE]
  # NOTE(review): finder_data is a data.table at this point; confirm that the
  # single-index `[` below drops columns (data.frame semantics) rather than rows
  # in the namespace this function runs in.
  finder_data <- finder_data[-grep("File.name", names(finder_data))] # delete all columns File.name*

  # Renaming ranked elements
  names(finder_data) <- increment_strings(names(finder_data), columns_to_increment)

  # Reformating file: long format, one row per (alignment, original column)
  finder_data <- reshape2::melt(as.data.frame(finder_data),
                                id.vars = c("Alignment.ID", "Title", "MS1.count", "MSMS.count", "PRECURSORMZ", "PRECURSORTYPE"))

  # Extract rank from columns names ("name.1" -> 1, "othername.10" -> 10).
  # The whole numeric suffix is captured, using the same ".<digits>" pattern
  # that strips it below, so ranks with three or more digits are not truncated.
  variable_names <- as.character(finder_data$variable)
  finder_data$rank <- as.numeric(sub("^.*\\.(\\d+)$", "\\1", variable_names))
  finder_data$variable <- gsub('\\.\\d+$', '', variable_names) # ("name", "othername")

  # Back to wide format: one row per (alignment, rank), one column per attribute
  finder_data <- reshape2::dcast(finder_data,
                                 Alignment.ID + Title + MS1.count + MSMS.count + PRECURSORMZ + PRECURSORTYPE + rank ~ variable)
  finder_data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.