#' @title Format InChIKeys
#'
#' @description
#' check validity of InChIKey, return NA for those which are not valid
#'
#' @param inchikeys vector of characters
#'
#' @return vector of characters (InChIKeys), filled with NA values where InChIKey were not valid
#'
#' @examples
#' \dontrun{format_inchikey("BQJCRHHNABKAKU-KBQPJGBKSA-N")}
format_inchikey <- function(inchikeys) {
valid <- check_inchikey(inchikeys)
inchikeys[which(!valid)] <- NA
inchikeys
}
#' @title Test validity of InChIKey(s)
#'
#' @description
#' Test if InChIKey is valid or not. Inchikeys must respect some rules:
#' \itemize{
#' \item only uppercase letters
#' \item length of 27
#' \item 3 parts sep by "-"
#' \item 1st part of 14, 2nd of 10, 3rd of one
#' \item 24th must be S or N
#' \item 25th must be A.
#' }
#'
#' @param inchikeys vector of characters
#'
#' @return vector of boolean of same length than the inchikey parameter (TRUE if valid, FALSE if not)
#'
#' @examples
#' \dontrun{check_inchikey("BQJCRHHNABKAKU-KBQPJGBKSA-N")}
check_inchikey <- function(inchikeys) str_detect(inchikeys,
"^[:upper:]{14}-[:upper:]{8}[SN]A-[:upper:]$")
#' @title Test validity of CAS
#'
#' @description
#' Test the vaidity of CAS. CAS must respect some rules:
#' \itemize{
#' \item only digit
#' \item cas must be 3 parts sep by "-"
#' \item 1st part of 2 up to 7 digits
#' \item 2nd of 2 digits
#' \item 3rd of 1 (check digit)
#' \item the check digit must be equal to the last digit times 1,
#' the preceding digit times 2, the preceding digit times 3 etc, ...,
#' adding all these up and computing the sum modulo 10.
#' }
#'
#' @param cas vector of characters
#'
#' @return vector of boolean of same length than the cas parameter (TRUE if valid, FALSE if not)
#'
#' @examples
#' \dontrun{check_cas("64-17-5")}
check_cas <- function(cas) str_detect(cas,
"^[:digit:]{2,7}-[:digit:]{2}-[:digit:]$") &
sapply(stringr::str_split(gsub("-", "", cas), ""), function(x) {
x <- as.numeric(rev(x))
x[1] == sum(x[-1] * 1:(length(x) - 1)) %% 10
})
#' @title Format chemical formulas
#'
#' @description
#' Format chemical formulas
#' \itemize{
#' \item remove white spaces at begin & end
#' \item check validity of formula:
#' \enumerate{
#' \item isotope designation: number between []
#' \item element: an upper letter followed sometimes by a lower letter
#' \item number(s)
#' }
#' }
#'
#' @param formulas vector of characters of chemical formulas
#'
#' @return vector of characters (chemical formulas), filled with NA value where the formula were not valid
#'
#' @examples
#' \dontrun{format_formula(c("C12H18Br6", "C12[2]H18Br6"))}
format_formula <- function(formulas) {
formulas[which(!str_detect(formulas,
"^((\\[[:digit:]+\\])?[:upper:][:lower:]?[:digit:]*)+$"))] <- NA
formulas <- stringr::str_extract_all(formulas,
"(\\[[:digit:]+\\])?[:upper:][:lower:]?[:digit:]*")
utils::data(isotopes_df, package = "metabSeek")
sapply(formulas, format_formula_vec, isotopes_df)
}
format_formula_vec <- function(formula_vec, isotopes_df) {
if (any(is.na(formula_vec))) return(NA)
form <- matrix(c(
stringr::str_extract(formula_vec, "^(\\[[:digit:]+\\])?[:upper:][:lower:]?"),
stringr::str_extract(formula_vec, "[:digit:]*$")), ncol = 2)
form <- form[which(form[, 1] %in% isotopes_df[, 1]), , drop = FALSE]
if (nrow(form) == 0) return(NA)
form[which(form[, 2] == ""), 2] <- 1
paste(t(form), sep = "", collapse = "")
}
#' @title Get basepeak m/z
#'
#' @description
#' Get basepeak m/z
#'
#' @param formulas vector of characters representing chemical formulas
#' @param charges vector of integers representing charges for each chemical formulas (same length as the parameter formulas)
#'
#' @return vector of floats (basepeak m/z for each couple of formula - charge)
#'
#' @examples
#' \dontrun{get_basepeak_mass("C12H18Br6", 0)}
get_basepeak_mass <- function(formulas, charges) {
utils::data(isotopes_df, package = "metabSeek")
masses <- rep(NA, length(formulas))
# envipat cannot deal at the same time molecule with a charge of 0 and naturally charged compounds ??!!!
for (charge in unique(charges)) {
if (is.na(charge)) next
ids <- which(charges == charge & !is.na(formulas))
isotopic_patterns <- enviPat::isopattern(isotopes_df, formulas[ids],
threshold = 90, charge = charge, verbose = FALSE)
masses[ids] <- sapply(isotopic_patterns, function(x) x[1, 1])
}
masses
}
#' @title convert charged m/z to neutral mass
#'
#' @description
#' convert charged m/z to neutral mass with a list of predefined adducts
#' reject those with a mass under or equal to 0
#'
#' @param mzs vector of floats, represent m/z
#' @param adducts vector of strings, represent name of the adduct
#'
#' @return matrix with columns
#' \enumerate{
#' \item adduct
#' \item ion_mz
#' \item mass
#' }
#'
#' @examples
#' \dontrun{mzs_to_mass(443.1247895, c("M+H", "M+2H"))}
mzs_to_mass <- function(mzs, adducts) {
utils::data(adducts_df, package = "metabSeek")
adducts <- adducts_df[which(adducts_df[, 1] %in% adducts), ]
result <- do.call(rbind, lapply(mzs, function(mz)
matrix(c(rep(mz, nrow(adducts)), adducts[, 1],
mz * adducts$Charge / adducts$Mult + adducts$Mass),
ncol = 3, dimnames = list(c(), c("mz", "adduct", "mass")))))
result[which(as.numeric(result[, "mass"]) > 0), , drop = FALSE]
}
#' @title Get mass error range
#'
#' @description.
#' Compute mass error range with a tolerance in ppm and / or in mda
#'
#' @param mass vector of floats representing masses
#' @param ppm float, optional, 0 by default
#' @param mda float, optional, 0 by default
#'
#' @return matrix with two column: one row for each mass provided in the masses param, columns represent borns min & max
#'
#' @examples
#' \dontrun{get_mass_range(c(443.1247985, 190.053578), ppm = 2)}
#' \dontrun{get_mass_range(c(443.1247985, 190.053578), mda = 1)}
get_mass_range <- function(mass, ppm = 0, mda = 0) {
mass_range <- if (ppm > 0) matrix(c(mass - (mass * ppm * 10**-6),
mass + (mass * ppm * 10**-6)), ncol = 2)
else matrix(rep(mass, 2), ncol = 2)
mass_range[, 1] <- mass_range[, 1] - (mda * 10**-3)
mass_range[, 2] <- mass_range[, 2] + (mda * 10**-3)
mass_range
}
# just a remember
mda_to_ppm <- function(mass, mda) mda * 10**3 * mass
ppm_to_mda <- function(mass, ppm) ppm * 10**-3 / mass
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.