Nothing
#' @title Convert Data Table with Element Counts to Molecular Formulas
#'
#' @description
#' Creates a character vector of molecular formulas and adds it as a column to the input `data.table`.
#' The molecular formula string follows the **Hill system order** for element arrangement.
#' If `keep_element_sums = TRUE`, the returned data.table also provides
#' the total number of atoms of each element in the molecular formula.
#'
#' @details
#' This function extracts element or isotope counts from a table with columns for each element of a molecular formula,
#' including those with isotopic notation.
#' It ensures that only valid elements are included based on a reference table (`masses`).
#'
#' The function internally uses the `ume::masses` table that contains element and isotopic symbols.
#'
#' @inheritParams main_docu
#' @param isotope_formulas Logical. If `TRUE`, the output table will have an additional
#' molecular formula string that includes isotope information (e.g. "[12C5][13C][1H12][16O6]").
#' @param keep_element_sums Logical. If `TRUE`, the output table will have
#' additional columns containing the total count of atoms of an element (e.g. `S_tot`).
#' @return The original table `mfd` as data.table having additional columns:
#' \describe{
#' \item{mf}{Standardized molecular formula following the Hill order.}
#' \item{mf_iso}{If `isotope_formulas = TRUE`:
#' Standardized molecular formula considering all isotopes of an element.}
#' \item{C_tot}{If `keep_element_sums = TRUE`:
#' The total count of all atoms that are carbon isotopes (and similarly for all other elements).}
#' }
#'
#' @section Notes:
#' - The function correctly handles isotopic notations such as `[13C]` and `[18O2]`.
#' - The output follows the **Hill order**, meaning **C, H first**, followed by other elements in alphabetical order.
#' - Single-element counts (e.g., `C1H4` → `CH4`) are formatted without explicit `1`.
#'
#' @import data.table
#' @examples
#' convert_data_table_to_molecular_formulas(mf_data_demo[, .(`12C`, `1H`, `14N`, `16O`, `31P`, `32S`)])
#' @family molecular formula functions
#' @keywords chemistry molecular-formula
#' @export
convert_data_table_to_molecular_formulas <- function(mfd,
                                                     isotope_formulas = FALSE,
                                                     keep_element_sums = FALSE,
                                                     verbose = FALSE,
                                                     ...) {
  # Bind the column names used through data.table's non-standard evaluation so
  # that R CMD check does not report them as undefined global variables.
  count <- count_element <- hill_order <- label <- mf <- mf_iso <- NULL
  symbol <- vkey <- NULL

  # NOTE: `mfd` is modified by reference below (`:=` and `setnames()`): a
  # `vkey` column may be added, and the isotope columns are renamed and
  # coerced to integer.

  # Create a row key for each line in mfd if not already existing
  if (!"vkey" %in% names(mfd)) {
    mfd[, vkey := .I]
  }

  # Verify which columns have element or isotope information
  # (`...` is forwarded to get_isotope_info())
  iso_cols <- get_isotope_info(mfd, ...)

  # Rename isotope columns in mfd to match official nomenclature
  setnames(mfd, iso_cols$orig_name, iso_cols$label, skip_absent = TRUE)

  # Make sure all isotope columns are integer type
  mfd[, (iso_cols$label) := lapply(.SD, as.integer), .SDcols = iso_cols$label]

  # Reshape to long format: one row per (formula, isotope) pair
  dt_long <- data.table::melt(
    mfd,
    id.vars = "vkey",
    measure.vars = iso_cols$label,
    variable.name = "label",
    value.name = "count",
    variable.factor = FALSE
  )

  # Isotopes that do not occur in a formula must not show up in its string
  dt_long <- dt_long[count > 0]

  # Attach element symbol and Hill order to every isotope row
  dt_long <- iso_cols[, .(hill_order, label, symbol)][dt_long, on = "label"]

  # Sort by formula and Hill order; the grouped aggregations below keep groups
  # in order of first appearance, so this fixes the element order in the string
  setkeyv(dt_long, c("vkey", "hill_order"))

  # Sum up all isotopes of the same element per formula
  df_mf <- dt_long[, .(count_element = sum(count)), by = .(vkey, symbol)]

  # Optional wide table with one "<symbol>_tot" column per element
  if (keep_element_sums) {
    df_mf_sums <- dcast(df_mf, vkey ~ symbol,
                        value.var = "count_element", fill = 0)
    setnames(df_mf_sums, names(df_mf_sums)[-1],
             paste0(names(df_mf_sums)[-1], "_tot"))
  }

  # Per-element sub-strings: a count of 1 is written without the digit
  df_mf[count_element == 1, mf := symbol]
  df_mf[count_element > 1, mf := paste0(symbol, count_element)]
  df_mf[, c("symbol", "count_element") := NULL]

  # Concatenating the sub-strings per formula takes most of the run time
  if (verbose) message("Creating molecular formula string...")
  df_mf <- df_mf[, lapply(.SD, paste0, collapse = ""), by = .(vkey)]

  # Build formula strings that keep every isotope separate, e.g. "[13C]"
  if (isotope_formulas) {
    df_mf_iso <- dt_long[, .(count_element = sum(count)), by = .(vkey, label)]
    df_mf_iso[count_element == 1, mf_iso := paste0("[", label, "]")]
    df_mf_iso[count_element > 1,
              mf_iso := paste0("[", label, count_element, "]")]
    df_mf_iso[, c("label", "count_element") := NULL]

    if (verbose) {
      message("Creating molecular formula string with isotope information...")
    }
    df_mf_iso <- df_mf_iso[, lapply(.SD, paste0, collapse = ""), by = .(vkey)]

    # Combine standard and isotope-resolved formula strings
    df_mf <- df_mf_iso[df_mf, on = "vkey"]
  }

  # Join the formula strings with the original table (all rows of mfd are kept)
  mfd <- df_mf[mfd, on = "vkey"]

  # Join the per-element totals. The result has to be assigned: previously the
  # join was evaluated and discarded, so the "*_tot" columns never reached the
  # returned table.
  if (keep_element_sums) {
    mfd <- df_mf_sums[mfd, on = "vkey"]
  }

  if (verbose) message("Molecular formula strings created.")
  return(mfd)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.