# setting table =====
# NOTE: debatable whether reset_missing = TRUE or FALSE should be the default
# went with TRUE to avoid situations of mixed peak table information as an unexpected default
#' Set peak table
#'
#' Set peak table for a collection of isofiles. Peak tables have to have at least a \code{file_id} column but are otherwise flexible. That said, isoprocessor functionality that operates on peak tables assumes that all columns with numbers are \code{\link[isoreader]{iso_double_with_units}} and that retention times are recorded in \code{rt_start}, \code{rt_end}, and \code{rt} (apex retention time) columns.
#'
#' This funciton will issue a warning if the \code{peak_table} has \code{file_id} values that are not part of the \code{iso_files}. It will reset peak tables for all \code{iso_files} whose \code{file_id} is not part of \code{peak_table} unless \code{reset_missing} is set to \code{FALSE}, in which case pre-existing peak table data will be preserved unaltered. To reset all peak tables, simply call this function without the \code{peak_table} parameter.
#'
#' @param iso_files iso file(s) for which to set peak tables
#' @param peak_table peak table data frame, must have a 'file_id' column at minimum (to match with the iso_files)
#' @param reset_missing whether to reset the \code{peak_table} of any \code{iso_files} whose\code{file_id} is not in the \code{peak_table}. If set to \code{FALSE}, will preserve pre-existing peak table data instead.
#' @family peak table functions
#' @export
iso_set_peak_table <- function(iso_files, peak_table = tibble(file_id = character(0)), reset_missing = TRUE, quiet = default(quiet)) {
# continuous flow file check
if (!isoreader::iso_is_continuous_flow(iso_files))
stop("peak table information can only be set for continuous flow files", call. = FALSE)
if (!is.data.frame(peak_table))
stop("peak_table must be a data frame", call. = FALSE)
if (!"file_id" %in% names(peak_table))
stop("peak_table does not have a 'file_id' column, cannot match to iso_files", call. = FALSE)
# single file
is_single <- iso_is_file(iso_files)
if (is_single) iso_files <- iso_as_file_list(iso_files)
# split peak_table
peak_table <- split(peak_table, peak_table$file_id)
# information
files_file_ids <- names(iso_files)
peaks_file_ids <- names(peak_table)
updates <- intersect(files_file_ids, peaks_file_ids)
resets <- setdiff(files_file_ids, peaks_file_ids)
warnings <- setdiff(peaks_file_ids, files_file_ids)
if (!quiet) {
glue::glue(
"Info: setting peak table for {length(updates)}/{length(files_file_ids)} iso files",
if(reset_missing && length(resets) > 0) "; resetting peak table for {length(resets)} files"
else if (!reset_missing && length(resets) > 0) "; keeping peak table unchanged for {length(resets)} files"
else "",
if (length(warnings) > 0) "; ignoring peak table data for {length(warnings)} unmatched file IDs"
else "",
"."
) %>% message()
}
# update iso files
iso_files <- map(
iso_files,
~{
if (.x$file_info$file_id %in% updates)
.x$peak_table <- dplyr::select(peak_table[[.x$file_info$file_id]], -file_id)
else if (reset_missing && .x$file_info$file_id %in% resets)
.x$peak_table <- tibble()
.x
}
)
if (is_single) return(iso_files[[1]])
else return(iso_as_file_list(iso_files))
}
# setting from vendor data table =====
#' Set peak table from vendor data
#'
#' Set the peak table from vendor data in each iso_file. Only the vendor data table columns that do exist in a file will be used, so this function can set peak table information across a variety of data files without errors. Provides a detailed info message about the columns that are used.
#'
#' @param direct_rename select which columns to select and rename directly from the vendor data table. Must be a named vector with the values the vendor data table column names and the names the corresponding peak table column names.
#' @param regex_rename select columns by regular expression and rename using \link[stringr]{str_replace}. Must be a named vector with the values the regular expression to find vendor data table columns and the names the replacement expression for the correponding peak table column names.
#' @family peak table functions
#' @export
iso_set_peak_table_from_vendor_data_table <- function(iso_files, direct_rename = c(), regex_rename = c(), quiet = default(quiet)) {
# continuous flow file check
if (!isoreader::iso_is_continuous_flow(iso_files))
stop("peak table information can only be set for continuous flow files", call. = FALSE)
# parameter checks
if (length(direct_rename) > 0 && (!is.character(direct_rename) || !rlang::is_named(direct_rename)))
stop("direct_rename must be a named character vector", call. = FALSE)
if (length(regex_rename) > 0 && (!is.character(regex_rename) || !rlang::is_named(regex_rename)))
stop("regex_rename must be a named character vector", call. = FALSE)
if (length(direct_rename) == 0 && length(regex_rename) == 0)
stop("must provide either direct_rename or regex_rename", call. = FALSE)
# single file
is_single <- iso_is_file(iso_files)
if (is_single) iso_files <- iso_as_file_list(iso_files)
# pull out all vendor data table columns
cols <- tibble(
file_id = names(iso_files),
col = purrr::map(
iso_files, ~
if(is.null(.x$vendor_data_table)) { character(0) }
else { names(.x$vendor_data_table) }
)
) %>%
tidyr::unnest(col)
# single cols
if (length(direct_rename) > 0)
direct_rename <- tibble(
fits = map(as.character(direct_rename), ~dplyr::filter(cols, col == .x)),
new = names(direct_rename)
) %>% tidyr::unnest(fits)
else
direct_rename <- tibble()
# regex cols
if (length(regex_rename) > 0)
regex_rename <- tibble(
fits = map2(
as.character(regex_rename),
names(regex_rename),
~ dplyr::filter(cols, stringr::str_detect(col, .x)) %>%
dplyr::mutate(new = stringr::str_replace(col, .x, .y))
)
) %>% tidyr::unnest(fits)
else
regex_rename <- tibble()
# all renames
all_cols <- vctrs::vec_rbind(direct_rename, regex_rename) %>%
mutate(idx = dplyr::row_number())
# info
if (!quiet) {
info <- all_cols %>%
group_by(file_id) %>%
summarize(label = paste(sprintf("'%s'->'%s'", col, new), collapse = ", ")) %>%
dplyr::count(label) %>%
mutate(label = sprintf(" - for %d file(s): %s", n, label)) %>%
arrange(desc(n))
glue::glue("Info: setting peak table for {length(iso_files)} file(s) from vendor data table with the following renames:\n",
paste(info$label, collapse = "\n")) %>%
message()
}
# generate mutates to account for potential column replication
# (maybe do this faster with mutates only where necessary?)
all_cols <- all_cols %>%
mutate(col_quo = purrr::map(col, ~sym(.x))) %>%
arrange(file_id, idx) %>%
{ split(., .$file_id) }
# update iso files
iso_files <- map(
iso_files,
~{
if (.x$file_info$file_id %in% names(all_cols)) {
mutate_quos <- with(all_cols[[.x$file_info$file_id]], setNames(col_quo, new))
.x$peak_table <- dplyr::mutate(.x$vendor_data_table, !!!mutate_quos) %>%
dplyr::select(!!!all_cols[[.x$file_info$file_id]]$new)
}
.x
})
if (is_single) return(iso_files[[1]])
else return(iso_as_file_list(iso_files))
}
#' renamed to iso_set_peak_table_automatically_from_vendor_data_table
#'
#' This function has been renamed to \link{iso_set_peak_table_automatically_from_vendor_data_table}. Please call \link{iso_set_peak_table_automatically_from_vendor_data_table} directly to avoid the warning.
#' @export
iso_set_peak_table_from_auto_vendor_data_table <- function(...){
warning("'iso_set_peak_table_from_auto_vendor_data_table' has been renamed to 'iso_set_peak_table_automatically_from_vendor_data_table'. Please call the latter function directly to avoid this warning.", immediate. = TRUE, call. = FALSE)
iso_set_peak_table_automatically_from_vendor_data_table(...)
}
#' @rdname iso_set_peak_table_from_vendor_data_table
#' @aliases iso_set_peak_table_automatically_from_vendor_data_table
#' @details \code{iso_set_peak_table_automatically_from_vendor_data_table} is the easiest way to set peak tables from vendor data because it will simply look at the file extension and decide which software specific function to use (e.g. \code{iso_set_peak_table_from_isodat_vendor_data_table}) for any Isodat files.
#' @export
iso_set_peak_table_automatically_from_vendor_data_table <- function(iso_files, quiet = default(quiet)) {
# continuous flow file check
if (!isoreader::iso_is_continuous_flow(iso_files))
stop("peak table information can only be set for continuous flow files", call. = FALSE)
# available functions
funcs <- tibble(
software = c("Isodat"),
func = c("iso_set_peak_table_from_isodat_vendor_data_table")
)
stopifnot(purrr::map_lgl(funcs$func, exists, where = "package:isoprocessor", mode = "function"))
# single file
is_single <- iso_is_file(iso_files)
if (is_single) iso_files <- iso_as_file_list(iso_files)
# find software file ids
software_file_ids <-
iso_files %>%
isoreader::iso_get_file_info(file_path, quiet = TRUE) %>%
dplyr::mutate(extension = stringr::str_extract(file_path, "\\.[^.]+$")) %>%
dplyr::left_join(isoreader::iso_get_supported_file_types(), by = "extension") %>%
dplyr::group_by(software) %>%
dplyr::summarize(file_ids = list(file_id)) %>%
dplyr::left_join(funcs, by = "software")
# safety checks
if (nrow(miss <- filter(software_file_ids, purrr::map_lgl(func, is.null) | is.na(func))) > 0) {
glue::glue(
"no specialized function available (yet) for using vendor data tables ",
"from the following software: ", paste(miss$software, collapse = ", ")) %>%
warning(call. = FALSE, immediate. = TRUE)
}
# apply functions
apply_software_files_ids <- filter(software_file_ids, !purrr::map_lgl(func, is.null), !is.na(func))
if (nrow(apply_software_files_ids) > 0) {
for (i in 1:nrow(apply_software_files_ids)) {
iso_files[apply_software_files_ids$file_ids[[i]]] <-
do.call(
apply_software_files_ids$func[[i]],
args = list(iso_files[apply_software_files_ids$file_ids[[i]]], quiet = quiet)
)
}
}
if (is_single) return(iso_files[[1]])
else return(iso_files)
}
#'
#' @rdname iso_set_peak_table_from_vendor_data_table
#' @aliases iso_set_peak_table_from_isodat_vendor_data_table
#' @details \code{iso_set_peak_table_from_isodat_vendor_data_table} provides specialized functionality to set peak table information from an Isodat vendor data tables. For compatibility with all downstream isoprocessor calculations, the resulting peak table has a very specific set of columns which are listed below. Mapping for Isodat data tables:
#' \itemize{
#' \item peak_nr: "Nr."
#' \item is_ref: "Is Ref.?"
#' \item rt_start: "Start"
#' \item rt: "Rt"
#' \item rt_end: "End"
#' \item ampX: "Ampl X"
#' \item bgrdX_start: "BGD X"
#' \item bgrdX_end: "BGD X" (Isodat only reports a single background amplitude)
#' \item areaX: "rIntensity X" (recorded intensities)
#' \item rX/Y: "rR X/Y" (recorded ratios)
#' \item rX/Y_ref: extrapolated reference ratio at the peak (not available from Isodat peak table)
#' \item rdX/Y: "rd X/Y" (recorded delta -> rX/Y / rX/Y_ref - 1, not shifted for true isotopic value of the ref)
#' \item dX/Y: "d X/Y" (delta frame shifted wrt to the true isotopic value of the ref; note that if this is a single element delta keeps just the numerator with the element symbol to fit conventional naming, e.g. d13C or d18O; whereas for a molecule omits the formula and keeps the mass ratios, e.g. d45/44)
#' \item atX: "AT%% X/Y"
#' }
#' @export
iso_set_peak_table_from_isodat_vendor_data_table <- function(iso_files, quiet = default(quiet)) {
direct_rename <- c(peak_nr = "Nr.", is_ref = "Is Ref.?",
rt_start = "Start", rt = "Rt", rt_end = "End")
regex_rename <- c("amp\\1" = "^Ampl (\\d+)$",
"bgrd\\1_start" = "^BGD (\\d+)$", "bgrd\\1_end" = "^BGD (\\d+)$",
"area\\1" = "^rIntensity (\\d+)$",
"r\\1/\\2" = "^rR (\\d+)[^/]*\\/(\\d+).*$",
"rd\\1/\\2" = "^rd (\\d+)[^/]*\\/(\\d+).*$",
"d\\1/\\2" = "^d (\\d+)[^0-9][^/]+\\/(\\d+).*$",
"d\\1" = "^d (\\d+[^/])\\/(\\d+).*$",
"at\\1" = "^AT\\% (\\d+[^/]*)\\/(\\d+).*$")
iso_set_peak_table_from_vendor_data_table(
iso_files,
direct_rename = direct_rename,
regex_rename = regex_rename,
quiet = quiet
)
}
# mutate peak table =====
#' Mutate peak table
#'
#' Mutate a peak table data frame either stand-alone (\code{\link{iso_get_peak_table}}) or within isofile objects. Works just like dplyr's \link[dplyr]{mutate} for changing existing columns or introducing new ones, except that it provides an information message and preserves column units even in grouped operations (\link[dplyr]{mutate} should gain this functionality in version 0.9).
#'
#' @inheritParams isoreader::iso_get_raw_data
#' @param ... dplyr-style \link[dplyr]{mutate} conditions applied to the combined peak_table (see \code{\link{iso_get_peak_table}})
#' @family peak table functions
#' @export
iso_mutate_peak_table <- function(iso_files, ..., quiet = default(quiet)) {
UseMethod("iso_mutate_peak_table")
}
#' @export
iso_mutate_peak_table.default <- function(iso_files, ..., quiet = default(quiet)) {
stop("this function is not defined for objects of type '",
class(iso_files)[1], "'", call. = FALSE)
}
#' @export
iso_mutate_peak_table.iso_file <- function(iso_files, ..., quiet = default(quiet)) {
iso_mutate_peak_table(iso_as_file_list(iso_files), ..., quiet = quiet)[[1]]
}
#' @rdname iso_mutate_peak_table
#' @param group_by a single \link[dplyr]{group_by} column to group by before applying the mutate statement
#' @export
iso_mutate_peak_table.iso_file_list <- function(iso_files, ..., group_by = NULL, quiet = default(quiet)) {
# continuous flow file check
if (!isoreader::iso_is_continuous_flow(iso_files))
stop("peak tables can only exist in continuous flow files", call. = FALSE)
# group by
group_by_quo <- enquo(group_by)
# information
if (!quiet) {
glue::glue(
"Info: mutating peak table for {length(iso_files)} data file(s)",
if (!rlang::quo_is_null(group_by_quo) > 0)
" grouped by '{rlang::as_label(group_by_quo)}'" else "") %>%
message(appendLF = FALSE)
}
# mutate iso_files' file info
peak_table <- iso_get_peak_table(iso_files, quiet = TRUE)
mutate_quos <- rlang::enquos(...)
new_cols <- names(mutate_quos)
original_cols <- names(peak_table)
peak_table <- iso_mutate_peak_table(peak_table, !!!mutate_quos, group_by = !!group_by_quo, quiet = TRUE)
split_peak_table <- split(peak_table, peak_table$file_id)
# information
if (!quiet) {
update_cols <- intersect(new_cols, original_cols)
really_new_cols <- setdiff(new_cols, original_cols)
glue::glue(
if (length(update_cols) > 0) ", column(s) '{glue::glue_collapse(update_cols, sep = \"', '\")}' updated" else "",
if (length(really_new_cols) > 0) ", column(s) '{glue::glue_collapse(really_new_cols, sep = \"', '\")}' added" else "",
".") %>%
message()
}
# mutate this way to ensure that only original columns are kept rather than
# a bunch of NA columns if peak_tables differ between iso_files
iso_files <- map(
iso_files,
~{
if (!is.null(.x$peak_table)) {
original_cols <- names(.x$peak_table)
.x$peak_table <- split_peak_table[[.x$file_info$file_id]][unique(c(original_cols, new_cols))]
}
.x
})
# return
return(iso_as_file_list(iso_files))
}
#' @rdname iso_mutate_peak_table
#' @export
#' @param peak_table data frame with the peak table (can be grouped prior to this function call or via the \code{group_by} parameter)
iso_mutate_peak_table.data.frame <- function(peak_table, ..., group_by = NULL, quiet = default(quiet)) {
# safety check
if (nrow(peak_table) == 0) return(peak_table)
# grouping columns
group_by_quo <- enquo(group_by)
if (!rlang::quo_is_null(group_by_quo)) peak_table <- dplyr::group_by(peak_table, !!group_by_quo)
grp_vars <- dplyr::group_vars(peak_table) %>% { setNames(., .) }
# information
if (!quiet) {
glue::glue(
"Info: mutating peak table",
if (length(grp_vars) > 0) " grouped by '{glue::glue_collapse(grp_vars, sep = \"', '\")}'" else ""
) %>% message(appendLF = FALSE)
}
# quos
mutate_quos <- rlang::enquos(...)
new_cols <- names(mutate_quos)
original_cols <- names(peak_table)
# mutate by nesting to preserve units
# this should work directly once dplyr v. 0.9 is released
if (length(grp_vars) > 0) {
peak_table <- peak_table %>%
tidyr::nest(data = c(!!!map(grp_vars, ~quo(-!!sym(.x))))) %>%
dplyr::mutate(
data = purrr::map(data, ~dplyr::mutate(.x, !!!mutate_quos))
) %>%
tidyr::unnest(data)
} else {
peak_table <- dplyr::mutate(peak_table, !!!mutate_quos)
}
# information
if (!quiet) {
update_cols <- intersect(new_cols, original_cols)
really_new_cols <- setdiff(new_cols, original_cols)
glue::glue(
if (length(update_cols) > 0) ", column(s) '{glue::glue_collapse(update_cols, sep = \"', '\")}' updated" else "",
if (length(really_new_cols) > 0) ", column(s) '{glue::glue_collapse(really_new_cols, sep = \"', '\")}' added" else "",
".") %>%
message()
}
# ungroup
if (!rlang::quo_is_null(group_by_quo)) peak_table <- dplyr::ungroup(peak_table)
# return values
return(peak_table)
}
# aggregation ======
#' Aggregate peak table
#'
#' Aggregate peak table from continous flow files. The return peak table has implicit units, to make the units explicit, use \code{\link[isoreader]{iso_make_units_explicit}}.
#'
#' @inheritParams isoreader::iso_get_vendor_data_table
#' @family peak table functions
#' @export
iso_get_peak_table <- function(
iso_files, select = everything(), include_file_info = NULL, with_explicit_units = FALSE, quiet = default(quiet)) {
# parameters
iso_files <- isoreader::iso_as_file_list(iso_files)
include_file_info_quo <- rlang::enquo(include_file_info)
# continuous flow file check
if (!isoreader::iso_is_continuous_flow(iso_files))
stop("peak table information is only available in continuous flow files", call. = FALSE)
# info message
if (!quiet) {
sprintf(
"Info: aggregating peak table%s from %d data file(s)%s",
if (with_explicit_units) " with explicit units" else "",
length(iso_files),
isoreader:::get_info_message_concat(include_file_info_quo, prefix = ", including file info ")) %>%
message()
}
# check whether there are any files
if (length(iso_files) == 0) return(tibble())
# get vendor data
column <- units <- NULL # global vars
# fetch peak table data
peak_table <-
# fetch data
tibble(
file_id = names(iso_files),
pt = map(iso_files, ~.x$peak_table),
has_pt = map_lgl(pt, ~!is.null(.x))
)
# warnings
if (all(!peak_table$has_pt)) {
warning("none of the iso files has a peak_table yet. To use peak table functionality, make sure to create a peak table by either adopting the vendor_data_table (?iso_set_peak_table_automatically_from_vendor_data_table), finding peaks (?iso_find_peaks), or setting a table manually (?iso_set_peak_table).", call. = FALSE, immediate. = TRUE)
}
# make sure to include only existing data
peak_table <- filter(peak_table, has_pt)
# check for any rows
if (nrow(peak_table) == 0) return(tibble(file_id = character(0)))
# make units explicit if wanted
if (with_explicit_units) {
peak_table <- peak_table %>%
mutate(pt = map(pt, isoreader::iso_make_units_explicit))
}
# unnest
peak_table <- dplyr::select(peak_table, file_id, pt) %>% unnest(pt)
# get include information
select_cols <- get_column_names(peak_table, select = enquo(select), n_reqs = list(select = "*"), cols_must_exist = FALSE)$select
if (!"file_id" %in% select_cols)
select_cols <- c("file_id", select_cols) # file info always included
# focus on selected columns only (also takes care of the rename)
peak_table <- dplyr::select(peak_table, !!!select_cols)
# include file info
if (!quo_is_null(include_file_info_quo)) {
info <- iso_get_file_info(iso_files, select = !!include_file_info_quo, quiet = TRUE)
peak_table <- right_join(info, peak_table, by = "file_id")
}
return(peak_table)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.