# Nothing
# Disabled alternative for interactive folder selection:
# @importFrom tcltk tk_choose.dir
#
# Example (not run):
# # Assuming the folder_path variable is defined as the directory containing XML files
# extract_metadata_from_ufz_files(folder_path)

#' @title Extract Metadata from UFZ FTMS Filenames
#'
#' @description Extracts metadata from file names that follow the UFZ FTMS
#' naming conventions. Each name is split at underscores and the parts are
#' searched for the sample tag, UFZ sample number, position, date, segment and
#' retention time. The result is a data.table with one row per file.
#'
#' @name extract_metadata_from_ufz_files
#' @family internal functions
#' @keywords metadata internal
#' @import data.table
#'
#' @param folder_path (Optional) Path to the directory that is searched
#'   recursively. A path to a file inside that directory is accepted as well.
#'   If \code{NULL}, a file chooser opens and the directory of the chosen file
#'   is used.
#' @param file_type (Default: ".xml") File extension to look for, with or
#'   without the leading dot. For ".d" the search matches directories instead
#'   of files.
#'
#' @return A data.table with one row per matching file. The columns are:
#' \itemize{
#' \item \code{V1}, \code{V2}, ...: The underscore-separated parts of the file
#'   name (at least eight columns, padded with \code{NA}).
#' \item \code{file}: File name without the extension.
#' \item \code{link_rawdata}: Original file name as a link to raw data.
#' \item \code{sample_tag}: Sample label built from the leading name parts.
#' \item \code{sample_id_ufz}: Four-digit UFZ sample number, if found.
#' \item \code{position}: Position or condition identifier, if found.
#' \item \code{segment}: Name part containing "Segment", if found.
#' \item \code{date}: Date parsed from the file name, as \code{Date}.
#' \item \code{ret_time}: Retention time part (still carrying the extension).
#' \item \code{file_long}: File name including the extension.
#' \item \code{ID}: Running row number.
#' }
#'
#' @details
#' Name parts are located by content rather than by a fixed position, so names
#' with a different number of leading tag parts are handled:
#' \itemize{
#' \item \code{sample_id_ufz}: a part made of exactly four digits in parts 2-4.
#' \item \code{position}: part 3 if it contains "RE" or "BE"; parts 4 or 5 if
#'   they contain "B" or "R".
#' \item \code{segment}: a part containing "Segment" in parts 4-7.
#' \item \code{date}: a part shaped like \code{yyyy-mm-dd} or \code{dd-mm-yyyy}
#'   in parts 4-6.
#' \item \code{ret_time}: a part ending in "min" plus the extension in
#'   parts 6-8.
#' }
#' When several parts qualify, the one furthest to the right wins.
#'
#' Examples of expected file names:
#' \itemize{
#' \item Standard: \code{104B12_9557_RB3_10-12-2023_Segment1_1-2min.xml}
#' \item With an additional underscore in the first part:
#'   \code{srfa_mcs_9554_GA2_10-12-2023_Segment1_1-2min.xml}
#' }
extract_metadata_from_ufz_files <- function(folder_path = NULL, file_type = NULL) {
  # Bind the column names used inside data.table expressions so that
  # R CMD check does not report them as undefined globals.
  V1 <- V2 <- V3 <- sample_tag <- file_long <- ID <- NULL

  # Fall back to XML peak lists only when no file type was given.
  if (is.null(file_type)) {
    file_type <- ".xml"
  }
  stopifnot(is.character(file_type), length(file_type) == 1L)
  # Regex for the extension; the leading dot is optional in the argument.
  ext_pattern <- paste0("\\.", sub("^\\.", "", file_type), "$")

  if (is.null(folder_path)) {
    # file.choose() returns a file; search the directory it lives in.
    folder_path <- dirname(file.choose())
  } else if (!dir.exists(folder_path)) {
    # Not a directory: treat it as a file inside the folder to search.
    folder_path <- dirname(folder_path)
  }

  if (ext_pattern == "\\.d$") {
    # ".d" entries are looked up among directories rather than files.
    all_dirs <- list.dirs(folder_path, full.names = TRUE, recursive = TRUE)
    paths <- all_dirs[grepl(ext_pattern, basename(all_dirs))]
  } else {
    paths <- list.files(
      path = folder_path, pattern = ext_pattern,
      full.names = TRUE, recursive = TRUE
    )
  }
  file_names <- basename(paths)

  # Split at "_" and pad to at least eight part columns (V1..V8) so the rules
  # below can address every part regardless of how long the names are.
  tokens <- strsplit(file_names, "_", fixed = TRUE)
  n_tokens <- max(8L, lengths(tokens))
  token_cols <- lapply(seq_len(n_tokens), function(k) {
    vapply(tokens, function(parts) parts[k], character(1))
  })
  names(token_cols) <- paste0("V", seq_len(n_tokens))
  spectra <- as.data.table(
    c(token_cols, list(file = file_names, link_rawdata = file_names))
  )

  # Sample tag: "V1 V2" unless one of the special cases applies.
  spectra[, sample_tag := paste(V1, V2)]
  spectra[V1 %in% "HI", sample_tag := paste(V1, V2, V3)]
  spectra[V1 %in% "M3ULW", sample_tag := "M3 ULW"]
  spectra[V1 %in% c("POOL", "SRFA", "BLK"), sample_tag := V1]
  spectra[V3 %in% "1624", sample_tag := paste(V1, V2)]

  # Create the derived columns up front so they exist even without a match.
  spectra[
    ,
    c("sample_id_ufz", "position", "segment", "date", "ret_time") :=
      NA_character_
  ]

  # Copy matching parts into `target`; later source columns overwrite earlier
  # ones, so the right-most matching part wins.
  fill_from_tokens <- function(target, sources, pattern) {
    for (src in sources) {
      hits <- grep(pattern, spectra[[src]])
      if (length(hits) > 0L) {
        set(spectra, i = hits, j = target, value = spectra[[src]][hits])
      }
    }
  }

  iso_date <- "^[0-9]{4}-[0-9]{2}-[0-9]{2}$"
  dmy_date <- "^[0-9]{2}-[0-9]{2}-[0-9]{4}$"

  # Whole-part match on four digits, so that a date such as "2025-03-16" is
  # never mistaken for the UFZ sample number.
  fill_from_tokens("sample_id_ufz", c("V2", "V3", "V4"), "^[0-9]{4}$")
  fill_from_tokens("position", "V3", "RE|BE")
  fill_from_tokens("position", c("V4", "V5"), "B|R")
  fill_from_tokens("segment", c("V4", "V5", "V6", "V7"), "Segment")
  fill_from_tokens(
    "date", c("V4", "V5", "V6"), paste(iso_date, dmy_date, sep = "|")
  )
  fill_from_tokens("ret_time", c("V6", "V7", "V8"), paste0("min", ext_pattern))

  # Convert the date part to Date; parts without a recognised shape stay NA.
  # NOTE(review): dd-mm-yyyy is read as day-month-year -- confirm with the
  # naming convention.
  parse_token_date <- function(x) {
    out <- as.Date(rep(NA_character_, length(x)), format = "%Y-%m-%d")
    is_iso <- grepl(iso_date, x)
    is_dmy <- grepl(dmy_date, x)
    out[is_iso] <- as.Date(x[is_iso], format = "%Y-%m-%d")
    out[is_dmy] <- as.Date(x[is_dmy], format = "%d-%m-%Y")
    out
  }
  set(spectra, j = "date", value = parse_token_date(spectra[["date"]]))

  # Keep the full name, then strip the extension from `file`.
  spectra[, file_long := file]
  spectra[, file := sub(ext_pattern, "", file)]
  spectra[, ID := seq_len(.N)]

  spectra[]
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.