#' Import Uncle Tm/Tagg DLS summary into R
#'
#' \code{import_DLSsum}
#'
#' @param directory_path a path to a directory containing the exported .xlsx files
#' @param pattern a regex pattern for further selecing files in the directory,
#' defaults to "DLS Sum"
#' @param sheet character string to specify sheet if multi-sheet workbook is exported
#' @param temp_cutoff numeric value, excluding all DLS data obtained at temperatures above this value, default is 100 (°C)
#' @param header if TRUE skips first 4 rows of .xlsx file to remove Uncle header, default is FALSE
#' @param combine if TRUE, returns all imported data merged into one unified dataframe with an "origin" column listing the original file path,
#' FALSE will return a list of dataframes; default is TRUE
#' @return a named (with filename) list of dataframes or a single merged dataframe
#' @export
import_DLSsum <- function(directory_path, pattern = "DLS Sum", sheet = NULL, temp_cutoff = 25, header = FALSE, combine = TRUE) {
if (!(header %in% c(TRUE, FALSE))) {
stop("argument header must be TRUE or FALSE")
}
if (!(combine %in% c(TRUE, FALSE))) {
stop("argument combine must be TRUE or FALSE")
}
skip <- 0
if (header) {
skip <- 5
}
file_list <- list.files(directory_path, pattern = pattern, full.names = TRUE) |> {
\(l) rlang::set_names(l,
nm = purrr::map_chr(
l,
stringr::str_extract, "\\d{6}(?!/).*$"
)
)
}()
df_list <- purrr::map(file_list, readxl::read_excel, sheet = sheet, col_types = "text", skip = skip)
names_list <- purrr::map(
df_list,
function(df) {
recode_values <- c(
"color" = grep("color", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"uni" = grep("well", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"sample" = grep("sample", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"temp_C" = grep("(?=.*T)(?=.*\U00B0)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"Z_D" = grep("(?=.*Z-Ave)(?=.*Dia)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"Z_diffcoeff" = grep("(?=.*Z-Ave)(?=.*Diff)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"Z_D_SD" = grep("(?=.*SD)(?=.*Dia)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"PdI" = grep("PDI", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"fitVar" = grep("Fit Var", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"mcr_cps" = grep("^(?=intensity)(?=.*cps)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak1_D" = grep("(?=.*Pk 1)(?=.*Dia)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak1_MW" = grep("(?=.*Pk 1)(?=.*(M.W.|MW))", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak1_poly" = grep("(?=.*Pk 1)(?=.*polydispersity)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak1_mass" = grep("(?=.*Pk 1)(?=.*mass)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak1_diffcoeff" = grep("(?=.*Pk 1)(?=.*(coeff|co-eff))", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak2_D" = grep("(?=.*Pk 2)(?=.*Dia)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak2_MW" = grep("(?=.*Pk 2)(?=.*(M.W.|MW))", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak2_poly" = grep("(?=.*Pk 2)(?=.*polydispersity)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak2_mass" = grep("(?=.*Pk 2)(?=.*mass)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak3_D" = grep("(?=.*Pk 3)(?=.*Dia)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak3_MW" = grep("(?=.*Pk 3)(?=.*(M.W.|MW))", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak3_poly" = grep("(?=.*Pk 3)(?=.*polydispersity)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"peak3_mass" = grep("(?=.*Pk 3)(?=.*mass)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"filter" = grep("filter", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"viscosity" = grep("(?=.*viscosity)(?=.*cp)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"RefI" = grep("^RI$", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"dcr_cps" = grep("(?=.*derived)(?=.*intensity)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"min_pk_area" = grep("(?=.*min)(?=.*area)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE),
"min_Rh" = grep("(?=.*min)(?=.*Rh)", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE)
)
return(recode_values)
}
)
recode_values <- purrr::map(
names_list,
function(named_vector) {
swaped_vector <- names(named_vector)
names(swaped_vector) <- purrr::as_vector(named_vector)
return(swaped_vector)
}
)
renamed_list <- purrr::map2(
df_list,
recode_values,
function(df, values) {
names(df) <- names(df) %>% dplyr::recode(!!!values)
return(df)
}
)
vars_parse <- c(
"temp_C",
"Z_D",
"Z_diffcoeff",
"Z_D_SD", "PdI",
"fitVar",
"mcr_cps",
"peak1_D",
"peak1_MW",
"peak1_poly",
"peak1_mass",
"peak1_diffcoeff",
"peak2_D",
"peak2_MW",
"peak2_poly",
"peak2_mass",
"peak3_D",
"peak3_MW",
"peak3_poly",
"peak3_mass",
"viscosity",
"RefI",
"dcr_cps",
"min_pk_area",
"min_Rh"
)
parsed_list <- purrr::map2(
renamed_list,
names(df_list),
function(df, name) {
df %>%
dplyr::select(-color) %>%
purrr::modify_at(.at = vars_parse, readr::parse_number, na = c(">1000", "Out of Range", "-", NA, NaN, "\U221E")) %>%
purrr::modify_if(is.double, round, digits = 2) %>%
dplyr::filter(temp_C < temp_cutoff) %>%
tibble::add_column(mode_Z = purrr::pmap_dbl(dplyr::select(., tidyselect::matches("peak\\d{1}_D$")), function(...) length(c(...)[!is.na(c(...))])), .after = "Z_D") %>%
tibble::add_column(
file_name = stringr::str_extract(name, "(?<=//).*(?=\\.xlsx)"),
.before = "uni"
)
}
)
if (combine == TRUE) {
return(
dplyr::bind_rows(parsed_list, .id = "origin") %>%
dplyr::mutate(
origin = dplyr::if_else(
stringr::str_detect(.$origin, stringr::regex("\\.uni.*$")),
stringr::str_extract(.$origin, stringr::regex(".*(?=\\.uni)", ignore_case = TRUE)),
stringr::str_extract(.$origin, stringr::regex(".*(?=\\.(xls|xlsx))", ignore_case = TRUE))
)
) %>%
tidyr::separate(origin, c("date", "instrument", "protein", "plate", "file"), sep = "-") %>%
dplyr::select(-file_name)
)
} else {
return(parsed_list)
}
}
#' Import Uncle DLS spectra into R
#'
#' \code{import_DLSspec}
#'
#' @param directory_path a path to a directory containing the exported .xlsx files
#' @param pattern a regex pattern for further selecing files in the directory;
#' defaults to NULL to force user input that discriminates intensity from mass DLS files
#' @param type a character string, "C", "I", or "M", to signify if the data is correlation function, intensity, or mass distribution DLS spectra
#' @param header if TRUE skips first 3 rows of .xlsx file to remove Uncle header; default is TRUE
#' @param combine if TRUE, returns all imported data merged into one unified dataframe with an "origin" column listing the original file path,
#' FALSE will return a list of dataframes; default is TRUE
#' @return a named (with filename) list of dataframes or a single merged dataframe
#' @export
import_DLSspec <- function(directory_path, pattern = NULL, type = NA, header = TRUE, combine = TRUE) {
if (missing(pattern) | is.null(pattern)) {
stop("you must specify a search pattern to select the appropriate DLS files, e.g. 'DLS Spec I' or 'DLS Spec M'")
}
if (missing(type) | !(type %in% c("C", "I", "M"))) {
stop("DLS spectra type is required: 'C' for correlation, 'I' for intensity, 'M' for mass")
}
if (!(header %in% c(TRUE, FALSE))) {
stop("argument header must be TRUE or FALSE")
}
if (!(combine %in% c(TRUE, FALSE))) {
stop("argument combine must be TRUE or FALSE")
}
skip <- 3
if (!(header)) {
skip <- 0
}
# planning to remove is.null from pattern if statement and defaulting a patter based on specified type; pattern remains an argument to select one file only
nestedColName <- paste0("specDLS_", type)
nestedColName <- rlang::sym(nestedColName)
file_list <- list.files(directory_path, pattern = pattern, full.names = TRUE) |> {
\(l) rlang::set_names(l,
nm = purrr::map_chr(
l,
stringr::str_extract, "\\d{6}(?!/).*$"
)
)
}()
sheet_list <- file_list %>%
purrr::map(readxl::excel_sheets) %>%
purrr::map(~ .x[.x != "Sheet1"])
# print(file_list)
# print(sheet_list)
spectra_list <- purrr::map2(
file_list,
sheet_list,
function(files, sheets) {
purrr::map_dfr(
purrr::set_names(sheets), ~ suppressMessages(readxl::read_excel(files, sheet = .x, skip = skip, .name_repair = "universal")) %>%
# purrr::modify(readr::parse_number) %>%
# function to rename variables and reduce complexity of DLS scans at multiple temperatures
(function(df) {
xval <- grep(pattern = "Hydrodynamic.Diameter..nm.|Time..s.", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE)
yval <- grep(pattern = "Amp", names(df), ignore.case = TRUE, perl = TRUE, value = TRUE)
xnew <- "x"
if (grepl(pattern = "Hydro", names(df), ignore.case = TRUE, perl = TRUE)) {
xnew <- "hydroDia_x"
}
if (grepl(pattern = "Time", names(df), ignore.case = TRUE, perl = TRUE)) {
xnew <- "timeSec_x"
}
xsym <- rlang::sym(xnew)
if ((any(names(df) == "Hydrodynamic.Diameter..nm.") | any(names(df) == "Time..s.")) & any(names(df) == "Amplitude")) {
df_modified <- df %>%
dplyr::select(c(xval, yval)) %>%
dplyr::rename(!!xsym := xval, amp_y = Amplitude)
} else {
df_modified <- df %>%
dplyr::select(c(1:2)) %>%
dplyr::rename(!!xsym := 1, amp_y = 2)
message("DLS was performed at multiple temperatures. The first temperature data will be used.")
}
output <- df_modified %>% {
suppressMessages(tidyr::nest(., !!nestedColName := c(!!xsym, amp_y)))
}
return(output)
}) %>%
dplyr::select(!!nestedColName),
.id = "uni"
)
}
)
if (combine) {
return(
dplyr::bind_rows(spectra_list, .id = "origin") %>%
dplyr::mutate(
origin = dplyr::if_else(
stringr::str_detect(.$origin, stringr::regex("\\.uni.*$")),
stringr::str_extract(.$origin, stringr::regex(".*(?=\\.uni)", ignore_case = TRUE)),
stringr::str_extract(.$origin, stringr::regex(".*(?=\\.(xls|xlsx))", ignore_case = TRUE))
)
) %>%
tidyr::separate(origin, c("date", "instrument", "protein", "plate", "file"), sep = "-")
)
} else {
return(spectra_list)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.