cwrshelpr: Functions for CWRS Data Analysis

Documented in read_icp

#' Read ICP-MS data files generated by the Thermo Scientific iCAP-RQ
#'
#' @param path Path to stored ICP-MS files
#' @param sheet_name Name of MS Excel sheet where data are stored
#' @param first_line The first column matches this pattern at the first line of data.
#' As an input to `stringr::str_detect()`, this can be a partial match or a regex.
#' @param sample_names Name of column where sample names are stored.
#' @param estimate_types Name of column where estimate types are stored (e.g., average, standard deviation).
#' @param ... Arguments passed on to `readxl::read_excel()`.
#'
#' @return A tibble with columns `sample_name`, `category`, `isotope`, `element`,
#' `value`, and `unit`
#' @importFrom tibble as_tibble
#' @importFrom dplyr mutate filter select slice summarize across select_at transmute vars rename_with %>%
#' @importFrom stringr str_detect str_extract str_replace str_to_title str_remove_all
#' @importFrom tidyr replace_na pivot_longer fill
#' @importFrom tidyselect everything starts_with
#' @importFrom rlang set_names .data
#' @importFrom janitor clean_names make_clean_names
#' @importFrom readxl read_excel
#' @export
#'
#' @examples
#' file <- list.files(
#'    path = system.file("extdata", package = "cwrshelpr"),
#'    full.names = TRUE,
#'    pattern = ".+\\.xlsx"
#' )
#' read_icp(file[1])
read_icp <- function(
  path,
  sheet_name = "Dilution factor included",
  first_line = "Sample List",
  sample_names = "Label",
  estimate_types = "Category",
  ...
) {

  temp_names <- make_clean_names(paste(first_line, sample_names, sep = " "))
  temp_types <- make_clean_names(paste(first_line, estimate_types, sep = " "))

  path %>%
    read_excel(col_names = FALSE, sheet = sheet_name, ...) %>%
    start_here(first_line) %>%
    horizontal_fill() %>% # fill first row
    rename_multirow() %>% # get metadata from first two rows
    slice(-(1:2)) %>%
    clean_names() %>%
    pivot_longer(
      starts_with("x"),
      names_to = c("element", ".value"),
      names_pattern = "(.+ked_)(.+)"
    ) %>%
    transmute(
      sample_name = .data[[temp_names]],
      estimate_type = .data[[temp_types]],
      isotope = str_extract(.data$element, "\\d+"),
      element = str_to_title(str_replace(.data$element, "(x\\d+)([a-z]+)(.+)", "\\2")),
      value = as.numeric(.data$value),
      unit = .data$unit
    )
}

as_tibble_no_warning <- function(x) suppressWarnings(as_tibble(x))

rename_multirow <- function (x, n = 2, remove_na = FALSE) {
  x <- x %>%
    set_names(
      nm = slice(x, seq_len(n)) %>%
        summarize(across(everything(), ~ paste(.x, collapse = "_")))
    )
  if(remove_na) {
    rename_with(x, ~ str_remove_all(.x, "NA_"))
  } else {
    x
  }
}

start_here <- function(x, first_line) {
  start_indicator_var <- NULL
  # filter out rows before the data starts:
  x %>%
    mutate(
      start_indicator_var = str_detect(.data$`...1`, first_line) %>%
        replace_na(0) %>%
        cumsum() %>%
        as.logical()
    ) %>%
    filter(.data$start_indicator_var) %>%
    select(-start_indicator_var)
}

horizontal_fill <- function(x, row = 1) {
  tempname <- paste0("V", row)
  x %>%
    t() %>%
    as_tibble_no_warning() %>%
    fill(.data[[tempname]]) %>%
    t() %>%
    as_tibble_no_warning()
}