#' Read raw ion count data
#'
#' \code{read_IC()} is designed to obtain the numerical data associated with ion
#' counts and minimal set meta-data.
#' \code{read_meta()} can be used to specifically retrieve the meta date
#' associated with ion count data analysis, thereby loading specifications
#' related to the optics, the primary and secondary ion beams, and the mass
#' spectrometer.
#'
#' Ion count data consists of time-incremented integer values. These functions
#' are currently only supported for data generated by a NanoSIMS50L. Raw ion
#' count data and accompanying is extracted and collated into a single tibble
#' from text files with the extensions \emph{.is_txt} \emph{.chk_is} and
#' \emph{.stat}, respectively. These files can be found in the directories
#' associated with the SIMS measurements.
#'
#' @param directory A path or connection to a directory containing raw ion count
#' data txt files.
#' @param meta Logical indicating whether to include full meta-data.
#' @param hide Logical indicating whether metadata is included as columns
#' \code{FALSE} or as an attribute of the tibble \code{TRUE}.
#'
#'
#' @return A \code{tibble::\link[tibble:tibble]{tibble}} containing raw ion
#' count data and metadata.
#' @export
#' @examples
#' # Use point_example() to access the examples bundled with this package
#'
#' read_IC(point_example("2018-01-19-GLENDON"))
read_IC <- function(directory, meta = FALSE, hide = TRUE){
# List files
ls_IC <- read_validator(directory, "is_txt")[["is_txt"]]
# Collecting measurement data
tb_IC <- vroom::vroom(
ls_IC,
comment = "B",
delim = "\t",
skip = 1,
col_types = "-dd",
col_select = c("file.nm", t.nm = .data$X, N.rw = .data$Y),
na = c("X", "Y"),
id = "file.nm",
.name_repair = "minimal"
) %>%
tidyr::drop_na() %>%
dplyr::mutate(
file.nm =
dplyr::recode(.data$file.nm, !!! rlang::set_names(names(ls_IC), ls_IC))
)
# meta data names according to Cameca
point_nms <- dplyr::filter(
point::names_cameca,
.data$extension == ".is_txt",
.data$use == "meta"
)
tb_meta <- point_lines(ls_IC, pattern = "B", sep = "\\=", id = "num.mt") %>%
# meta data names
dplyr::rename(rlang::set_names(point_nms$cameca, nm = point_nms$point)) %>%
dplyr::mutate(
species.nm = stringr::str_extract(.data$mass.mt, "(?<=\\().+?(?=\\))"),
tc.mt = readr::parse_number(.data$tc.mt)
)
# vector of detector numbering
vc_num <- rep(
# number of detectors
1:max(tb_meta$num.mt),
# number of measurements per detector per analysis
each = nrow(tb_IC) / length(ls_IC) / max(tb_meta$num.mt),
# total number of measurements
length.out = nrow(tb_IC)
)
tb_rw <- dplyr::left_join(
tibble::add_column(tb_IC, num.mt = vc_num),
tb_meta,
by = c("file.nm", "num.mt")
)
# extended meta-data
if (isTRUE(meta)) {
suppressMessages(
tb_rw <- rlang::list2(tb_rw, !!! read_meta(directory)) %>%
purrr::reduce(dplyr::left_join)
)
# Add block number
tb_rw <- dplyr::group_by(tb_rw, .data$file.nm, .data$species.nm)%>%
dplyr::mutate(bl.nm = dplyr::ntile(n = as.numeric(.data$bl_num.mt))) %>%
dplyr::ungroup()
}
# hide meta-data
if (isTRUE(hide)) tb_rw <- fold(tb_rw, type = ".mt")
tb_rw
}
#' @rdname read_IC
#'
#' @export
read_meta <- function(directory) {
# Check validity of directory
ls_files <- read_validator(directory)
# vector of cameca variable names
vc_meta <- dplyr::filter(
point::names_cameca,
.data$extension == ".chk_is",
.data$format == "line"
)
# optics set-up
meta_join <- function(meta) {
tidyr::pivot_longer(meta, -c(.data$file.nm, .data$id), names_to = "meta")
}
tb_ll <- purrr::map(
vc_meta$cameca,
~point_lines(
ls_files[["chk_is"]],
pattern = .x,
sep = "\\:",
delim = "/"
)
) %>%
purrr::compact() %>%
purrr::map(meta_join) %>%
dplyr::bind_rows() %>%
dplyr::distinct(.data$file.nm, .data$meta, .data$value, .keep_all = TRUE) %>%
dplyr::select(-.data$id) %>%
tidyr::pivot_wider(names_from = "meta")
# PHD
tb_phd <- point_table(
directory,
pattern_begin = "Phd Centering Results",
pattern_end = "E0S Centering Results",
file_type = "chk_is",
col_names = c("", "num.mt", "M_PHD.mt", "SD_PHD.mt", "EMHV.mt"),
col_types = "-cddd",
nudge_top = 1,
nudge_tail = -3
) %>%
dplyr::mutate(num.mt = readr::parse_number(.data$num.mt))
dplyr::rename(
tb_ll,
dplyr::any_of(rlang::set_names(vc_meta$cameca, nm = vc_meta$point))
) %>%
dplyr::mutate(
# Add measurement number
n.rw = as.numeric(.data$`bl_num.mt`) * as.numeric(.data$`meas_bl.mt`),
# Add electron detector type (EM or FC)
det_type.mt = dplyr::if_else("FC_start.mt" %in% colnames(.), "FC", "EM")
) %>%
list(tb_phd)
}
#' Get path to point example
#'
#' This function comes from the package `readr`, and has been modified to access
#' the bundled datasets in directory `inst/extdata` of `point`. This
#' function make them easy to access. This function is modified from
#' \code{\link[readr:readr_example]{readr_example}} of the package
#' \code{\link[readr]{readr}}.
#'
#' @param path Name of file. If `NULL`, the example files will be listed.
#' @export
#' @examples
#' point_example()
#' point_example("2018-01-19-GLENDON")
point_example <- function(path = NULL) {
if (is.null(path)) {
dir(system.file("extdata", package = "point"))
} else {
system.file("extdata", path, package = "point", mustWork = TRUE)
}
}
#' Check if directory is suitable for point
#'
#' This function checks whether the necessary files for the `point` read
#' functions are included in the directory.
#'
#' @param directory A path or connection to a directory containing raw ion count
#' data files.
#' @param types Regular expression for the required file extensions. Default
#' searches for files ending with .is_txt, .chk_is, and .stat
#'
#' @return A logical indicating whether the directory is suitable for `point`
#' @export
#' @examples
#' ICdir_chk(point_example("2018-01-19-GLENDON"))
ICdir_chk <-function(directory, types = c("is_txt", "chk_is", "stat")){
# types <- paste0(".", types)
# check if type is valid
sys_types <- c("is_txt", "chk_is", "stat") %>% rlang::set_names()
if (any(types %in% sys_types)) {
types <- sys_types[sys_types %in% types]
} else {
stop("Unknown extension.", call. = FALSE)
}
# directory name if also file name
dir_nm <- stringr::str_extract(
directory,
stringr::str_c("(?<=", dirname(directory), "/)(.)+")
)
ls_files <- fs::dir_ls(directory)%>%
purrr::keep(stringr::str_detect(., pattern = dir_nm))
ls_names <- unique(fs::path_ext_remove(fs::path_file(ls_files))) %>%
purrr::keep(stringr::str_detect(., pattern = "(_[:digit:]+_[:digit:]+)$"))
ls_types <- purrr::cross(list(directory, ls_names, ext = types)) %>%
purrr::map_chr(purrr::lift(fs::path)) %>%
rlang::set_names(nm = rep(ls_names, dplyr::n_distinct(types)))
if (length(ls_types > 0) & all(ls_types %in% ls_files)) {
# makes grouped list
split(ls_types, rep(names(types), each = dplyr::n_distinct(ls_names)))
} else {
FALSE
}
}
#' Access and hide IC metadata
#'
#' \code{unfold()} helps unpack metadata associated with ion count
#' data loaded with \code{read_IC()}. \code{fold()} does the opposite an hides
#' the metadata as attribute of the tibble.
#'
#' @param df A tibble containing ion count data along any point of the point-
#' workflow
#' @param type A character string identifying the metadata (default:
#' \code{"metadata"})
#' @param merge Logical dictating whether metadata is joined to the tibble or
#' returned as a separate file.
#' @param meta Additional tibble containing the metadata for storage along the
#' main IC data.
#'
#' @return A tibble with metadata as an attribute, columns or as a seperate
#' tibble.
#' @export
#' @examples
#' tb_rw <- read_IC(point_example("2018-01-19-GLENDON"), hide = TRUE)
#'
#' # Unfold metadata
#' unfold(tb_rw, merge = FALSE)
unfold <- function(df, type = "metadata", merge = TRUE) {
# no attribute of name type return unchanged data
if (is.null(attr(df, type))) {
warning("Attribute unavailable.", call. = FALSE)
return(df)
}
meta <- attr(df, type)
vars <- dplyr::select(meta, dplyr::ends_with(".nm")) %>%
colnames()
vars <- vars[which(vars %in% colnames(df))]
if (merge) return(dplyr::left_join(df, meta, by = vars)) else return(meta)
}
#' @rdname unfold
#'
#' @export
fold <- function(df, type, meta = NULL) {
vc_type <- c(`metadata` = ".mt", `rawdata` = ".rw", `modeldata` = ".ml")
vc_type <- vc_type[vc_type %in% type]
if (is.null(meta)){
tb <- dplyr::select(df, -c(dplyr::ends_with(type)))
ls_tb <- purrr::map(
vc_type,
~dplyr::select(df, dplyr::ends_with(".nm") | dplyr::ends_with(.x))
)
ls_tb[[length(vc_type) + 1]] <- (tb)
} else {
ls_tb <- rlang::list2(metadata = meta, df)
}
purrr::reduce2(rev(ls_tb), rev(names(vc_type)), write_attr)
}
#-------------------------------------------------------------------------------
# Function for testing and validation (NOT EXPORTET)
#-------------------------------------------------------------------------------
# Validation function to check for empty files or files with empty columns
read_validator <- function(directory, types = c("is_txt", "chk_is", "stat")){
# Argument class check
stopifnot(fs::is_dir(directory))
# Check if directory contains files
if (length(dir(directory)) == 0) {
stop("`directory` does not contain any files.", call. = FALSE)
}
# Check if directory contains specified file types
if (isFALSE(ICdir_chk(directory, types))) {
stop(
paste0("`directory` does not contain required filetypes: .is_txt,",
" .chk_is, and .stat."),
call. = FALSE
)
} else {
ls_files <- ICdir_chk(directory, types)
}
# Length check of txt files
if ("is_txt" %in% types & any(missing_text(ls_files[["is_txt"]]) == 0)) {
good <- missing_text(ls_files[["is_txt"]]) > 0
ls_files[["is_txt"]] <- ls_files[["is_txt"]][good]
warning("Empty txt file removed.", call. = FALSE)
} else {
return(ls_files)
}
}
# Row scanner determine number of rows
row_scanner <- function(ls, reg_expr, return_line = FALSE, nudge = 0) {
lines <- vroom::vroom_lines(ls)
pos_line <- stringr::str_which(lines, reg_expr)
ext_line <- lines[pos_line + nudge]
# Are these lines identical ?
if (isTRUE(return_line)) {
if (length(unique(ext_line)) > 1) {
warning("Column names are not equal.", call. = FALSE)
}
col_nms <- stringr::str_split(
unique(ext_line),
"\\s(?=[[:upper:]])"
)[[1]] %>%
stringr::str_trim()
# empty strings
nm_empty <- stringi::stri_isempty(col_nms)
col_nms[nm_empty] <- paste0("X", seq_along(nm_empty))[nm_empty]
list(pos_line, col_nms)
} else {
pos_line
}
}
# File validator. Empty text files
missing_text <- function(files) {
purrr::map_dbl(files, ~length(vroom::vroom_lines(.x, n_max = 2)))
}
write_attr <- function(df1, df2, nm) {
attr(df1, nm) <- df2
df1
}
# extracting single lines from cameca
point_lines <- function(files, pattern = NULL, position = NULL, sep = NULL,
delim = ":", id = "id") {
# names files
file_nms <- names(files)
# load all lines
files <- vroom::vroom_lines(files)
# filter lines
if (!is.null(position)) {
files <- files[position]
} else if (!is.null(pattern)) {
files <- stringr::str_subset(
files,
pattern =
stringr::str_c("\\Q", pattern, "\\E", "\\s*", sep, collapse = "|"))
}
# short cut if pattern does not exist
if (length(files) == 0) return(NULL)
# line numbers
file_num <- rep(1: (length(files) / length(file_nms)), length(file_nms))
# update names if multiple rows are extracted per file
if (length(files) > length(file_nms)) {
file_nms <- rep(file_nms, each = length(files) %/% length(file_nms))
}
# replace NAs
files <- stringr::str_replace_all(
files,
pattern = "N\\/A",
replacement = "NA"
)
# extract column names with regex
col_nms <- stringr::str_extract_all(
files,
paste0("(?<=(\\", delim, "|^))(.)+?(?=(", sep ,"|$))")
) %>%
purrr::flatten_chr() %>%
stringr::str_trim() %>%
unique()
# regex column names
remove_reg <- stringr::str_c(
"(\\Q", col_nms, "\\E\\s*", sep, ")",
collapse = "|"
)
# extract column names regex from output to obtain values
vals <- stringr::str_remove_all(files, paste0(remove_reg, "|\\s"))
# create appropriate value separators
separators <- rep(
c(rep(delim, length(pattern) - 1), "\n"),
length(files) / length(pattern)
)
vals <- stringr::str_c(vals, separators, collapse = "")
vroom::vroom(
I(vals),
delim = delim,
# default to character
col_types = vroom::cols(.default = vroom::col_character()),
col_names = col_nms,
show_col_types = FALSE
) %>%
tibble::add_column(
file.nm = file_nms,
{{id}} := file_num,
.before = col_nms[1]
)
}
# function to read CAMECA output to validate point output
point_table <- function(directory, pattern_begin, table_depth,
pattern_end = NULL, file_type, col_types = NULL,
col_names = NULL, nudge_top = 0, nudge_tail = 0,
table_dups = NULL) {
ls_files <- ICdir_chk(directory, file_type)
# top position table
min_row <- purrr::map(
ls_files[[file_type]],
~row_scanner(
.x,
pattern_begin,
return_line = TRUE,
nudge = nudge_top
)
) %>%
purrr::transpose()
# max depth of table
if (!is.null(pattern_end)) {
max_row <- purrr::map(
ls_files[[file_type]],
~row_scanner(
.x,
pattern_end,
nudge = nudge_tail
)
)
table_row <- purrr::map2(max_row, min_row[[1]], ~ .x - .y + nudge_tail)
} else if (!is.null(table_depth)) {
table_row <- purrr::map(min_row[[1]], ~rep(table_depth, length(.x)))
}
# execute reading functions
purrr::imap_dfr(
ls_files[[file_type]],
~read_point_table(
.x,
min_row[[1]][.y],
table_row[.y],
if (is.null(col_names)) min_row[[2]][[.y]] else col_names,
col_types,
table_dups
),
.id = "file.nm"
)
}
# reading tables in Cameca meta data
read_point_table <- function(files, skip_rows, table_rows, var_names,
col_types, table_dups = NULL) {
# Na aliases
NA_aliases <- c(mapply(strrep,"X", 1:10, USE.NAMES = FALSE), "1.#R")
# execute read function
purrr::map2_dfr(
skip_rows,
table_rows,
~vroom::vroom_fwf(
files,
vroom::fwf_empty(
files,
skip = .x + 1,
n = .y,
col_names = var_names
),
skip = .x + 1,
n_max = .y,
col_types = col_types,
na = NA_aliases,
.name_repair = "minimal"
),
.id = table_dups
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.