#' Extracting raw Pb isotope ratio.
#'
#' Extracts raw 208Pb/207Pb, 206Pb/207Pb, 208Pb/206Pb and 207Pb/206Pb data from the raw .csv file
#' produced by Perkin Elmer Elan ICP-MS.
#'
#' @import data.table
#'
#' @param filename A data set saved in csv format with "," as field separator and "." as decimal
#' separator. Such file can be obtained from the .rep report provided by the instrument by opening
#' it with a spreadsheet editor and saving it as .csv file using a local setting = EN-US or
#' equivalent. The function identifies standard samples for bracketing when labelled as “SRM” and
#' substitutes their names with “SRM981_X” where X is a progressive integer. Similarly, QC samples
#' “CRM” are renamed as “CRM482_X”.
#'
#' @param report A string referring to the type of report generated by the instrument Accepted
#' values are \code{"short"} or \code{"long"}. \itemize{ \item{\bold{Short}}{ reports have only
#' information about the sample ID, data and time of the measure, a summary with average
#' intensities and finally, the intensity values for each replicate.} \item{\bold{Long}}{ reports
#' contain also information of the "short" counterpart and also some information about the
#' measurement conditions. Additionally, the summary section is reported after the intensity
#' values for each replicates. These reports are tipically produced when working in isotope ratio
#' mode.} }
#'
#' @return A data.table containing raw intensities and raw Pb isotope ratios for each sample.
#' Provided isotope ratios are 208Pb/207Pb, 206Pb/207Pb, 208Pb/206Pb and 207Pb/206Pb. Note that
#' these ratios have still to be inspected for outliers, summarised and corrected for mass bias.
#'
#' @examples
#' file.short <- system.file("extdata", "sednya_2015.csv", package = "pbratios")
#' sednya.2015 <- extract_data(file.short, report = "short")
#'
#' file.long <- system.file("extdata", "spmnya_2012.csv", package = "pbratios")
#' spmnya.2012 <- extract_data(file.long, report = "long")
#'
#' @export
#'
extract_data <- function(filename, report = c("short", "long")) {
report <- match.arg(report)
if (report != "short" & report != "long") {
stop('Error: report must be either equal to "short" or "long".')
}
# Loading the file
data <- fread(filename, header = FALSE, na.strings = "")
# Extracting the sample name
sampleid <- data[grepl("Sample ID", V1), V2]
# Fixing names for SRMs and CRMs
srm_id <- grepl("SRM", sampleid) & !grepl("test", sampleid)
crm_id <- grepl("CRM", sampleid)
sampleid[srm_id] <- paste("SRM981", 1:length(sampleid[srm_id]), sep = "_")
sampleid[crm_id] <- paste("CRM482", 1:length(sampleid[crm_id]), sep = "_")
if (report == "short"){
# Row number for "Replicates" and "Sample ID"
rep.row <- data[, row.id := 1:.N][grepl("Replicates", V1), row.id]
sample.row <- data[grepl("Sample ID", V1), row.id][-1]
sample.row[length(sample.row) + 1] <- data[.N, row.id + 1]
# Row numbers with data for each sample
n.row <- sample.row - rep.row - 1
# Select row with data
data <- data[do.call(c, mapply(seq, rep.row + 1, sample.row - 1, SIMPLIFY = FALSE)), ][,
# Assigning sample names
sample := rep(sampleid, n.row)][
# Remove rows with "Analyte"
-grep("Analyte", V1)]
data <- setnames(
data,
old = c("V1", "V2", "V3"),
new = c("element", "isotope", "intensity")
)
}
if (report == "long"){
# Row number for "Replicates" and "Summary"
rep.row <- data[, row.id := 1:.N][grepl("^Replicates$", V1), row.id]
summary.row <- data[grepl("^Summary$", V1), row.id]
# Row numbers with data for each sample
n.row <- summary.row - rep.row - 1
# Select row with data
data <- data[do.call(c, mapply(seq, rep.row + 1, summary.row - 1, SIMPLIFY = FALSE)), ][,
# Assigning sample names
sample := rep(sampleid, n.row)][
# Remove rows with "Replicate"
-grep("Replicate", V1)][
# Remove rows with "Analyte"
-grep("Analyte", V2)]
## Rename the columns
data <- setnames(
data,
old = c("V2", "V3", "V4"),
new = c("element", "isotope", "intensity")
)
}
# Merge columns "element" and "isotope" to the new column "nuclide"
data <- data[, nuclide := paste0(element, isotope)][,
.(sample, nuclide, intensity)]
## Assigning each column to its right class
data.factor <- c("sample", "nuclide")
data[, (data.factor) := lapply(.SD, as.factor),
.SDcols = data.factor][, intensity := as.double(intensity)] # S values <- NAs
# Put the dataset in wide format
lvls <- as.character(unique(data$sample))
data <- dcast(data[, id := 1:.N, by = .(sample, nuclide)],
id + sample ~ nuclide, value.var = 'intensity')[,
sample := factor(sample, levels = lvls)][order(sample)]
# Calculate the raw ratios
data[, c("Pb208207",
"Pb206207",
"Pb208206",
"Pb207206") :=
.(Pb208 / Pb207,
Pb206 / Pb207,
Pb208 / Pb206,
Pb207 / Pb206)][, -"id", with = FALSE]
}
# End of function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.