#' @include IsodatContinuousFlowFileClass.R
NULL
#' H-CSIA DataClass
#'
#' Objects of this class hold the isotopic data from compound specific hydrogen
#' isotope analysis recorded in Isodat file formats (currently supported isodat
#' version is 2.0 for chromatographic and peak table data and isodat version 2.5 and
#' 3.0 for chromatographic data only).
#'
#' This class is derived indirectly from \link{IrmsContinuousFlowData} which defines a
#' number of useful plotting, export and data access methods.
#'
#' @name IsodatHydrogenContinuousFlowFile
#' @exportClass IsodatHydrogenContinuousFlowFile
#' @seealso \link{BinaryFile}, \link{IsodatFile}, \link{IsodatContinuousFlowFile}, \link{IrmsContinuousFlowData}, \link{IrmsData}
IsodatHydrogenContinuousFlowFile <- setRefClass(
"IsodatHydrogenContinuousFlowFile",
contains = "IsodatContinuousFlowFile",
fields = list (),
methods = list(
#' initialize irms data container
init_irms_data = function(){
callSuper()
# mass and ratio plot options
set_plot_options(
masses = list(
mass2 = list(label = "Mass 2", color="black", offset=200), #offset in mV
mass3 = list(label = "Mass 3", color="dark green", offset=0)),
ratios = list (
ratio_3o2 = list(label = "Ratio", color="black", offset=0))
)
# peak table definition
dataTableColumns <<- data.frame(
data = c('Filename', 'Peak Nr.', 'Ref. Peak', 'Status', 'Component', 'Formula', 'Master Peak', 'Ref. Name',
'Start\n[s]', 'Rt\n[s]', 'End\n[s]', 'Width\n[s]', 'Ampl. 2\n[mV]', 'Ampl. 3\n[mV]', 'BGD 2\n[mV]', 'BGD 3\n[mV]',
'Area All\n[Vs]', 'Area 2\n[Vs]', 'Area 3\n[Vs]', 'rArea All\n[mVs]', 'rArea 2\n[mVs]', 'rArea 3\n[mVs]',
'R 3H2/2H2\n', 'rR 3H2/2H2\n', 'rd 3H2/2H2\n[per mil]\nvs. methane ref', 'd 3H2/2H2\n[per mil]\nvs. VSMOW',
'DeltaDelta 3H2/2H2\n[0 ]', 'R 2H/1H', 'd 2H/1H\n[per mil]\nvs. VSMOW', 'AT% 2H/1H\n[%]', 'Rps 3H2/2H2'),
type = c("character", "integer", "logical", "character", "character", "character", "character", "character",
rep("numeric", 14),
"Ratio", "Ratio", "Delta", "Delta", "numeric", "Ratio", "Delta", "numeric", 'numeric'),
show = TRUE, stringsAsFactors = FALSE)
dataTableColumns <<- mutate(
dataTableColumns,
column = sub('^([^\n]+).*', '\\1', data), # pull out column names from the data names
units = sub('\n', ' ', sub('^[^\n]+(\n)?(.*)$', '\\2', data))) # pull out units from the data names
# key peaks
dataTableKeys <<- c(peak_nr = "Peak Nr.", ref_peak = "Ref. Peak",
rt = "Rt", rt_start = "Start", rt_end = "End",
name = "Component")
},
# READ DATA =========================
#' expand parent load function with key_cleanup
load = function(...) {
callSuper(...)
clean_keys(
removeText="Arial",
removePattern = "[&{}!^@?#]",
unlessByteLength = 26,
unlessText = "Is Ref.?")
},
#' expand parent procdess function specifically for hydrogen continuous flow data
#' NOTE: this does NOT callSuper because the hydrogen file (.cf) seems very different from
#' generic IsodatContinuousFlowFile (.dxf)
#' @param read_mass_data whether to read chromatographic mass+ratio data (can be a lot of data)
process = function(read_mass_data = TRUE, ...) {
# process header
move_to_key("CRawDataScanStorage")
parse("UTF16", length = 13, id = "data_trace_name", skip_first = 14)
parse("long", id = "n_measurements", skip_first = 20)
parse("short", id = "n_ions", skip_first = 0)
parse("short", id = "n_ions", skip_first = 29)
# read mass2/mass3 data trace
if (read_mass_data) {
parse_array(
types = c(time = "float", mass2 = "double", mass3 = "double"),
n = data$n_measurements, id = "mass", skip_first = 0)
}
# footer
parse("UTF16", length=6, id = "trace1_name", skip_first = 70)
parse("UTF16", length=6, id = "trace2_name", skip_first = 4)
# ratio data header
move_to_key("CRatioDataScanStorage")
parse("UTF16", length = 13, id = "data_ratio_name", skip_first = 14)
parse("long", id = "n_ratio_measurements", skip_first = 20)
parse("short", id = "n_ratios", skip_first = 0)
parse("short", id = "n_ratios", skip_first = 18)
# read ratio data
if (read_mass_data) {
parse_array(
types = c(time = "float", ratio_3o2 = "double"),
n = data$n_ratio_measurements, id = "ratio", skip_first = 0)
}
# other information
move_to_key("H3 Factor")
parse("double", id = "H3factor", skip_first = 8)
data$GCprogram <<- find_key(".gcm$")$value
data$MSprogram <<- find_key(".met$")$value
data$Filename <<- find_key(".cf$")$value
data$ASprogram <<- find_key("Internal")$value
# reorganize data, move to IrmsDataClass structure
if (read_mass_data) {
massData <<- cbind(data$mass, data$ratio['ratio_3o2'])
data$mass <<- data$ratio <<- NULL
}
# peak table
rawtable <- rawdata[subset(keys, value=="CPkDataListBox")$byteEnd:subset(keys, value=="CGCPeakList")$byteStart]
arials <- grepRaw("([Arial][^\u0020-\u007e]){5}", rawtable, all=TRUE)
#FIXME: newer versions of isodat (2.5 and 3.1 don't have this business, just 18 bytes between each label!)
if (length(arials) < 5) {
warning("peak table entries not found, this could be because the file might be created with a newer version (>2) of isodat. ",
"files from isodat 2.5 and 3.1 are known to have this problem but are currently not yet supported")
} else {
entries<-NULL
spos <- 9 + (regexpr("14000000fffeff08", paste(readBin(rawtable[1:(arials[1]-48)], "raw", n=(arials[1]-48)), collapse=""), fixed=TRUE)-1)/2
for (i in arials) {
epos<-(i-48)
entries<-c(entries, paste(readBin(rawtable[spos:epos], "character", n=(epos-spos)/2, size=2), collapse=""))
spos<-i+100
}
entries <- entries[-length(entries)] # last entry is garbage
# sometimes there is an extra column (rps), sometimes not
if (! (rps_column <- length(entries) %% 28 == 0) && # rps column
! (length(entries) %% 27 == 0)) { # no rps column
# neither 27 nor 28 columns! not sure what's going on
assign("isoread_debug", entries, env = globalenv())
warning("it appears the peak table has neither exactly 27 nor 28 columns, not sure how to deal with this scenario. ",
"a dump of all entries recovered from the peak table is stored in the global variable 'isoread_debug'")
} else {
table<-matrix(entries, byrow=TRUE, ncol=if(rps_column) 28 else 27) # FIXME not sure this is always true that it's 27 columns but appears to be the case
df<-data.frame(table[2:nrow(table),], stringsAsFactors=FALSE)
names(df)<-table[1,]
# add rps column if missing
if (!rps_column)
df$`Rps 3H2/2H2` <- ""
# process peak nr
if ( !('Peak Nr.' %in% names(df) ))
stop("'Peak Nr.' column not found in peak table. Only available columns are ", paste0(names(df), collapse = ", "))
peakNrPattern <- "^([0-9]+)([\\*\\+]?)$"
df <- mutate(
df,
`Ref. Peak` = sub(peakNrPattern, '\\2', `Peak Nr.`) == "*", # whether it is a reference peak
Status = sapply(sub(peakNrPattern, '\\2', `Peak Nr.`), # whether peak was added
function(x) { if (x=="+") "Added" else "Auto" }),
Formula = "",
`Peak Nr.` = as.integer(sub(peakNrPattern, '\\1', `Peak Nr.`))) # peak number as integer
# store peak table data
dataTable <<- df
}
}
},
# COMPUTATION ================
#' Evaluate data in peak table
#'
#' This function uses the standards defined in the peak table to (re)evaluate the isotopic
#' composition of the other peaks in this reference frame.
#'
reevaluate_peak_table = function() {
stop("not implemnted yet")
},
# VISUALIZATION / PLOTTING ==========================
#' Make a ggplot of the references
#'
#' By default, this makes a plot that generates an overview
#' of how close the reference peaks are to
#' each other.
plot_refs = function(
y = isotopia::to_delta(`rR 3H2/2H2`, mean(`rR 3H2/2H2`)),
ylab = "dD [permil] vs mean ratio",
title = "Variation in reference peaks"){
# FIXME not sure how to call super from here to avoid this code replication
do.call(.self$plot_data_table, list(y = substitute(y), ylab = ylab, title = title, data = get_data_table(type = "ref")))
},
#' Make a ggplot of the data
#'
#' By default, this makes a plot of the delta values.
plot_data = function(
y = `d 2H/1H`,
ylab = "dD [permil] vs VSMOW",
title = "Data peaks"){
do.call(.self$plot_data_table, list(y = substitute(y), ylab = ylab, title = title, data = get_data_table(type = "data")))
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.