R/isoread_caf.R

Defines functions extract_caf_vendor_data_table extract_caf_raw_voltage_data iso_read_caf

# read isodat .caf files
# @param ds the data structure to fill
# @param custom reader options - none needed
iso_read_caf <- function(ds, options = list()) {
  
  # safety checks
  if(!iso_is_dual_inlet(ds)) 
    stop("data structure must be a 'dual_inlet' iso_file", call. = FALSE)
  
  # read binary file
  ds$source <- get_ds_file_path(ds) |> read_binary_isodat_file()
  
  # process file info
  if(ds$read_options$file_info) {
    ds <- exec_func_with_error_catch(extract_isodat_old_sequence_line_info, ds)
    # NOTE: measurement info (see did) does not seem to be stored in caf files
    ds <- exec_func_with_error_catch(extract_isodat_datetime, ds)
    # NOTE: the following extraction is not tested but should work (don't have a .caf file with H3 factor)
    ds <- exec_func_with_error_catch(extract_H3_factor_info, ds)
    ds <- exec_func_with_error_catch(extract_MS_integration_time_info, ds)
  }
  
  # process raw data
  if (ds$read_option$raw_data){
    ds <- exec_func_with_error_catch(extract_caf_raw_voltage_data, ds)
  }
  
  # process method info
  if (ds$read_options$method_info) {
    ds <- exec_func_with_error_catch(
      extract_isodat_reference_values, ds,
      function(bin) cap_at_pos(bin, find_next_pattern(bin, re_unicode("Administrator"))))
    ds <- exec_func_with_error_catch(extract_isodat_resistors, ds)
  }
  
  # process pre-evaluated data table
  if (ds$read_options$vendor_data_table)
    ds <- exec_func_with_error_catch(extract_caf_vendor_data_table, ds)
  
  return(ds)
}

# extract voltage data in caf file
extract_caf_raw_voltage_data <- function(ds) {

  # locate masses
  ds$source <- ds$source |> 
    set_binary_file_error_prefix("cannot identify measured masses") |> 
    move_to_C_block_range("CResultData", "CEvalDataIntTransferPart") 
  
  # read all masses
  masses_re <- re_combine(re_x_000(), re_text_x(), re_unicode("rIntensity"))
  masses <- 
    tibble(
      pos = find_next_patterns(ds$source, masses_re) + masses_re$size,
      # capture cup and mass
      data = map(.data$pos, function(pos) {
        capture <- 
          ds$source |>
          move_to_pos(pos) |>  
          capture_data_till_pattern("cup", "text", re_text_x(), data_bytes_max = 8, move_past_dots = TRUE) |>
          move_to_next_pattern(re_unicode("rIntensity "), max_gap = 0L) |> 
          capture_data_till_pattern("mass", "text", re_text_x(), data_bytes_max = 8)
        dplyr::as_tibble(capture$data[c("cup", "mass")])
      })
    ) |> 
    # unnest data
    unnest("data") |> 
    mutate(
      cup = as.integer(.data$cup),
      column = str_c("v", .data$mass, ".mV")
    )

  # locate voltage data
  ds$source <- ds$source |> 
    set_binary_file_error_prefix("cannot locate voltage data") |> 
    move_to_C_block_range("CDualInletRawData", "CResultData") 
  
  # find binary positions for voltage standards and samples
  standard_block_start <- find_next_pattern(
    ds$source, re_combine(re_unicode("Standard Block"), re_null(4), re_x_000()))
  sample_block_start <- find_next_pattern(
    ds$source, re_combine(re_unicode("Sample Block"), re_null(4), re_x_000()))
  
  # safety checks
  if (is.null(standard_block_start) || is.null(sample_block_start) || 
      standard_block_start > sample_block_start) {
    iso_source_file_op_error(ds$source, "cannot find standard and sample voltage data blocks at expected positions")
  }
  
  # read voltage data
  ds$source <- set_binary_file_error_prefix(ds$source, "cannot process voltage data") 
  
  # right before this sequence there is a 4 byte sequence that could be a date, the last block is the # of masses
  read_blocks_re <- re_combine(re_null(4), re_block("etx"), re_x_000())
  positions <- find_next_patterns(ds$source, read_blocks_re)
  
  # function to capture voltages
  capture_voltages <- function(pos) {
    bin <- ds$source |> 
      move_to_pos(pos - 4) |> 
      capture_n_data("n_masses", "integer", n = 1)
    
    # safety check
    if (bin$data$n_masses != nrow(masses)) {
      iso_source_file_op_error(bin, glue("inconsistent number of voltage measurements encountered ({bin$data$n_masses}), expected {nrow(masses)}"))
    }
    
    bin <- bin |> 
      capture_n_data("voltage", "double", n = nrow(masses), sensible = c(-1000, 100000))
    
    # return voltage data
    return(tibble(cup = 1:nrow(masses), voltage = bin$data$voltage))
  }
  
  # assemble voltages data frame
  voltages <- tibble(
    pos = positions + read_blocks_re$size,
    # note last read in the sample block is actually the "pre"-read of the standard
    type = ifelse(.data$pos < sample_block_start | .data$pos==max(.data$pos), "standard", "sample")
  ) |> 
    group_by(.data$type) |> 
    mutate(
      cycle = as.integer(ifelse(.data$type[1] == "standard" & .data$pos == max(.data$pos), 0L, 1L:n())),
      # capture voltages
      voltages = map(.data$pos, capture_voltages)
    ) |> ungroup() |> 
    # unnest voltager data
    unnest("voltages") |> 
    # combine with cup/mass information
    left_join(select(masses, "cup", "column"), by = "cup") 
  
  # safety check
  if (any(notok <- is.na(voltages$column))) {
    iso_source_file_op_error(ds$source, glue("inconsistent cup designations: {collapse(voltages$cup[notok], ', ')}"))
  }
  
  # voltages data frame
  ds$raw_data <- 
    voltages |> 
    select(-"pos", -"cup") |> 
    spread(.data$column, .data$voltage) |> 
    arrange(desc(.data$type), .data$cycle)
  
  return(ds)
}

# extract data table
extract_caf_vendor_data_table <- function(ds) {
  
  # reset navigation
  ds$source <- reset_binary_file_navigation(ds$source)
  
  # get data table
  extracted_dt <- 
    ds |> 
    # FIXME: see testing.Rmd for trying to switch to vendor_data_table2 (not possible yet)
    extract_isodat_main_vendor_data_table(
      C_block = "CResultData", cap_at_fun = NULL,
      col_include = "^(d |AT |Nr\\.|Is Ref)", 
      skip_row_check = function(column, value) column == "Is Ref.?" && value == "1")
  vendor_dt <- extracted_dt$cell_values
  
  # safety check
  req_cols <- c("Nr.", "Is Ref.?")
  if (!all(ok <- req_cols %in% names(vendor_dt))) {
    glue("not all required columns found, missing: {collapse(req_cols[!ok], '. ')}") |> 
      stop(call. = FALSE)
  }
  
  # divided row columns (some are in the first block, some in the second)
  second_block_cols <- vendor_dt[1,] |> map_lgl(~is.na(.x))
  if (sum(second_block_cols) > 0) {
    # separate and merge the two blocks
    condition <- as.name(names(vendor_dt)[which(second_block_cols)[1]])
    first_block <- filter(vendor_dt, is.na(!!condition))[!second_block_cols]
    second_block_cols['Nr.'] <- TRUE
    second_block <- filter(vendor_dt, !is.na(!!condition))[second_block_cols]
    vendor_dt <- left_join(first_block, second_block, by = "Nr.") 
  }
  
  # assign data table
  ds$vendor_data_table <- vendor_dt |> 
    arrange(!!as.name("Nr.")) |> 
    mutate(cycle = as.integer(1:n())) |> 
    select(-"Nr.", -"Is Ref.?") |> 
    select("cycle", dplyr::everything())
  
  # save information on the column units
  attr(ds$vendor_data_table, "units") <- 
    bind_rows(
      tibble(column = c("cycle"), units = ""),
      filter(extracted_dt$columns, .data$column %in% names(ds$vendor_data_table))[c("column", "units")]
    )
  
  # FIXME: do this directly
  ds$vendor_data_table <- convert_df_units_attr_to_implicit_units(ds$vendor_data_table)
  
  return(ds)
}
KopfLab/isoreader documentation built on Aug. 6, 2023, 9:22 p.m.