R/read_6800.R
In gasanalyzer: Import, Recompute and Analyze Data from Portable Gas Analyzers

Documented in read_6800_equations read_6800_txt read_6800_xlsx

#' Read gas-exchange equations directly from 6800 xlsx files.
#'
#' It is recommended to use [create_equations()] for recalculating gas-exchange
#' data, but under some conditions, it may be useful to apply exactly the same
#' equations as used in the xlsx data file.
#'
#' Currently, this only works for xlsx files stored by the 6800. this function
#' extracts xlsx formulas from the file and stores them in a list for use by the
#' [recalculate()] function. Note there is no guarantee that the extracted
#' equations work on any other data files. Since newer versions of the 6800
#' firmware allows defining custom equations, it is not guaranteed that all
#' equations can be extracted successfully.
#'
#' @param filename an xlsx file containing 6800 gas-exchange data
#'
#' @returns A list of gas-equations.
#'
#' In principle, this can be made to work for the 6400 as well, but since
#' that instrument uses a variation of the older xls format, it is hard to
#' get working in practice.
#'
#' @seealso [create_equations()]
#' @export
#'
#' @examples
#' example <- system.file("extdata//lowo2.xlsx", package = "gasanalyzer")
#'
#' # get equations stored in the xlsx file
#' Eqs <- read_6800_equations(example)
#'
#' #Inpect how stomatal conductance is calculated:
#' Eqs$GasEx.gsw
read_6800_equations <- function(filename) {

  parse_6800_xlsx(filename, extract_formula = TRUE,
                  extract_units = FALSE)[["formula"]] |>
    extract_6800_equations()

}

#' Extract gas-exchange equations from parsed xlsx files
#'
#' Intended for internal use.
#'
#' @param df containing formulas, extracted from an xlsx using
#'   [parse_6800_xlsx()]
#'
#' @returns a list of quosures with equations.
#'
#' @importFrom utils getParseData
#' @importFrom stats setNames
#' @seealso [create_equations()]
#' @noRd
extract_6800_equations <- function(df) {

  # parsing input maybe unsafe. We check whether the functions are anticipated
  # and parsing was successful. Additional funcs can be added using xl_to_r
  tryCatch(
    { # use base to parse as we need the keep source:
      parsedForms <- parse(text = df$fn, keep.source = TRUE)
      pd <- getParseData(parsedForms)
      # all function calls have to be members of xl_to_r:
      fdif <- setdiff(pd$text[pd$token == "SYMBOL_FUNCTION_CALL"],
                      .gasanalyzerEnv$xl_to_r)
      if (!(length(fdif) == 0))
        stop("Please report unknown function calls: ", fdif, "\n")

      out <- setNames(as.list(parsedForms), nm = df$header)
      # add an entry to indicate these quos use no units:
      out[["gasanalyzer.UseEqUnits"]] <- FALSE
      return(out)
    },
    error = function(cond) {
      message("Error extracting XL formulas:")
      message(cond)
    }
  )
}

#' Parse 6800 xlsx files and return remarks, headers, units, formulas and data.
#'
#' Intended for internal use.
#'
#' Currently, this only works for xlsx files stored by the 6800. In principle,
#' it can be made to work for the 6400 as well, but since that instrument uses a
#' variation of the older xls format, it is hard to get working in practice.
#'
#' @param filename an xlsx file containing 6800 gas-exchange data
#'
#' @returns a list of dataframes with remarks, headers, units, formulas and data
#'
#' @importFrom tidyxl xlsx_cells
#' @importFrom utils getParseData modifyList
#' @importFrom stats setNames reshape
#' @importFrom vctrs vec_split vec_rbind vec_cbind
#' @importFrom utils compareVersion
#' @noRd
parse_6800_xlsx <- function(filename, extract_formula = FALSE,
                            extract_units = TRUE) {


  allCells <- xlsx_cells(filename, include_blank_cells = FALSE)
  class(allCells) <- "data.frame"

  if (any(allCells$sheet=="Measurements"))
    msheet <- "Measurements"
  else
    msheet <- allCells$sheet[1]

  # load head calibration from remarks
  remarksDF <- allCells[allCells$sheet == "Remarks" & allCells$col < 3L, ]
  remarksDF$X <- blend(remarksDF$character, as.character(remarksDF$numeric),
                       as.character(remarksDF$logical))
  remarksDF <- reshape(remarksDF[,c("row","col", "X")], direction="wide",
                       timevar = "col", idvar = "row", sep="", v.names="X")

  bluestem <- remarksDF$X2[remarksDF$X1 == "Console ver"]

  if (!grepl("Bluestem", bluestem, fixed = TRUE)) {
    warning(filename, " is not a valid 6800 xlsx file.\n")
    list(remarks = NA, headers = NA, units = NA,
         formula = NA, consts = NA, data = NA)
  } else {
    vers <- strsplit(bluestem,"v.")[[1]][2]
    if (is.na(numeric_version(vers, strict = FALSE)) ||
        compareVersion(vers, "2") < 0)
      warning(filename, " created by an unsupported firmware version (v",
              vers,").\n")
  }

  allCells <- allCells[allCells$sheet == msheet,
                       c("address", "row", "col", "error", "logical",
                         "character", "numeric", "formula", "data_type",
                         "content")]

  # this row has the header after which the measurement data is found:
  datastart <- allCells$row[!is.na(allCells$character) &
                              tolower(allCells$character) == "obs"] - 1L

  if (length(datastart) < 1L) {
    warning("No data rows in ", filename, ".")
    return(list(remarks = remarksDF, headers = NA, units = NA,
                formula = NA, consts = NA, data = NA))
  } else if (length(datastart) > 1L) {
    warning("Parse error in ", filename,
            ": Multiple rows with obs header unsupported! Ignoring data.\n")
    return(list(remarks = remarksDF, headers = NA, units = NA,
                formula = NA, consts = NA, data = NA))
  }

  datastart <- datastart[1L]

  # Measurement rows are identified by col 1 having a numeric value
  datarows <- allCells$row[!is.na(allCells$numeric) & allCells$col == 1L]
  headerrows <- rep(datastart, each = 3L) + 0:2L

  # create a df for our constants with names and addresses
  constsDA <- allCells[!(allCells$row %in% c(headerrows, datarows)), ]
  # consts blocks start on lines that have characters in col1
  constsDA$n <- cumsum(!is.na(constsDA$character) & constsDA$col == 1L)
  # split the df by grp, add col with the header name for the header row
  # and remove the original line with the header name
  constsDA <- lapply(vec_split(constsDA, constsDA$n)[[2]], function(x)
  { x$group <- replace(NA, x$row == x$row[1L],
                      x$character[1L]); x[-1L, ]}) |>
    do.call(vec_rbind, args = _)

  constsDF <- constsDA[!is.na(constsDA$group),
                       c("character", "group", "row", "col")]
  # create header names by merging the beheaded header
  # with the character cells to the right of them
  constsDF$header <- do.call(paste, c(constsDF[c("group", "character")],
                                      sep = ".")) |>
    rename_header("Li6800")

  # assumption: header is 1 row, data starts after that
  # and *is also only one row* (keep row integer!)
  constsDF$row <- constsDF$row + 1L
  # join back headers to constsDA
  constsDF <- constsDF[c("header", "row", "col", "group")]  |>
    merge(constsDA[, !names(constsDA) %in% c("group", "n"), drop = FALSE],
          all.x = T, sort = F, by = c("row", "col"))
  # we are not interested in row differences before datastart
  constsDF$row <- replace(constsDF$row,
                         constsDF$row <= headerrows[length(headerrows)],
                         headerrows[length(headerrows)] + 1L)

  # with many rows, we need a fast way to combine 2 header rows and
  # match headers to cols. Also should allow for missing headers...
  headerDF <- allCells[allCells$row == headerrows[1L] |
                         allCells$row == headerrows[2L],
                       c("character", "row", "col")] |>
    reshape(direction = "wide", timevar = "row", idvar = "col",
            v.names = "character")
  #change category for old firmware (pre 2?):
  headerDF[headerDF[[2L]] == "Sys", 2] <- "SysObs"
  headerDF$header <- paste(headerDF[[2L]], headerDF[[3L]], sep = ".") |>
    rename_header("Li6800")

  #Include datastart, this prevents that cols with no data are dropped
  #but make them NA
  dataDF <- allCells[allCells$row %in% c(datastart,datarows), ]
  dataDF$character[dataDF$row == datastart] <- NA
  # merge the double header
  idx <- match(dataDF$col, headerDF$col)
  dataDF$header <- headerDF$header[idx]
  dataDF$group <- headerDF[[2L]][idx]

  # convenient is the first row, but lets take one with formulas
  dataRow1 <- dataDF[dataDF$row == dataDF$row[!is.na(dataDF$formula)][1], ]

  # extract the formulas. I'm going to assume formulas don't change with
  # rows, except when new constants are listed between rows, but since we
  # made nice columns out of those, all formulas in a col should be
  # identical and so we use only the first row of data & consts
  # to construct R formulas
  if (extract_formula == TRUE) {

    constsRow1 <- constsDF[constsDF$row == constsDF$row[1L], ]
    # for name - address lookups:
    Row1 <- vec_rbind(dataRow1, constsRow1)[c("header", "address")]

    formDF <- dataRow1[!is.na(dataRow1$formula),
                       c("formula", "address", "header")]
    # constants become rows, there are no fixed refs anymore
    # replace fixed first. We need to strip $ before getXLrefs anyway
    formDF$formula <- stri_replace_all_fixed(formDF$formula,
                                             c("$", "TRUE()", "FALSE()"),
                                             c("",  "TRUE",   "FALSE"),
                                             vectorise_all = FALSE)

    # bind this to 2 columns containing a list with refs and derefs (names)
    formDF <- vec_cbind(formDF, t(getXLrefs(formDF$formula, Row1$address,
                                            Row1$header)))

    # now replace refs with derefs and xl_to_r functions
    formDF$fn <- stri_replace_all_regex(formDF$formula,
                                       c(paste0("\\b",
                                                unlist(formDF$refs),
                                                "\\b"),
                                         names(.gasanalyzerEnv$xl_to_r)),
                                       c(unlist(formDF$derefs),
                                         unname(.gasanalyzerEnv$xl_to_r)),
                                       vectorise_all = FALSE)

    # use a topo sort to make sure the mutate call will be ordered correctly:
    formDF <- formDF[order(tsort(formDF$address, formDF$refs)), ]
  } else
    formDF <- NA

  if (extract_units == TRUE) {
    # always keep this off:
    old_opt <- units_options("simplify")
    units_options("simplify" = NA)

    unitsDF <- allCells[allCells$row == headerrows[3L],
                        c("col", "character")]
    # replace silly things
    unitsDF$character <- stri_replace_all_fixed(unitsDF$character,
                                                c("\u207b", "\u00b2",
                                                  "\u00b9", "\u00b3", "hrs"),
                                                c("-", "2", "1", "3", "h"),
                                                vectorize_all = FALSE)
    names(unitsDF)[names(unitsDF) == "character"] <- "units"
    unitsDF$header <- dataRow1$header[dataRow1$col %in% unitsDF$col]
    units_options("simplify" = old_opt)
  } else
    unitsDF <- NA

  return(list(remarks = remarksDF, headers = headerDF, units = unitsDF,
              formula = formDF, consts = constsDF, data = dataDF))
}

#' Reads 6800 xlsx files and creates a tibble with gas-exchange data.
#'
#' The xlsx files stored by the 6800 contain measured and calculated values that
#' are read by this function and formatted in a large tibble for use with R.
#' Constants and metadata (such as calibration information) are also added as
#' columns.
#'
#' Note that values for many derived gas-exchange parameters are not
#' stored in the files, but are calculated by equations stored in the xlsx.
#' These values are 0 after importing, unless setting recalculate = TRUE. It is
#' also possible to calculate this parameters after importing using the
#' [recalculate()] function.
#'
#' Multiple files can be loaded by calling the function with [lapply()] or
#' [purrr::map()] to merge multiple files. In this case, it is important
#' to ensure that the column names will match. Recalculation can be disabled
#' for speed, and instead applied to the merged data using [recalculate()].
#'
#' @param filename an xlsx file containing 6800 gas-exchange data.
#' @param recalculate character string indicating whether or not to recalculate
#'   data using equations from the xlsx file.
#'
#' @returns A tibble with gas-exchange data in columns.
#'
#' @importFrom tidyxl xlsx_cells
#' @importFrom stringi stri_replace_all_fixed stri_replace_all_regex
#'   stri_split_fixed
#' @importFrom units set_units units_options
#' @importFrom tibble tibble as_tibble
#' @importFrom jsonify from_json
#' @importFrom tools file_path_sans_ext
#' @importFrom vctrs vec_split vec_rbind
#'
#' @seealso [recalculate()]
#'
#' @export
#'
#' @examples
#' exampledir <- system.file("extdata", package = "gasanalyzer")
#' # import factory calibration for example data:
#' import_factory_cals(exampledir)
#'
#' # read data:
#' li6800 <- read_6800_xlsx(paste0(exampledir, "//lowo2.xlsx"))
#' li6800_norecalc <- read_6800_xlsx(paste0(exampledir, "//lowo2.xlsx"),
#'   recalculate = FALSE)
#' li6800_norecalc$gasanalyzer.Equations <-
#'   list(read_6800_equations(paste0(exampledir, "//lowo2.xlsx")))
#'
#' all.equal(li6800, recalculate(li6800_norecalc), check.attributes = FALSE)
#'
#'
read_6800_xlsx <- function(filename, recalculate = TRUE) {

  DFs <- parse_6800_xlsx(filename, extract_formula = recalculate,
                         extract_units = TRUE)

  # convenience:
  reDF <- DFs[["remarks"]]
  dtDF <- DFs[["data"]]
  ctDF <- DFs[["consts"]]
  unDF <- DFs[["units"]]

  serialnumber <- reDF$X2[reDF$X1 == "Head s/n"]
  headcal <- from_json(reDF$X2[reDF$X1 == "Head cal"])

  if (length(headcal) > 1L)
    # i'll keep this as char list for now
    headcal <- rapply(headcal, as.character, how = "unlist")
  else {
    warning("Error reading head calibration from xlsx.")
  }

  filedate <- reDF$X2[reDF$X1 == "File opened"]
  if (length(filedate) == 0L) {
    warning("Error reading file date from ", filename, ".\n")
    filedate <- Sys.time()
  } else {
    # lack of tz info is cumbersome
    filedate <- as.POSIXct(filedate, tz = Sys.timezone(),
                           format = "%Y-%m-%d %H:%M:%S")
  }

  # some files have no data
  if (is.na(DFs["data"]))
    return(tibble())

  # first row (which importantly has still the header!!)
  dataRow1 <- dtDF[dtDF$row == dtDF$row[1L], ]

  constsRow1 <- ctDF[ctDF$row == ctDF$row[1L], ]
  # not sure this is really needed as the same info will be in measDF, however
  # those are not necessarily unique?
  #Row1 <- vec_rbind(dataRow1, constsRow1)[c("header", "address", "group")]

  # It may occasionally be useful to use the calculated formula values
  # the files are actually exported uncalculated by the instrument, but
  # for QA a comparison with a spreadsheet calc could be made
  formulaVector <- !is.na(dtDF$formula)
  # assume relevant formula content is numeric...
  # This slightly breaks for Const.UseDynamic
  dtDF$numeric[formulaVector] <- as.numeric(dtDF$content[formulaVector])

  #add metadata here:
  #FIXME: if cal date (firmware 2.1.11), then use that!
  factcal <- list(get_factory_cals(serialnumber, filedate)[,1])

  metadata <- list(SysObs.Filename = file_path_sans_ext(basename(filename)),
                   SysObs.Instrument = list("Li6800"),
                   SysConst.UserCal = list(headcal),
                   SysConst.FactCal = factcal)
  #NB: needs to match metadata above:
  grp <- c("SysObs", "SysObs", "SysConst", "SysConst")

  metaDF <- data.frame(row = ctDF$row[1L], header = names(metadata),
                       group = grp)
  metaDF$value <- metadata
  dtDF$isData <- TRUE
  # Following steps are costly if many rows
  measDF <- vec_rbind(dtDF, ctDF, metaDF)

  # none and "-" are sometimes used
  # one could debate whether this means 0 or NA, i'll go
  # with NA for now
  measDF$character <- replace(measDF$character,
                              measDF$character %in% c("none", "-"),
                              NA)

  exclude_meta <- seq(1, nrow(measDF) - nrow(metaDF))
  # somewhat slow:
  measDF$value[exclude_meta] <- blend(as.list(measDF$character[exclude_meta]),
                                      as.list(measDF$numeric[exclude_meta]),
                                      as.list(measDF$logical[exclude_meta]))

  # preserve row order because in a next step we are using fill for the
  # Consts rows interjected in between data rows
  measDF <- measDF[order(measDF$row), c("row", "col", "header", "value",
                                        "isData")]

  # not entirely happy with this, but seems faster than alternatives:
  measDF$rowix <- as.integer(as.factor(measDF[["row"]]))
  #fix names to unified names and remove funny chars:

  #this makes unique names at the same time
  tmp <- vec_split(measDF, measDF[["header"]])

  #since every col should have the same nr
  #of rows, cant we just complete them? here? (and save a fill later?)

  #initialize the output
  uniRows <- unique(measDF[["row"]])
  out <- rep_len(list(rep(NA, length(uniRows))), length(tmp$val) + 1)
  names(out) <- c("row", tmp$key)
  out$row <- uniRows
  #this vector stores names for filling down later:
  constsAndMeta <- rep(NA_character_, length(tmp$val))

  # always keep this off:
  old_opt <- units_options("simplify")
  units_options("simplify" = NA)

  #and allocate:
  for (i in seq_along(tmp$val)) {
    nm <- tmp$key[i]
    x <- tmp$val[[i]]
    out[[nm]][x$rowix] <- unlist(x$value, recursive = FALSE)

    if(is.na(x$isData[1])) {
      constsAndMeta[i] <- nm
      #only continue the loop for data:
      next
    }

    tmpu <- unDF$units[unDF$col == x$col[1L]]
    if (length(tmpu) != 0L)
      out[[nm]] <-
      # need to guard set_units as its easy to break
      tryCatch(set_units(as.numeric(out[[nm]]), tmpu,
                         mode = "standard"),
               error = function(e) {
                 warning("Failed to apply unit specified in data file: ",
                         tmpu, ", to col ", nm, "\n")
                 out[[nm]]
               })
  }
  #tibble, and drop "row":
  out <- as_tibble(out[-1L])

  # fill constants down
  constsAndMeta <- constsAndMeta[!is.na(constsAndMeta)]
  out[constsAndMeta] <- filldown(out[constsAndMeta])
  # only now drop rows that were related to Consts but had no obs (col 1)
  out <- out[!is.na(out[[1L]]), ]

  #Apertures get special treatment to remove unnecessary units:
  out["ChambConst.Aperture"] <- gsub("^([0-9.]*)(.*)$", "\\1",
                                  g0("ChambConst.Aperture", NA,
                                     envir = list2env(out)))
  # firmware bug: wrong units cf_a (fixed >=2.1.11)
  # FIXME: header says mmol should be umol, fix this better
  out["MchStatus.CFaCO2"] <- as.numeric(g0("MchStatus.CFaCO2", NA,
                                           envir = list2env(out)))

  if (recalculate) {
    eqs <- extract_6800_equations(DFs[["formula"]])
    out$gasanalyzer.Equations <- list(eqs)
    out <- recalculate(out)
  }
  out <- fixup_import(out) |>
    # add O2 uncorrected vals, this is a bit time consuming
    calculate_raw()

  units_options("simplify" = old_opt)
  #sort and return:
  out[sort_names(names(out))]

}

#' Reads 6800 text files and creates a tibble with gas-exchange data.
#'
#' The text files stored by the 6800 contain measured and calculated values that
#' are read by this function and formatted in a large tibble for use with R.
#' Constants and metadata (such as calibration information) are also added as
#' columns. Note that no recalculation of derived variables is performed,
#' although it is possible to so using [recalculate()] after importing the data.
#'
#' Multiple files can be loaded by calling the function with [lapply()] or
#' [purrr::map()] to merge multiple files. In this case, it is important
#' to ensure that the column names will match.
#'
#' @param filename an text file containing 6800 gas-exchange data.
#'
#' @returns A tibble with gas-exchange data in columns.
#'
#' @importFrom stringi stri_replace_all_fixed stri_replace_all_regex
#'   stri_split_fixed stri_detect_regex
#' @importFrom tibble tibble as_tibble
#' @importFrom units set_units units_options
#' @importFrom jsonify from_json
#' @importFrom tools file_path_sans_ext
#'
#' @seealso [recalculate()]
#'
#' @export
#'
#' @examples
#' exampledir <- system.file("extdata", package = "gasanalyzer")
#' # import factory calibration for example data:
#' import_factory_cals(exampledir)
#'
#' # read data
#' li6800 <- read_6800_xlsx(paste0(exampledir, "//lowo2.xlsx"))
#' li6800_txt <- read_6800_txt(paste0(exampledir, "//lowo2"))
#'
#' # compare all except equations. Note txt file reports some NAs as zero:
#' columns_to_check <- names(li6800)[!names(li6800) %in%
#'                                    c("gasanalyzer.Equations")]
#' all.equal(li6800[columns_to_check],
#'           li6800_txt[columns_to_check],
#'           tol = 0.01)
#'
read_6800_txt <- function(filename) {

  # there are various faster options, but readLines isn't that slow for
  # typical files. Note that some files seem nul-truncated
  rawfile <- readLines(filename, skipNul = TRUE)

  # find sections. search only first 1000 lines for header, and
  # assume header < 1000 lines
  rfl <- length(rawfile)
  header_start <- which(rawfile[1:min(rfl, 1000)] == "[Header]")
  if (length(header_start) == 0L) {
    warning("No [Header] section in first 1000 lines, ignoring",
            filename, ".\n")
    return(tibble())
  }

  data_start <- which(rawfile[header_start:min(rfl, header_start +
                                                 1000)] == "[Data]")
  if (length(data_start) == 0L) {
    warning("No [Data] section found, ignoring", filename, ".\n")
    return(tibble())
  }
  #stri instead of strsplit because we want to split in exactly 2:
  remarks <- stri_split_fixed(rawfile[(header_start + 1):(data_start - 1)],
                              pattern = "\t", n = 2, simplify = T,
                              omit_empty = TRUE)

  # get metadata
  bluestem <- remarks[remarks[,1] == "Console ver", 2]
  if (length(bluestem) == 0 && !grepl("Bluestem", bluestem, fixed = TRUE)) {
    warning(filename, " is not a valid 6800 file.\n")
    tibble()
  }
  filedate <- remarks[remarks[,1] == "File opened", 2]
  if (length(filedate) == 0L) {
    warning("Error reading file date from", filename, ".\n")
    filedate <- Sys.time()
  } else {
    # lack of tz info is cumbersome
    filedate <- as.POSIXct(filedate, tz = Sys.timezone(),
                           format = "%Y-%m-%d %H:%M:%S")
  }

  serialnumber <- remarks[remarks[,1] == "Head s/n", 2]
  headcal <- from_json(remarks[remarks[,1] == "Head cal", 2])

  if (length(serialnumber) > 0L && length(headcal) > 1L) {
    # i'll keep this as char list for now
    headcal <- rapply(headcal, as.character, how = "unlist")
  } else {
    warning("Error reading head calibration from", filename, ".\n")
    headcal <- NA
  }

  rawdata <- rawfile[(data_start + 1):rfl]
  # data start with a number
  # only for larger files, stringi is faster than grepl
  datarows <- stri_detect_regex(rawdata,"^[[:digit:]]+\t")
  # keep headers and consts for the moment:
  datamat <- stri_split_fixed(rawdata, pattern = "\t", simplify = TRUE)

  if (!any(datarows)) {
    warning("No data rows in ", filename, ".\n")
    return(tibble())
  }

  # remove cols that are completely empty (meanly to work around bugs
  # happening during file creation...)
  datamat <- datamat[ , colSums(datamat != "") != 0]
  # initial constants (did anybody think about how to distinguish consts
  # from remarks and other info?). I'm matching for two groups of characters
  # separated by :, but not exclusively numbers:
  # NB: they may always have Const: in the name, but that is not written
  # anywhere?
  consts_in <- stri_detect_regex(remarks[ , 1], "^(?![0-9]+).+\\:(?![0-9]+).+")
  remarks[consts_in, 1] <-
    stri_replace_all_fixed(remarks[consts_in, 1], ":", ".")

  #consts in data:
  cdata <- datamat[!datarows, 1:2]

  consts_dt <- stri_detect_regex(cdata[ ,1], "^(?![0-9]+).+\\:(?![0-9]+).+")
  cdata[consts_dt, 1] <-
    stri_replace_all_fixed(cdata[consts_dt, 1], ":", ".")
  #grp change for old firmware (pre 2?)
  datamat[1, datamat[1, ] == "Sys"] <- "SysObs"
  header <- paste(datamat[1,], datamat[2,], sep = ".")
  #Remove empty headers from our data matrix!
  nohead <- header == "."
  # BUG workaround: if the last column has data but no header, it could be due
  # to a missing Const.Custom (in mol m⁻² s⁻¹) column
  # FIXME: do more testing for this bug, it may be even worse...
  if (nohead[length(nohead)]) {
    if (all(header != "Consts.Custom")) {

      #find the custom col:
      cuscol <- which(header == "Const.Geometry")
      if (length(cuscol) > 0) {
        cuscol <- cuscol + 1
        lastcol <- length(nohead)
        #for rows where the last col is not empty
        #bring the spurious value to the end (i.e. rotate those vectors)
        badrows <- datamat[ ,lastcol] != ""
        datamat[badrows, (cuscol):lastcol] <- datamat[badrows,
                                                      c((cuscol+1):lastcol,
                                                        cuscol), drop = F]
        #but only replace from Const.Geometry onwards

        #rest of the col gets NA
        datamat[!badrows, lastcol] <- NA
        datamat[3, lastcol] <- "mol*m^-2*s^-1"
        header[lastcol] <- "Const.CustomBUG"
        nohead[lastcol] <- F
      }
    } else
      stop(filename, "is not a valid 6800 data file.")
  }
  datamat <- datamat[, !nohead]
  # these seem to mean usually NA, I hope it isn't too blunt:
  datamat <- replace(datamat, datamat %in% c("none", "-"), NA)
  header <- header[!nohead] |>
    rename_header("Li6800") |> make.unique()
  # and apply:
  colnames(datamat) <- header

  # always keep this off:
  old_opt <- units_options("simplify")
  units_options("simplify" = NA)

  # units (units without headers ignored!)
  unitsvec <- datamat[3, ] |>
    # remove silly things
    stri_replace_all_fixed(c("\u207b", "\u00b2", "\u00b9", "\u00b3", "hrs"),
                           c("-", "2", "1", "3", "h"), vectorize_all = FALSE)
  names(unitsvec) <- header

  #empty (but don't cut yet) headers:
  datamat[1:3, ] <- ""

  conheader <- c(remarks[consts_in, 1], cdata[consts_dt, 1]) |>
    rename_header("Li6800")
  convals <- c(remarks[consts_in, 2], cdata[consts_dt, 2])
  # put initial constants on row 1 (the header has been emptied already)
  # The data-interspaced consts go to the rows on which they are found.
  # (not on datarows, and only on those identified by consts_dt)
  conrow <- c(rep(1, sum(consts_in)),
             (seq_len(nrow(datamat)))[!datarows][consts_dt])
  # Reshape and combine with data in one go

  urow <- seq_len(nrow(datamat))
  headerall <-  c(header, conheader)
  uheaderall <- unique(headerall)
  uconheader <- unique(conheader)
  condat <- matrix(nrow = length(urow),
                   ncol = length(uheaderall),
                   dimnames = list(urow, uheaderall))
  condat[, header] <- datamat[ , header]
  condat[cbind(conrow, conheader)] <- convals

  #remaining empty cells are really NA:
  condat[condat == ""] <- NA
  # the filling and slicing may be faster on matrix, but my filldown
  # doesn't support that (yet)
  condat <- as_tibble(condat)
  #bug workaround part 2:
  if (length(condat[["Const.CustomBLC"]]) > 0) {
    #this happens sometimes, but i've no clue what the correct value should be!
    #condat[["Const.CustomBLC"]][condat[["Const.CustomBLC"]] == "True"] = "1"
    #condat[["Const.CustomBLC"]][condat[["Const.CustomBLC"]] == "False"] = "0"
    if (length(condat[["Const.CustomBUG"]]) > 0) {
      condat$Const.CustomBUG[condat$Const.CustomBUG == ""] <- NA
      condat$Const.CustomBLC <- blend(condat$Const.CustomBUG,
                                      condat$Const.CustomBLC)
      condat$Const.CustomBUG <- NULL
    }
  }
  condat[uconheader] <- filldown(condat[uconheader])
  condat <- condat[datarows,]
  condat <- units_convert(condat, unitsvec)

  condat["SysObs.Filename"] <- file_path_sans_ext(basename(filename))
  #note [[]] here
  condat[["SysConst.UserCal"]] <- list(headcal)
  #FIXME: if cal date (firmware 2.1.11), then use that!
  condat[["SysConst.FactCal"]] <- list(get_factory_cals(serialnumber,
                                                      filedate)[ , 1])
  condat[["gasanalyzer.Equations"]] <- list(NA)

  condat["SysObs.Instrument"] <- "Li6800"
  #Apertures get special treatment to remove unnecessary units:
  condat["ChambConst.Aperture"] <- gsub("^([0-9.]*)(.*)$", "\\1",
                                        g0("ChambConst.Aperture", NA_real_,
                                           envir = list2env(condat)))
  #firmware bug: wrong units cf_a (fixed >=2.1.11)
  #FIXME: may need a better fix, see above
  condat["MchStatus.CFaCO2"] <- as.numeric(g0("MchStatus.CFaCO2", NA_real_,
                                              envir = list2env(condat)))

  condat <- fixup_import(condat) |>
    # add O2 uncorrected vals, this is a bit time consuming
    calculate_raw()

  units_options("simplify" = old_opt)
  condat[sort_names(names(condat))]
}
Any scripts or data that you put into this service are public.
gasanalyzer documentation built on May 29, 2024, 8:14 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
gasanalyzer
Import, Recompute and Analyze Data from Portable Gas Analyzers

R/read_6800.R
In gasanalyzer: Import, Recompute and Analyze Data from Portable Gas Analyzers

Defines functions read_6800_txt read_6800_xlsx parse_6800_xlsx extract_6800_equations read_6800_equations

Documented in read_6800_equations read_6800_txt read_6800_xlsx

Try the gasanalyzer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gasanalyzer Import, Recompute and Analyze Data from Portable Gas Analyzers

R/read_6800.R In gasanalyzer: Import, Recompute and Analyze Data from Portable Gas Analyzers

Defines functions read_6800_txt read_6800_xlsx parse_6800_xlsx extract_6800_equations read_6800_equations

Documented in read_6800_equations read_6800_txt read_6800_xlsx

Try the gasanalyzer package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gasanalyzer
Import, Recompute and Analyze Data from Portable Gas Analyzers

R/read_6800.R
In gasanalyzer: Import, Recompute and Analyze Data from Portable Gas Analyzers