R/process_data.R

Defines functions process_data

Documented in process_data

# Run the complete processing chain for one set of input files: iterative
# memory/drift correction with outlier screening, per-port averaging,
# calibration, and QA flagging.
#
# Args:
#   files: a list with filenames for a data file (`data.file`) and a sample
#     ids file (`ids.file`), such as that generated by the file.lookup
#     function. Its `oi` element is used as the starting outlier/omit index.
#
# Returns:
#   A list with six elements: samples.summary, slrm.summary, ref.all,
#   data.all, qa.report and ref.compare.
#
# Side effects: calls drift.plot() on the final drift fit and emits three
# progress messages.
process_data <- function(files) {
  # Read in reference calibration and QA parameters
  cfg <- init()
  refs <- refRead(cfg$refFile)

  # data.mod modifies and merges the data and sample ids files
  df <- data.mod(data.file = files$data.file,
                 ids.file = files$ids.file)

  # Add sequence number per injection. Count rows directly: the length of
  # df[, 1] is only the row count when single-column indexing drops to a
  # vector, which is not true for every data-frame-like class.
  df$seqN <- seq_len(nrow(df))

  # Pull out outlier/omit index
  oi <- files$oi

  # Initial memory parameter guesses (eight terms each)
  n <- seq_len(8)
  mem <- list(o.mc = exp(-n) / 8, h.mc = exp(-n) / 4)

  # Cycle to fit/apply memory and drift corrections, remove outliers, and
  # repeat the corrections until the outlier index stops changing.
  repeat {
    # Generate memory-correction terms for the data
    mem <- mc.terms(df, mem, oi)

    # Apply memory-correction terms to the d18O and d2H data
    df$d18O_mc <- mc.corr(df, mem$o.mc, "O", oi)
    df$d2H_mc <- mc.corr(df, mem$h.mc, "H", oi)

    # drift.reg calculates a spline fit of the slrm data against
    # sequence number
    drift <- drift.reg(df, refs, oi)

    # data.dc applies the drift corrections to the data
    dc <- data.dc(df, drift)

    # Outlier detection; stop once a pass leaves the index unchanged
    oi.in <- oi
    oi <- outlier(dc, oi.in)
    if (all(oi.in == oi)) {
      break
    }
  }

  drift.plot(drift)

  message("MDO operations completed")
  dc$Outlier <- !oi

  # Collapse values to average per port
  da <- collapse(dc, oi)

  # cal.reg calculates a regression line using the known and measured
  # values for the d18O and d2H data separately
  cal <- cal.reg(da, refs)

  # Calibrate the d18O and d2H data
  d18O_cal <- data.cal(da, "O", cal)
  d2H_cal <- data.cal(da, "H", cal)

  # Combine calibrated data with da
  dcal <- cbind(da, d18O_cm = d18O_cal$calMean,
                d18O_csd = d18O_cal$calSD,
                d2H_cm = d2H_cal$calMean, d2H_csd = d2H_cal$calSD)

  # Update message
  message("Calibration completed")

  # qa.flag evaluates the data against the predetermined qa cutoffs
  # and flags
  flagged <- qa.flag(dcal, refs)

  # qa.summary summarizes the qa metrics for the run
  qa.report <- qa.summary(files$data.file, refs, mem, drift,
                          cal, flagged)

  # Build up dataframe comparing known and measured LRM values.
  # NOTE(review): the positional column selections below depend on the
  # column order of `flagged` and `refs$refs` -- confirm if either changes.
  ref.df <- flagged[flagged$Port %in% 2:4, c(2, 8, 6, 9, 10, 7, 11)]
  ref.comp.o <- merge(ref.df[, 1:4], refs$refs[, 1:3])
  ref.comp.h <- merge(ref.df[, c(1, 5:7)], refs$refs[, c(1, 4:5)])
  ref.comp <- merge(ref.comp.o, ref.comp.h)
  ref.comp[, 2:11] <- round(ref.comp[, 2:11], 2)

  # Subset flagged df to include only non-reference data
  samples.summary <- flagged[!(flagged$ID %in% refs$refs[, "ID"]), ]

  # Subset flagged df to include only data for slrm. which() discards NA
  # comparisons so rows with a missing ID do not come back as all-NA rows.
  slrm.id <- refs$refs["slrm", "ID"]
  slrm.summary <- flagged[which(flagged$ID == slrm.id), ]

  # Subset dc to include only data for references
  ref.all <- dc[dc$ID %in% refs$refs[, "ID"], ]

  # Subset dc to include only non-reference data
  data.all <- dc[!(dc$ID %in% refs$refs[, "ID"]), ]

  # Update message
  message("QA/QC screening completed")

  list(samples.summary = samples.summary,
       slrm.summary = slrm.summary,
       ref.all = ref.all,
       data.all = data.all,
       qa.report = qa.report,
       ref.compare = ref.comp)
}
SPATIAL-Lab/CRDSutils documentation built on Dec. 12, 2024, 3:23 a.m.