CRDSutils: Picarro L21X0-i Utilities

###                      Correct Functions                        ###
###         These functions are used to take data produced        ###
###         by a Picarro CRDS L2130i Isotopic H2O analyzer        ###
###         for liquid water analyses and memory correct,         ### 
###         and calibrate the raw data points, then average       ###
###         the injections for each port, and drift correct       ### 
###         the mean values. Finally, there are functions         ###
###         that quality-check the final values                   ###

refRead = function(refFile){
  ## reads in qa file
  qa <- read.csv(refFile, stringsAsFactors=FALSE)

  criteria = list(
  #slrm d18O max acceptable value
    slrm.o.max = qa$max[qa$parameter=="slrm_O_range"],
  #slrm d18O min acceptable value  
    slrm.o.min = qa$min[qa$parameter=="slrm_O_range"],
  #slrm d2H max acceptable value
    slrm.h.max = qa$max[qa$parameter=="slrm_H_range"],
  #slrm d2H min acceptable value
    slrm.h.min = qa$min[qa$parameter=="slrm_H_range"],
  #slrm d18O max acceptable sd
    slrm.o.sd.max = qa$max[qa$parameter=="slrm_O_sd"],
  #slrm d2H max acceptable sd
    slrm.h.sd.max = qa$max[qa$parameter=="slrm_H_sd"],
  #sample d18O max acceptable sd
    sample.o.sd.max = qa$max[qa$parameter=="sample_O_sd"],
  #sample d2H max acceptable sd
    sample.h.sd.max = qa$max[qa$parameter=="sample_H_sd"])

  refs = data.frame(ID = c(qa$ID[qa$parameter=="plrm1"],
                            qa$ID[qa$parameter=="plrm2"], 
                            qa$ID[qa$parameter=="slrm"]),
                    o.known = c(qa$d18O_known[qa$parameter=="plrm1"],
                                qa$d18O_known[qa$parameter=="plrm2"],
                                qa$d18O_known[qa$parameter=="slrm"]),
                    o.known.sd = c(qa$d18O_sd[qa$parameter=="plrm1"],
                                   qa$d18O_sd[qa$parameter=="plrm2"],
                                   qa$d18O_sd[qa$parameter=="slrm"]),
                    h.known = c(qa$d2H_known[qa$parameter=="plrm1"],
                                qa$d2H_known[qa$parameter=="plrm2"],
                                qa$d2H_known[qa$parameter=="slrm"]),
                    h.known.sd = c(qa$d2H_sd[qa$parameter=="plrm1"],
                                   qa$d2H_sd[qa$parameter=="plrm2"],
                                   qa$d2H_sd[qa$parameter=="slrm"]),
                    row.names = c("plrm1", "plrm2", "slrm"),
                    stringsAsFactors = FALSE)
  
  return(list(refs = refs, criteria = criteria))
}

## Reads in the relevant files, modifies and merges them
data.mod <- function(data.file, ids.file){
  ##  data.file is the filename of a csv with the isotope
  ##  data produced by a Picarro CRDS with the following columns: 
  ##  Port, Inj Nr, d(18_16)Mean, d(D_H)Mean. 
  ##  Additional columns may be present in csv, but will not be used 
  ##  in function
  ##
  ##  ids.file is the filename of a csv with 4 columns - 
  ##  tray, Port (numeric, i.e. 1,2,etc.), sample ID, and sample 
  ##  campaign or secondary identifier. Column names may vary but   
  ##  the order may not. Additional columns are not allowed and the 
  ##  number of samples should match the number of samples in the  
  ##  isotope file. An example can be seen if you run the following 
  ##  > data.frame(Tray=rep(1,3), Port=c(1,2,3), 
  ##  > ID = c("PT","PZ","UT"), 
  ##  > ID2 = c("SLRM","PLRM-1","PLRM-2"))

	ids <- read.csv(ids.file,stringsAsFactors=FALSE,
                  strip.white=TRUE) 
  ## reads in ids.file
  
  ids <- data.frame(Port = ids[,2], ID = ids[,3], ID2 = ids[,4])
	##selects and renames the relevant columns from ids
  
	iso.data <- read.csv(data.file, stringsAsFactors=FALSE, 
                       strip.white=TRUE) 
  ## reads in data.file   
  
	iso.data <- data.frame(Port = iso.data$Port, 
                         inj = iso.data$Inj.Nr,
                         d18O = iso.data$d.18_16.Mean, 
                         d2H = iso.data$d.D_H.Mean) 
  ##selects and renames the relevant columns from iso.data
  
	iso.data$Port<-as.numeric(regmatches(iso.data$Port,                    
             regexpr("[0-9][0-9]", iso.data$Port))) 
  ##converts the last 2 digits of the port id to numeric
  
	iso.data <- merge(iso.data,ids,stringsAsFactors=FALSE) 
  ##merges iso.data & ids
  
	iso.data <- iso.data[with(iso.data,order(Port,inj)),] 
  ##orders iso.data by Port & inj
  
  return(iso.data)
}

## Generates memory-correction terms for d18O & d2H 
mc.terms <- function(data, mem, oi){ 
  #ensure sorting by seqN
  data = data[order(data$seqN),]
  
  #numerical optimization for d18O
  o.mc.opt = optim(mem$o.mc, mc.score, data = data, element = "O",
                   oi = oi, method = "L-BFGS-B", lower = 0)
  #check for convergence
  if(o.mc.opt$convergence != 0){
    warning("O memory correction did not converge")
  }
  
  #same for d2H
  h.mc.opt = optim(mem$h.mc, mc.score, data = data, element = "H",
                   oi = oi, method = "L-BFGS-B", lower = 0)
  if(h.mc.opt$convergence != 0){
    warning("H memory correction did not converge")
  }

  #return only mc parameter values
  return(list(o.mc = o.mc.opt$par, h.mc = h.mc.opt$par))
}

## Scores the memory correction for use in optimization 
mc.score = function(mc, data, element, oi){
  #run the correction using the current parameters
  data.mc = mc.corr(data, mc, element, oi)
  
  #add the port numers to the memory corrected values
  data.mc = data.frame(Port = data$Port, mc = data.mc)
  
  #get variance values per port
  da = aggregate(data.mc[oi,"mc"], 
                 by = list(Port = data.mc$Port[oi]),
                 var, na.rm = TRUE)
  
  #names for easy reference
  names(da) = c("Port", "Var")
  
  #return score is the variance of the deviations
  return(mean(da$Var, na.rm = TRUE))
}

## Conducts memory corretion for one element
mc.corr = function(data, mc, element, oi){
  
  #ensure sorting by seqN
  data = data[order(data$seqN),]
  
  #extract uncorrected values for target element
  if(element == "O"){
    vals = data$d18O
  } else {
    vals = data$d2H
  }
  
  #storage for corrected values
  vals.mc = rep(NA, length(vals))
  
  #cycle through each injection, skipping conditioners
  for(i in 11:length(vals)){
    #subset of vals allows reference wrt to the target injection
    vals.sub = vals[1:i]
    
    #remove any missing injections; memory persists through these
    vals.sub = vals.sub[!is.na(vals.sub)]
    
    #length of vals.sub for convenience
    vsl = length(vals.sub)
    
    #check that there are enough injections, otherwise trim mc
    if(vsl == 0){
      vals.mc[i] = vals[i]
      break()
    } else if(vsl - 1 < length(mc)){
      mc.sub = mc[1:(vsl-1)]
    } else{
      mc.sub = mc
    }
    
    #correction term is the sum of preceeding peaks weighted
    #by mc parameters
    corr = sum(mc.sub * vals.sub[(vsl-1):(vsl-length(mc.sub))])
    
    #weights scale to 1-true value weight
    wts = 1 - sum(mc.sub)
    
    #make the correction
    vals.mc[i] = (vals[i] - corr) / wts
  }
  
  #return vector of corrected values
  return(vals.mc)
}

## Calculates drift spline fit for d18O and d2H
drift.reg <- function(data, refs, oi){
  ## data is the dataframe created using the data.mod function and 
  ## updated using the data.mc function and data.cal function
  ## qa.file is the filename of a csv as described in the cal.reg 
  ## function

  #Remove outliers
  data = data[oi,]
  
  data.slrm = data[data$ID == refs$refs["slrm", "ID"] & 
                     data$Port > 1,]
  ## subsets data to include only slrm water and excluding port 1
  
  data.slrm$seqN = data.slrm$seqN - 20
  ## subtracts 20 from sequence
  ## number so that the calibration midpoint is 0
  
  #get mean values per port
  data.slrm = aggregate(data.slrm[, c("d18O_mc", "d2H_mc", "seqN")], 
                 by = list(Port = data.slrm$Port),
                 mean, na.rm = TRUE)
  
  # set degrees of freedom for spline
  sdf = ceiling(nrow(data.slrm)/2)
  
  if(nrow(data.slrm) < 4){
    ## spline fits to slrm port averages, hacked for small runs
    o = smooth.spline(seq(1:100), rep(mean(data.slrm$d18O_mc), 100), 
                      df = sdf)
    h = smooth.spline(seq(1:100), rep(mean(data.slrm$d2H_mc), 100),
                      df = sdf)   
  } else{
    ## spline fits to slrm port averages
    o = smooth.spline(data.slrm$seqN, data.slrm$d18O_mc, 
                      df = sdf)
    h = smooth.spline(data.slrm$seqN, data.slrm$d2H_mc,
                      df = sdf)
  }
  
  #return spline objects
  return(list(o = o, h = h, data.slrm = data.slrm))
}

## Plots drift correction
drift.plot = function(dc){
  
  par(mar = c(5,5,3,5))
  plot(dc$data.slrm$seqN, dc$data.slrm$d18O_mc, pch=21, bg = "light blue",
       ylab = "", xlab = "Sequence number", main = "Drift Correction")
  mtext(expression(delta^{18}*"O"), 2, 3, col = "light blue")
  op = predict(dc$o, floor(min(dc$data.slrm$seqN)):
                 ceiling(max(dc$data.slrm$seqN)))
  lines(op, col = "light blue")
  par(new = TRUE)
  plot(dc$data.slrm$seqN, dc$data.slrm$d2H_mc, pch=21, bg = "red", 
       axes = FALSE, ylab = "", xlab = "")
  axis(4)
  mtext(expression(delta^{2}*"H"), 4, 3, col = "red")
  hp = predict(dc$h, floor(min(dc$data.slrm$seqN)):
                 ceiling(max(dc$data.slrm$seqN)))
  lines(hp, col = "red")

  return(NULL)
}

## Drift-corrects data
data.dc <- function(data, drift){
  ## data is the dataframe created using the data.mod function and 
  ## updated using the data.mc function and data.cal function
  ## drift is the list created using the drift.reg function
  
  data$seqN <- data$seqN - 20
  ## subtracts 20 from sequence
  ## number so that the calibration midpoint is 0
  
  data$d18O_dc <- data$d18O_mc - (predict(drift$o, data$seqN)$y -
                                    predict(drift$o, 0)$y)
  ## calculates drift-corrected values for d18O
  
  data$d2H_dc <- data$d2H_mc - (predict(drift$h, data$seqN)$y -
                                  predict(drift$h, 0)$y)
  ## calculates drift-corrected values for d2H
  
  return(data)
}

## Detect outliers and return index for detections
outlier = function(data, oi.in){
  
  #copy oi.in
  oi = oi.in
  
  #get aggregated data by port
  da = collapse(data, oi.in)
  
  #merge aggregated values to full data
  data = merge(data, da, by.x = c("Port", "ID", "ID2"), 
               by.y = c("Port", "ID", "ID2"), all = TRUE)
  
  #re-sort
  data = data[order(data$seqN),]
  
  #data minus current outliers
  data.ok = data[oi.in,]
  
  #differences between individual injections and averages
  data.ok$d18O_diff = data.ok$d18O_dc - data.ok$d18O_avg
  data.ok$d2H_diff = data.ok$d2H_dc - data.ok$d2H_avg
  
  #standard deviations of the differences
  O_diff.sd = sd(data.ok$d18O_diff, na.rm = TRUE)
  H_diff.sd = sd(data.ok$d2H_diff, na.rm = TRUE)
  
  #Z-scores for the differences
  data.ok$d18O_diff.Z = data.ok$d18O_diff / O_diff.sd
  data.ok$d2H_diff.Z = data.ok$d2H_diff / H_diff.sd
  
  #space to store row nums of outliers
  oi.o = oi.h = NULL
  
  #if any injection is a d18O outlier (beyond 3.5 SD)
  if(any(abs(data.ok$d18O_diff.Z) > 3.5)){
    #store the row number for the most extreme outler
    oi.o = match(max(abs(data.ok$d18O_diff.Z)), 
                 abs(data.ok$d18O_diff.Z))
  }
  #same for d2H
  if(any(abs(data.ok$d2H_diff.Z) > 3.5)){
    oi.h = match(max(abs(data.ok$d2H_diff.Z)), 
                 abs(data.ok$d2H_diff.Z))
  }
  
  # if any outliers are detected
  if(!is.null(oi.o) & !is.null(oi.h)){
    # plot the sequence and the most extreme
    par(mar = c(5,5,3,5))
    plot(data.ok$seqN, data.ok$d18O_diff.Z, 
         pch=21, bg = "light blue", ylab = "", 
         xlab = "Sequence number", main = "Outlier Plot")
    if(!is.null(oi.o)){
      points(data.ok$seqN[oi.o], data.ok$d18O_diff.Z[oi.o], pch=21,
           bg = "black", col = "light blue")
    }
    mtext(expression(delta^{18}*"O"), 2, 3, col = "light blue")
    abline(3.5, 0, lty=2, col = "light blue")
    abline(-3.5, 0, lty=2, col = "light blue")
    par(new = TRUE)
    plot(data.ok$seqN, data.ok$d2H_diff.Z, 
         pch=21, bg = "red", axes = FALSE,
         ylab = "", xlab = "")
    if(!is.null(oi.h)){
      points(data.ok$seqN[oi.h], data.ok$d2H_diff.Z[oi.h], pch=21,
           bg = "black", col = "red")
    }
    axis(4)
    mtext(expression(delta^{2}*"H"), 4, 3, col = "red")
    abline(3.5, 0, lty=2, col = "red")
    abline(-3.5, 0, lty=2, col = "red")
    
    # warn about removal
    if(!is.null(oi.o)){
      if(!is.null(oi.h)){
        if(oi.o == oi.h){
          rm = paste("Oxygen & hydrogen outlier detected - Port", 
                     data.ok$Port[oi.o], "Inj",
                     data.ok$inj[oi.o])   
          warning(rm)          
        } else{
          rm = paste("Oxygen outlier detected - Port", 
                     data.ok$Port[oi.o], "Inj",
                     data.ok$inj[oi.o])   
          warning(rm)
          rm = paste("Hydrogen outlier detected - Port", 
                     data.ok$Port[oi.h], "Inj",
                     data.ok$inj[oi.h])   
          warning(rm)
        }
      } else{
        rm = paste("Oxygen outlier detected - Port", 
                   data.ok$Port[oi.o], "Inj",
                   data.ok$inj[oi.o])   
        warning(rm)        
      }
    } else{
      if(!is.null(oi.h)){
        rm = paste("Hydrogen outlier detected - Port", 
                   data.ok$Port[oi.h], "Inj",
                   data.ok$inj[oi.h])   
        warning(rm)
      }      
    }

    
    #generate vector showing samples(s) to be removed
    oi = data$seqN != data.ok$seqN[oi.o] & 
      data$seqN != data.ok$seqN[oi.h]
    #combine with previously removed samples
    oi = oi & oi.in
  }
  
  #calculate % missing or outliers
  pbad = (length(oi[oi == FALSE]) - 10) / (length(oi) - 10)
  
  #if more than 33% of inj are bad or missing warn and exit MDO
  if(pbad > 0.33){
    stop("More than 33% of injections are bad, exiting MDO")
  } else if(pbad > 0.1){
    warning(paste0(round(pbad*100), "% of injections are bad"))
  }
  
  #Return the oi vector including any additions
  return(oi)
}

## calculates a calbiration regression for d18O and d2H based
## on plrm values
cal.reg <- function(data, refs){
  ## data is the dataframe created using the data.mod function and 
  ## updated with memory-corrected values using the data.mc function
  
  ## creates a vector with the mean measured d18O values for 
  ## plrm1 and plrm 2 
  o.meas <- c(data$d18O_avg[data$ID %in% 
                              refs$refs[c("plrm1", "plrm2"), "ID"]])
  o.meas.sem <- c(data$d18O_sem[data$ID %in% 
                                  refs$refs[c("plrm1", "plrm2"), 
                                            "ID"]])
  
  ## creates a vector with the mean measured d2H values for 
  ## plrm1 and plrm 2  
  h.meas <- c(data$d2H_avg[data$ID %in% 
                             refs$refs[c("plrm1", "plrm2"), "ID"]])
  h.meas.sem <- c(data$d2H_sem[data$ID %in% 
                                 refs$refs[c("plrm1", "plrm2"), 
                                           "ID"]])
  
  #Simulate known values for PLRMs
  o.known.sim = matrix(c(rnorm(1000, refs$refs["plrm1",]$o.known, 
                               refs$refs["plrm1",]$o.known.sd),
                         rnorm(1000, refs$refs["plrm2",]$o.known, 
                               refs$refs["plrm2",]$o.known.sd)), 
                       ncol=2)
  h.known.sim = matrix(c(rnorm(1000, refs$refs["plrm1",]$h.known, 
                               refs$refs["plrm1",]$h.known.sd),
                         rnorm(1000, refs$refs["plrm2",]$h.known, 
                               refs$refs["plrm2",]$h.known.sd)), 
                       ncol=2)
  
  #Simulate measured values for PLRMs
  o.meas.sim = matrix(c(rnorm(1000, o.meas[1], o.meas.sem[1]),
                        rnorm(1000, o.meas[2], o.meas.sem[2])),
                      ncol=2)
  
  h.meas.sim = matrix(c(rnorm(1000, h.meas[1], h.meas.sem[1]),
                        rnorm(1000, h.meas[2], h.meas.sem[2])),
                      ncol=2)
  
  o.slope = apply(o.known.sim, 1, diff) / apply(o.meas.sim, 1, diff)
  o.int = o.known.sim[,1] - o.slope * o.meas.sim[,1]
  ##regression for d18O
  
  h.slope = apply(h.known.sim, 1, diff) / apply(h.meas.sim, 1, diff)
  h.int = h.known.sim[,1] - h.slope * h.meas.sim[,1]
  ##regression for d2H
  
  return(list(o.slope = o.slope, 
              o.int = o.int,
              h.slope = h.slope, 
              h.int = h.int))
}

## Calibrates data for the specified element 
data.cal <- function(data,element,cal){
  ## data is the dataframe created using the data.mod function and 
  ## updated with memory-corrected values using the data.mc function
  ## element is either "O" or "H"
  ## cal is the list created using the cal.reg function
  
  uncalData = if(element =="O"){
    data.frame("AVG" = data$d18O_avg, "SEM" = data$d18O_sem)
  }else if(element=="H"){
    data.frame("AVG" = data$d2H_avg, "SEM" = data$d2H_sem)
  } 
  ##dictates column used
  
  cal.slope <-if(element=="O"){cal$o.slope
  }else if(element=="H"){cal$h.slope} 
  ## dictates slope used
  
  cal.int <-if(element=="O"){cal$o.int
  }else if(element=="H"){cal$h.int} 
  ## dictates intercept used
  
  calMean = calSD = double()
  
  for(i in 1:nrow(uncalData)){
    calData = rnorm(1000, uncalData[i,1], uncalData[i,2]) * cal.slope + cal.int
    calMean = c(calMean, mean(calData))
    calSD = c(calSD, sd(calData))
  }
  
  ## calculates calibrated values for the
  ## specified column. Format is 
  ## memory-corrected value * slope + intercept
  
  return(list(calMean = calMean, calSD = calSD))
}

## Collapse multiple injection data to averages and standard errors
collapse = function(data, oi){
  #Average drift corrected isotope vals per port
  avg <- aggregate(data[oi,c("d18O_dc","d2H_dc")],
                   by=list(Port = data$Port[oi],
                           ID=data$ID[oi], ID2 = data$ID2[oi]),
                   mean, na.rm = TRUE)
  
  # Function for standard error of the mean
  sem = function(x){
    x = x[!is.na(x)]
    sd(x)/sqrt(length(x))
  }
  
  #standard error of isotope vals per port
  sterr <- aggregate(data[oi,c("d18O_dc","d2H_dc")],
                   by=list(Port = data$Port[oi],
                           ID=data$ID[oi], ID2 = data$ID2[oi]),
                   sem)

  names(avg)[4:5] = c("d18O_avg", "d2H_avg")
  names(sterr)[4:5] = c("d18O_sem", "d2H_sem")
  
  return(merge(avg, sterr, by=c("Port","ID","ID2")))
}

## Evaluates the mean and sd for slrm to determine if they are 
## within acceptable limits, as well as the sd for each port 
qa.flag <- function(data, refs){
  ## data is the dataframe created by the data.dc function
  ## qa.file is the filename of a csv as described in the cal.reg 
  ## function 
  
  slrmID = refs$refs["slrm",]$ID
  
  slrm.o <- mean(data$d18O_cm[data$ID==slrmID], na.rm=TRUE)
  ## calculates mean for slrm d18O
  
  slrm.h <- mean(data$d2H_cm[data$ID==slrmID], na.rm=TRUE)
  ## calculates mean for slrm d2H
  
  slrm.o.sd <- sd(data$d18O_cm[data$ID==slrmID], na.rm=TRUE)
  ## calculates sd for slrm d18O
  
  slrm.h.sd <- sd(data$d2H_cm[data$ID==slrmID], na.rm=TRUE)
  ## calculates sd for slrm d2H
  
  data$ignore_run <- ifelse(slrm.o > refs$criteria$slrm.o.max | 
                              slrm.o < refs$criteria$slrm.o.min | 
                              slrm.h > refs$criteria$slrm.h.max | 
                              slrm.h < refs$criteria$slrm.h.min | 
                              slrm.o.sd > 
                                refs$criteria$slrm.o.sd.max | 
                              slrm.h.sd > 
                                refs$criteria$slrm.h.sd.max, 1, 0)
  ## returns a 1 if any of the slrm quality parameters are violated
  ## and a 0 if not
  
  data$ignore_sample <- ifelse(
    (data$ignore_run == 1 | 
      complete.cases(data) == FALSE |
      data$d18O_sem > refs$criteria$sample.o.sd.max/2 |
      data$d2H_sem > refs$criteria$sample.h.sd.max/2), 1, 0)
  ## returns a 1 if any of the slrm quality parameters were violated
  ## as determined by the previous ifelse statement and or if the
  ## sample sem for d18O or d2H exceeds the maximum acceptable values
  
  if(slrm.o > refs$criteria$slrm.o.max | 
     slrm.o < refs$criteria$slrm.o.min){
    warning("slrm d18O average out of range")
  }
  if(slrm.h > refs$criteria$slrm.h.max | 
     slrm.h < refs$criteria$slrm.h.min){
    warning("slrm d2H average out of range")
  }
  if(slrm.o.sd > refs$criteria$slrm.o.sd.max){
    warning("slrm d18O sd out of range")
  }
  if(slrm.h.sd > refs$criteria$slrm.h.sd.max){
    warning("slrm d2H sd out of range")
  }
  ##generates warnings if parameters are violated
  
  if(any(data$ignore_sample == 1)){
    warning("sample values ignored")
  }
  ##generates warnings if sample values are to be ignored)
  
  return(data) 
}

## Summarizes the qa metrics for the run
qa.summary <- function(data.file, refs, mem, drift, cal, flagged){
  ## data.file is the filename of a csv as described in the data.mod
  ## function
  ## qa.file is the filename of a csv as described in the cal.reg 
  ## function
  ## mem is a list created by the mc.terms function
  ## drift is a list created by the drift.reg function
  ## cal is a list created by the cal.reg function
  ## flagged is a dataframe created by the qa.flag function
  
  slrmID = refs$refs["slrm",]$ID
 
  #Mean for slrm d18O
  slrm.o <- mean(flagged$d18O_cm[flagged$ID == slrmID], na.rm=TRUE)

  #Mean for slrm d2H
  slrm.h <- mean(flagged$d2H_cm[flagged$ID == slrmID], na.rm=TRUE)

  #SD for slrm d18O
  slrm.o.sd <- sd(flagged$d18O_cm[flagged$ID == slrmID], na.rm=TRUE)

  #SD for slrm d2H
  slrm.h.sd <- sd(flagged$d2H_cm[flagged$ID == slrmID], na.rm=TRUE)

  ## uses regular expressions to pull the instrument name out 
  ## of the file name
  Instrument <- ifelse(grepl("HIDS[0-9]*",data.file),
                    regmatches(data.file,regexpr("HIDS[0-9]*", 
                                                 data.file)),NA)
  
  ## uses regular expressions to pull the date out of the file name
  Run_date <- ifelse(grepl("HIDS[0-9]*",data.file),
                     regmatches(data.file,regexpr("[0-9]{8}", 
                                                  data.file)),NA)
  
  ## changes the format of the date to mm/dd/yy
  Run_date <- format(as.Date(Run_date,format="%Y%m%d"),"%m/%d/%y")
  
  #Parameters summary dataframe
  summary <- data.frame(parameter = c("Instrument", "Run_date", 
                                      "Memory1_O", "Memory2_O",
                                      "Memory3_O", "Memory4_O", 
                                      "Drift_O", "Slope_O", 
                                      "Intercept_O", "Memory1_H", 
                                      "Memory2_H", "Memory3_H", 
                                      "Memory4_H", "Drift_H",
                                      "Slope_H", "Intercept_H",  
                                      "SLRM_O_mean", "SLRM_H_mean", 
                                      "SLRM_O_sd", "SLRM_H_sd",
                                      "SLRM_count", "Ignore"),
                           value = c(Instrument, Run_date, 
                                     mem$o.mc[1], mem$o.mc[2], 
                                     mem$o.mc[3], mem$o.mc[4],
                                     diff(range(drift$o$fit$coef)),
                                     mean(cal$o.slope), 
                                     mean(cal$o.int), mem$h.mc[1], 
                                     mem$h.mc[2], mem$h.mc[3],
                                     mem$h.mc[4], 
                                     diff(range(drift$h$fit$coef)), 
                                     mean(cal$h.slope), 
                                     mean(cal$h.int), slrm.o, 
                                     slrm.h, slrm.o.sd, slrm.h.sd,
                                     nrow(flagged[flagged$ID ==
                                                    slrmID,]),
                                     flagged$ignore_run[1]))
  summary$value <- as.character(summary$value)

  return(summary)
}