R/file_prep.R

Defines functions check_files file_lookup

Documented in check_files file_lookup

# Retrieve file names for a given machine and date
#
# Lists the instrument's folder and its "runfiles" subfolder under the data
# path supplied by init(), and from each listing keeps the last entry whose
# file name contains the run date written as yymmdd.
#
# Arguments:
#   instrument  the serial number of a Picarro instrument, formatted to
#               match the name of that instrument's folder
#               (ie. hids2053, hids2052 or hids2046)
#   runDate     a single run date, either a "YYYY-MM-DD" string or a Date
#
# Value: list(data.file = , ids.file = ). Each element is one file path, or
#   character(0) when nothing matched (a warning is raised in that case).
#   The list is also printed.
file_lookup <- function(instrument, runDate) {
  cfg <- init()

  ## as.character() turns a Date into "YYYY-MM-DD"; nchar() applied directly
  ## to a Date measures its underlying number and would reject a valid date
  runDate <- as.character(runDate)
  if (length(runDate) != 1 || is.na(runDate) || nchar(runDate) != 10) {
    stop("runDate format must be YYYY-MM-DD")
  }

  ## change runDate to format yymmdd (eg. 150310)
  run_tag <- paste0(substr(runDate, 3, 4), substr(runDate, 6, 7),
                    substr(runDate, 9, 10))

  instrument_dir <- file.path(cfg$dataPath, instrument)

  ## files for the specified machine
  data.files <- list.files(instrument_dir, full.names = TRUE)

  ## files in the 'runfiles' folder for the specified machine,
  ## ie. the sample ids
  ids.files <- list.files(file.path(instrument_dir, "runfiles"),
                          full.names = TRUE)

  ## Match the tag literally and against the file name only, so that digits
  ## in the directory part of the path can never produce a hit
  last_match <- function(paths) {
    tail(paths[grepl(run_tag, basename(paths), fixed = TRUE)], 1)
  }

  ## last isotope file and last sample-id file for that machine and date
  data.file <- last_match(data.files)
  ids.file <- last_match(ids.files)

  if (length(data.file) == 0) {
    warning("no data file found for ", instrument, " on ", runDate,
            call. = FALSE)
  }
  if (length(ids.file) == 0) {
    warning("no ids file found for ", instrument, " on ", runDate,
            call. = FALSE)
  }

  found <- list(data.file = data.file, ids.file = ids.file)
  print(found)
  found
}

# Check the data & ids files and return warnings
#
# Reads the data csv and the sample ids csv named in `files`, checks their
# layout, flags rows to omit, and compares the ports found in the two files.
#
# Arguments:
#   files  a list with file names for a data file (data.file) and a sample
#          ids file (ids.file), such as that generated by file_lookup()
#
# Value: `files` with an added element `oi`, a logical vector with one entry
#   per data row: FALSE for the first 10 rows and for rows whose H2O_Mean is
#   NA, TRUE otherwise.
#
# Stops when a file name is missing, when the data file lacks a required
# column or has fewer than 41 rows, or when the ids file does not have four
# columns with a numeric second and a character third column.
# Warns about missing injections, ports with too few injections, and ports
# that differ between the two files.
check_files <- function(files) {
  if (length(files$data.file) == 0) {
    stop("no data file supplied, check that the machine and date are correct")
  }

  ## reads in the data csv
  data <- read.csv(files$data.file, stringsAsFactors = FALSE,
                   strip.white = TRUE)

  ## checks that the necessary columns are present and returns
  ## error if not
  required <- c("Port", "Inj.Nr", "d.18_16.Mean", "d.D_H.Mean")
  if (!all(required %in% names(data))) {
    stop("data file is not correctly formatted, check that the 
            machine and date are correct and check file format")
  }
  message("data file format correct")

  if (nrow(data) < 41) {
    stop("only reference water data")
  }

  # stores outlier/omit index info: the first 10 rows are always omitted
  oi <- c(rep(FALSE, 10), rep(TRUE, nrow(data) - 10))

  # set oi flag for missing lines
  oi[is.na(data$H2O_Mean)] <- FALSE

  # empty or missing lines: the first 10 omitted rows are the leading rows
  # dropped above, so anything beyond them is a missing injection
  omitted <- paste0(data$Port[!oi], "#", data$Inj.Nr[!oi])
  if (length(omitted) > 10) {
    warning("the following injections are missing:")
    warning(paste(omitted[-seq_len(10)], collapse = ", "))
  }

  if (length(files$ids.file) == 0) {
    stop("no ids file supplied, check that the machine and date are correct")
  }

  ## reads in sample ids file
  ids <- read.csv(files$ids.file, stringsAsFactors = FALSE,
                  strip.white = TRUE)

  ## checks formatting of the sample ids table before renaming its columns;
  ## renaming first would fail with a names-length error on a table that
  ## does not have exactly four columns
  if (ncol(ids) != 4 || !is.numeric(ids[[2]]) || !is.character(ids[[3]])) {
    stop("ids file is not correctly formatted, check that the 
            machine and date are correct, that the file is named 
            correctly, and that the file is correctly formatted")
  }
  message("ids file format correct")

  ## renames columns
  names(ids) <- c("Tray", "Port", "ID", "ID2")

  ## identifies the numeric part of the port column and converts it to
  ## numeric data; regmatches() drops entries without a match, so the
  ## result is written back by position and unmatched rows become NA
  port_hit <- regexpr("[0-9][0-9]", data$Port)
  has_port <- !is.na(port_hit) & port_hit > 0
  port_num <- rep(NA_real_, nrow(data))
  port_num[has_port] <- as.numeric(regmatches(data$Port, port_hit))
  data$Port <- port_num

  ## creates a table with the frequency of each port in the data table.
  ## The port numbers are taken from the table's names: converting the
  ## factor column of as.data.frame(table(...)) with as.numeric() would
  ## give level codes (1, 2, 3, ...) instead of the actual port numbers
  port_counts <- table(data$Port)
  data.ports.freq <- data.frame(Port = as.numeric(names(port_counts)),
                                Freq = as.vector(port_counts))

  ## checks if any ports have less than the required number of injections
  ## (10 for ports 1-4, 4 for the others)
  too_few_high <- any(data.ports.freq$Freq[data.ports.freq$Port > 4] < 4)
  too_few_low <- any(data.ports.freq$Freq[data.ports.freq$Port <= 4] < 10)
  if (too_few_high || too_few_low) {
    warning("Missing rows in data") 
  } else {
    message("data file complete")
  }

  ## generates unique list of ports in the sample ids table
  id_ports <- unique(ids$Port)

  ## checks that the ports listed in the sample ids table and the
  ## data table are the same
  if (!setequal(id_ports, data.ports.freq$Port)) {
    warning("Ports are not the same for data.file and ids.file, 
            check them and ensure they are the same before running 
            function process.data") 
  } else {
    message("ids match")
  }

  files$oi <- oi
  files
}
SPATIAL-Lab/CRDSutils documentation built on Dec. 12, 2024, 3:23 a.m.