# R/combine_hru_files.R

#' Combine HRU Files
#'
#' Combine HRU files that are broken up into NHD+ regions into a single
#' data frame. Every file is read with \code{read.csv()}; the columns named by
#' \code{dateColumns} are taken once from the first file, removed from every
#' file, and the remaining columns of all files are bound side by side after
#' them.
#'
#' @param allFiles NHD+ file paths. Files should be in order: c(01, 02, 03,
#'   04, 05, 06, 07, 08, 09, 10l, 10u, 11, 12, 13, 14, 15, 16, 17, 18).
#'   The default lists a hard-coded local directory, so this argument should
#'   normally be supplied explicitly.
#' @param dateColumns Columns in each file that provide dates or other
#'   informational data, given as column positions or column names.
#' @param autoSort Should the function attempt to sort the list of files?
#'   Defaults to TRUE. Sorting succeeds only if each region code matches
#'   exactly one path; otherwise a warning is raised and the supplied order
#'   is kept.
#' @param cl Cluster object created through parallel::makeCluster().
#'   Defaults to NULL.  Only suggested for sub-monthly datasets.
#' @return A data frame holding the date columns of the first file followed
#'   by the non-date columns of every file, in file order.
#' @export combine_hru_files
#' @examples
#' \dontrun{
#' combine_hru_files(allFiles = list.files("path/to/ro_byHRU",
#'                                         full.names = TRUE))
#' }
combine_hru_files <- function(allFiles    = list.files("C:/Users/ssaxe/Documents/Projects/Bock Data Review/ro_byHRU/",
                                                       full.names = TRUE),
                              dateColumns = c(1, 2),
                              autoSort    = TRUE,
                              cl          = NULL) {
  # Fail early with a clear message instead of a subscript error further down.
  if (length(allFiles) == 0L) {
    stop("No files supplied in 'allFiles'.", call. = FALSE)
  }

  # Sort by grep order: each region code must match exactly one path.
  if (autoSort) {
    autoNames <- c("01", "02", "03", "04", "05", "06", "07",
                   "08", "09", "10l", "10u", "11", "12", "13",
                   "14", "15", "16", "17", "18")
    locAll <- vapply(
      autoNames,
      function(region) {
        hits <- grep(region, allFiles, fixed = TRUE)
        if (length(hits) == 1L) hits else NA_integer_
      },
      integer(1),
      USE.NAMES = FALSE
    )
    if (anyNA(locAll)) {
      warning("autoSort failed.")
    } else {
      allFiles <- allFiles[locAll]
    }
  }

  # Import every file (optionally in parallel through `cl`).
  allImports <- pbapply::pblapply(X                = allFiles,
                                  FUN              = utils::read.csv,
                                  stringsAsFactors = FALSE,
                                  cl               = cl)

  # Logical mask of the date columns of `x`; works for positions and names,
  # and (unlike negative indexing) for an empty `dateColumns`.
  is_date_column <- function(x) {
    if (is.character(dateColumns)) {
      names(x) %in% dateColumns
    } else {
      seq_along(x) %in% dateColumns
    }
  }

  # Isolate date columns; drop = FALSE keeps the column name when only one
  # date column is requested.
  baseColumns <- allImports[[1]][, dateColumns, drop = FALSE]

  # Remove the date columns from each table, honouring `dateColumns` rather
  # than assuming they are always the first two columns.
  allImports.sub <- pbapply::pblapply(
    X   = allImports,
    FUN = function(x) x[, !is_date_column(x), drop = FALSE]
  )

  # cbind tables, then prepend the date columns.
  mat <- do.call(cbind, allImports.sub)
  mat <- cbind(baseColumns, mat)

  # Release the large intermediates before returning.
  rm(allImports, allImports.sub)
  gc()

  mat
}
# ssaxe-usgs/hru2huc documentation built on May 5, 2019, 2:42 a.m.