# R/convert_mat_files.R

# functions for re-arranging the .mat species files

############################################################################
# DEFINING VARIABLES (this part should be removed for the package)

# if un-commented, this part could be incorporated for scripting
# hdir <- "/Volumes//My Passport for Mac/gpfs:pace1:/new_2006/"
# mat_fold <- "new_mat_files_2006/"
# rds_fold <- "new_rds_files_2006/"
# 

# Full names of the emission source categories.
sources <- c(
  "AG", "AIRC", "BIOG", "COAL", "DUST",
  "FIRE", "FOIL", "MEAT", "METAL", "NG",
  "NRDIE", "NRGAS", "NROTH", "ORDIE", "ORGAS",
  "OT", "OTHCMB", "SOLVENT", "SSALT", "WOOD"
)

# Look-up table used for the 2005 data: the names are the abbreviated codes,
# the values are the corresponding entries of `sources`.
sources_05 <- set_names(
  sources,
  c(
    "AG", "AC", "BI", "CL", "DU",
    "FI", "FO", "ME", "MT", "NG4",
    "ND", "NG1", "NT", "OD", "OG",
    "OT", "OC", "SV", "SS", "WO"
  )
)

# Look-up table used for the 2007 data: the 2005 table without the entries at
# positions 8, 13, 17 and 18 (MEAT, NROTH, OTHCMB, SOLVENT), with two codes
# relabelled afterwards.
inds <- c(8, 13, 17:18)
sources_07 <- sources_05[-inds]
# NOTE(review): once the four entries are dropped, positions 10 and 12 hold
# NRDIE and ORDIE -- confirm these are the entries meant to be coded "NA" and
# "NG".
names(sources_07)[c(10, 12)] <- c("NA", "NG")

############################################################################


#' Convert .mat CMAQ output to .rds files
#' 
#' Reads each file found in \code{mat_path} with \code{R.matlab::readMat}, 
#' keeps the first element of the first component of what was read, checks 
#' its size, and saves it with \code{saveRDS} into the folder 
#' \code{rds_files_<yr>} located next to \code{mat_path} (the folder is 
#' created if it does not exist yet).
#' 
#' NB: There might be a buffer error, because of which some Matlab files fail 
#' to load properly. Such files are skipped and a warning naming the file is 
#' raised. Hence, after calling this function, one should check the warnings 
#' to verify that all of the matlab files have been converted to rds files.
#' 
#' @param mat_path filepath of folder containing .mat files
#' @param ch_species chemical species, of length 41. Only its length is used: 
#'  it must equal the number of files in \code{mat_path}.
#' @param yr year the data corresponds to, as a string
#' @param sind,end_ind starting and ending indices, respectively, from which to 
#'  extract the species names from the names of the .mat files. They are 
#'  passed to \code{str_sub} as \code{start = sind} and \code{end = -end_ind}, 
#'  and \code{"rds"} is appended to the result. The default is set to 
#'  \code{sind = 1} and \code{end_ind = 4} since, usually the files are just 
#'  named SPECIES_NAME.mat. Hence, subsetting from 1 to -4 would yield 
#'  "SPECIES_NAME." (with the trailing dot), i.e. SPECIES_NAME.rds.
#' @param from position, in the directory listing, of the first file to 
#'  process. Useful for resuming an interrupted conversion.
#' 
#' @return \code{NULL}, invisibly. Called for its side effect of writing 
#'  .rds files.
#' 
#' @importFrom magrittr %>%
#' @importFrom stringr str_sub
#' @importFrom beepr beep
#' @importFrom R.matlab readMat
#' @importFrom assertthat assert_that
#' 
#' @export
convert_mat_files <- function(mat_path, yr, ch_species = species, sind = 1, 
                              end_ind = 4, from = 1) {
  
  # all paths below are relative to mat_path; restore the caller's working 
  # directory however the function exits
  orig_path <- setwd(mat_path)
  on.exit(setwd(orig_path), add = TRUE)
  
  rds_dir <- file.path("..", paste0("rds_files_", yr))
  mat_files <- dir(full.names = TRUE)
  assert_that( length(mat_files) == length(ch_species) )
  
  # nothing left to process (also avoids seq() counting backwards)
  if (from > length(mat_files)) return(invisible(NULL))
  
  # every element read from a file is expected to have this size
  # NOTE(review): 21 elements and 112 x 148 x 364 are hard-coded -- presumably 
  # the simulated concentration plus 20 sources on the model grid; confirm, 
  # in particular for the 2007 data
  expected_len <- 21
  expected_dim <- c(112, 148, 364)
  has_expected_dim <- function(el) {
    d <- dim(el)
    length(d) == length(expected_dim) && all(d == expected_dim)
  }
  
  for (i in seq(from, length(mat_files))) {
    k <- mat_files[i]
    print(paste("Loading file", i, "for processing"))
    
    # TODO: encapsulate this in a "convert_mat_file" function, called in the loop
    
    # try to read in the .mat file; on failure report which file and why, 
    # and move on to the next one
    holder <- tryCatch(
      R.matlab::readMat(k), 
      error = function(e) {
        warning("Could not read ", k, ": ", conditionMessage(e), 
                call. = FALSE)
        NULL
      }
    )
    if (is.null(holder)) next
    
    # file path of the target .rds file
    filename <- file.path(
      rds_dir, 
      paste0(str_sub(basename(k), sind, -end_ind), "rds")
    )
    
    # keep what isn't metadata
    holder2 <- holder[[1]][[1]]
    names(holder2) <- dimnames(holder2)[[1]] # added for 2007 data
    
    stopifnot( length(holder2) == expected_len )
    stopifnot( all(vapply(holder2, has_expected_dim, logical(1))) )
    
    # save result
    message("Saving...")
    if (!dir.exists(rds_dir)) dir.create(rds_dir)
    saveRDS(holder2, filename)
    
    # release the (large) objects before the next file is read
    rm(holder, holder2)
    
    # beep()
  }
  
  invisible(NULL)
}

#' Assign List Elements with Names 
#' 
#' Renames each element of the list to identify which source the element 
#' corresponds to. The first element is labelled \code{"c_sim"}.
#' 
#' How the existing labels are read and translated depends on \code{yr}:
#' \itemize{
#'   \item \code{"2005"}: labels are read from the first component of 
#'     \code{dimnames}; all but the first are translated through the 
#'     \code{sources_05} look-up table and written back to \code{dimnames}.
#'   \item \code{"2006"}: labels are read from the first component of 
#'     \code{dimnames} and set as \code{names}; the first one is replaced by 
#'     \code{"c_sim"} only if all the others already appear in \code{sources}.
#'   \item \code{"2007"}: labels are read from \code{names}; all but the first 
#'     are translated through the \code{sources_07} look-up table.
#' }
#' For any other year the object is returned unchanged, with a warning.
#' 
#' @param rds_file rds_file containing sensitivity values and simulated 
#'  concentrations generated by CMAQ
#' @param yr the year to which the data corresponds, as a single string or 
#'  number
#' @return \code{rds_file}, with its elements relabelled.
#' @export
#' 
rename_rds_files <- function(rds_file, yr) {
  
  if (missing(yr)) {
    stop("Please specify the year this data corresponds to", call. = FALSE)
  }
  yr <- as.character(yr)
  if (length(yr) != 1L || is.na(yr)) {
    stop("`yr` must be a single, non-missing year", call. = FALSE)
  }
  
  if (yr == "2006") {
    # labels are already full source names; only the first one may need fixing
    element_names <- dimnames(rds_file)[[1]]
    if (all(element_names[-1] %in% sources)) element_names[1] <- "c_sim"
    names(rds_file) <- element_names
  } else if (yr == "2005") {
    # translate the abbreviated codes, using sources_05 as a look-up table
    orig_names <- dimnames(rds_file)[[1]]
    dimnames(rds_file)[[1]] <- c("c_sim", unname(sources_05[orig_names[-1]]))
  } else if (yr == "2007") {
    # translate the abbreviated codes, using sources_07 as a look-up table
    orig_names <- names(rds_file)
    names(rds_file) <- c("c_sim", unname(sources_07[unname(orig_names[-1])]))
  } else {
    warning("No renaming rule for year ", yr, "; returning input unchanged", 
            call. = FALSE)
  }
  
  rds_file
}
# nabilabd/hybridSA documentation built on May 23, 2019, 12:03 p.m.