MacroGrowth: Economics Models

is.in <- function( el, set ) {
  if (is.null(set)) { 
    return( rep(TRUE, length(el) ) ) 
  }
  is.element(el, set)
}

#' Loads resampled coefficients, models, or fits from files
#' 
#' @param path the directory containing the resampled coefficient files. Do not add a trailing file separator.
#' @param archive a zip archive containing resampled coefficient files
#' @param country a country or vector of countries.
#' @param model a model or vector of models of the form \code{c("sf", "cd", "ces", "linex")}, for example.
#' @param factors strings of factors used in fitting the models 
#' of the form \code{"iK+iL+iQp"} or \code{"iQp+iK+iL"}.
#' @param sep the separator within the file names between country, model, and factors
#' @param kind a character string that identifies which kind of resampled data to load, one of 
#' \code{coeffs}, \code{models}, or \code{fitted}.
#' @return a data frame containing resampled coefficients (if \code{kind="coeffs"}),
#' a list of models which contain the resample fits (if \code{kind="models"}), or
#' a data frame containing resampled fits to the response variable (if \code{kind="fitted"}).
#' @details Supply either \code{path} or \code{archive} arguments. 
#' Defaults are correct when only one is supplied, so long as archives are constructed with 
#' all resample files at the root level of the zip archive.
#' \code{kind} can be one of 
#' \code{"coeffs"}, to load resampled coefficients; 
#' \code{"models"}, to load models generated by the resample fits; or
#' \code{"fitted"}, to load fits to resampled data.
#' Models need to be generated with \code{save.data=TRUE} so that the original data can
#' be extracted from the model objects.
#' @export
loadResampledData <- function( path="", archive=NULL, country=NULL, model=NULL, 
                               factors=NULL, sep="_", kind=c("coeffs","models","fitted") ){
  kind <- match.arg(kind)
  if (kind == "fitted"){
    prefix <- "models"
  } else {
    prefix <- kind
  }
  if (is.null(archive)){
    files <- dir(path)
  } else {
    filesDF <- unzip(zipfile=archive, list=TRUE)
    files <- filesDF[["Name"]]
  }
  # Keep only those files with the ".rds" extension
  files <- files[grepl(pattern="\\>.rds", x=files)]
  # Keep only files with desired prefix
  files <- files[grepl(pattern=prefix, x=files)]
  # Remove the path from the file names, if present
  names <- sub(pattern=paste0("\\<", path), replacement="", x=files)
  # Remove any file separators at the beginning of the names, if present.
  names <- sub(pattern=paste0("\\<", .Platform$file.sep), replacement="", x=names)
  # Remove the prefix from the file names, if present
  names <- sub(pattern=paste0("\\<", prefix, sep), replacement="", x=names)
  # Remove the .rds suffix from the file names.
  names <- sub(pattern="\\>.rds", replacement="", x=names)
  pieces <- strsplit( x=names, split=sep )
  keep <- sapply( pieces, 
                  function(x) { is.in(x[1],country) && is.in(x[2],model) &&  
                                  is.in(x[3],factors) } )
  files <- files[ keep ]
  pieces <- pieces[keep]  
  dflist <- list()
  nFiles <- length(files)
  
print(keep)
print(files)
  
  
  if (length(pieces) != nFiles){
    stop("Unequal length for files and names in loadResampledData.")
  }
  
  if (kind == "coeffs") {
    # Load data from the resample coefficient files
    for (i in 1:nFiles){
      if (is.null(archive)){
        df <- readRDS ( file.path(path, files[i]) ) # Read files from the directory on disk
      } else {
        connection <- gzcon(unz(archive, files[i])) 
        df <- readRDS ( connection )  # Read files from the archive
        close(connection)
      }
      if (! inherits(df, "data.frame") ) next
      
      # Add relevant information to the data frame
      if ("sigma" %in% names(df) ){
        sigmaTrans <- ifelse(df$sigma < 2, df$sigma, 1.5 - df$rho )
        df$sigmaTrans <- sigmaTrans
      }  
      if ("sigma_1" %in% names(df) ){
        sigmaTrans_1 <- ifelse(df$sigma_1 < 2, df$sigma_1, 1.5 - df$rho_1 )
        df$sigmaTrans_1 <- sigmaTrans_1
      }
      
      countryAbbrev <- pieces[[i]][1]
      modelType <- pieces[[i]][2]
      nestStr <- pieces[[i]][3]
      parsedNestStr <- parseFactorString(factorString=nestStr)
      
      df$country <- countryAbbrev
      df$model <- modelType
      df$nestStr <- nestStr
      df$nestStrParen <- parsedNestStr[["nestStrParen"]]
      df$energy <- parsedNestStr[["energyType"]]
      df$factor <- parsedNestStr[["factor"]]
      dflist[[i]] <- df
    }
    res <- do.call( plyr::rbind.fill, dflist ) 
    return(res)
  }
  
  if (kind == "models"){
    # A list of models is desired.
    modelsList <- list()
    for (i in 1:nFiles){
      if (is.null(archive)){
        dat <- readRDS ( file.path(path, files[i]) ) # Read files from the directory on disk
      } else {
        connection <- gzcon(unz(archive, files[i])) 
        dat <- readRDS ( connection ) # Read files from the archive
        close(connection)
      }
      modelsList <- c(modelsList, dat)
    }
    return(modelsList)
  }
  
  if( kind != "fitted" ){
    stop("This should never happen") 
  }

  for (i in 1:nFiles){
    # Get the models associated with this file.
    if (is.null(archive)){
      modelsList <- readRDS( file.path(path, files[i]) ) # Read files from the directory on disk
    } else {
      connection <- gzcon(unz(archive, files[i])) 
      modelsList <- readRDS( connection ) # Read files from the archive
      close(connection)
    }
    
    # The first model is the fit to historical data. 
    # The model object also contains the original data as an attribute
    # if the model was created with save.data=TRUE.
    origModel <- modelsList[["orig"]]
    # Extract the data frame containing the actual (historical) data
    actual <- subset( getData(origModel), select=c("Year", "iGDP", "Country") )
    
    row.names(actual) <- NULL # Eliminates row names if they are present.
    
    countryAbbrev <- pieces[[i]][1]
    modelType <- pieces[[i]][2]
    nestStr <- pieces[[i]][3]
    parsedNestStr <- parseFactorString(factorString=nestStr)
    
    # Get the data Source (from the directory) and add it to the data frame
    dir_pieces <- strsplit(path, split=.Platform$file.sep)[[1]]
    Source <- dir_pieces[length(dir_pieces)]
    actual$Source <- Source
    actual$resampleNumber <- NA
    actual$resampled <- FALSE
    actual$factor <- parsedNestStr[["factor"]]
    # The historical data doesn't really have an "energy" associated with it.
    # But, setting the Energy column to the requested energyType will allow this
    # actual data to show up in the correct facet on any graphs that facet on energyType.
    actual$energy <- parsedNestStr[["energyType"]]
    actual$model <- modelType
    # The historical data doesn't really have a "nest" associated with it.
    # But, setting the nest column to the requested nest will allow this
    # actual data to show up in the correct facet on any graphs that factet on nest.
    actual$nestStr <- nestStr
    actual$nestStrParen <- parsedNestStr[["nestStrParen"]]
    actual$iGDP.hat <- yhat(origModel)
    
    # Add the resampled fits. 
    # The resample models are the 2nd through nFiles models in the modelsList
    #    dfList <- list()
    #    resampleModels <- modelsList[-1] # Cycle through all the models, except the original model
    #    nModels <- length(resampleModels)
    #    for (j in 1:nModels){
    #      resampleDF <- pred
    #      resampleDF$iGDP <- yhat(resampleModels[[j]])
    #      resampleDF$resampleNumber <- j
    #      resampleDF$resampled <- TRUE
    #      dfList[[length(dfList) + 1]] <- resampleDF
    #    }
    
    # can we do something less drastic (using environments or frames) here?
    j <<- 0
    dfList <- lapply( modelsList[-1], function(m) {
      j <<- j+1
      return(transform(actual,
                       iGDP = response(m),
                       iGDP.hat = yhat(m),
                       resampleNumber = j,
                       resampled=TRUE
                       ## type="resampled"
      ))
    } 
    )
    temp <- do.call("rbind", c(list(actual), dfList))
    outgoing <- data.frame()
    outgoing <- do.call("rbind", c(list(outgoing, temp)) )
  }
  return(outgoing)
}


#' Loads post-processed data files
#' 
#' @param Source the data source to be loaded
#' @param kind one of \code{coeffs} (to load coefficients from a file named <Source>_Coeffs), 
#' \code{omodels} (to load original models from a file named <Source>_oModels), or 
#' \code{fitted} (to load fitted data to all resamples from a file named <Source>_Fitted).
#' @param dir specifies the directory from which to load the files.
#' Default is \code{data_postprocessed}.
#' @export
loadPostProcessedData <- function(Source, kind=c("coeffs","omodels","fitted"), dir=file.path("data_postprocessed")){
  kind <- match.arg(kind)
  if (kind == "coeffs"){
    path <- file.path(dir, paste0(Source, "_Coeffs.rds"))
  } else if (kind == "omodels"){
    path <- file.path(dir, paste0(Source, "_oModels.rds"))
  } else {
    if (kind != "fitted"){
      # This should never happen
      stop(paste("Unknown kind:", kind))
    }
    path <- file.path(dir, paste0(Source, "_Fitted.rds"))
  }
  return(readRDS(path))
}