R/getCovariates.R

##' Get Covariates
##' 
##' This function pulls data from the World Bank datasets, such as GDP
##' indicators and other such data.  These kinds of variables can be useful
##' for estimating production, loss, etc. and thus form a good set of covariate
##' variables for imputation models.
##' 
##' @param data The data.table object where imputation is to be performed.
##' This object is used to determine which countries and times should be
##' queried from the World Bank dataset.
##' @param imputationParameters A list of the parameters for the imputation
##' algorithms.  See defaultImputationParameters() for a starting point.
##' 
##' @return A data.table containing columns geographicAreaM49 and
##' timePointYears corresponding to the values in data as well as columns
##' containing World Bank data, such as GPD, paved road %, temperature, and
##' precipitation.
##' 
##' @export
##' 

getCovariates = function(data, imputationParameters){
    
    ## Data Quality Checks
    stopifnot(is(data, "data.table"))
    stopifnot("geographicAreaM49" %in% colnames(data))
    if(!exists("ensuredImputationData") || !ensuredImputationData)
        ensureImputationInputs(data = data,
                               imputationParameters = imputationParameters)
#     stopifnot(covariates %in% c("gdpPerCapita", "gdpPPP", "pavedRoads",
#                                 "temperature", "precipitation"))
#     stopifnot(length(covariates) == 0)
    
#     allCountries =
#         GetCodeList(domain = "WorldBank",
#                     dataset = "wb_ecogrw",
#                     dimension = "geographicAreaM49")[type == "country", code]
    allCountries = as.character(unique(data$geographicAreaM49))
    allYears = as.character(unique(data[[imputationParameters$yearValue]]))
   
    infrastructureKey =
        DatasetKey(domain = "WorldBank",
                   dataset = "wb_infrastructure",
                   dimensions =
                       list(
                           Dimension(name = "geographicAreaM49",
                                     keys = allCountries),
                           Dimension(name = "wbIndicator",
                                     keys = "IS.ROD.PAVE.ZS"),
                           Dimension(name = "timePointYears",
                                     keys = allYears)
                       )
                   )
    
    ## Create the gdp key by modifying the infrastructure one
    gdpKey = infrastructureKey
    gdpKey@dataset = "wb_ecogrw"
    gdpKey@dimensions[[2]] =  Dimension(name = "wbIndicator",
                                        keys = c("NY.GDP.MKTP.PP.KD",
                                                 "NY.GDP.PCAP.KD"))

    ## Create the climate key by modifying the infrastructure one
    climateKey = infrastructureKey
    climateKey@dataset = "wb_climate"
    climateKey@dimensions[[2]] =  Dimension(name = "wbIndicator",
                                            keys = c("SWS.FAO.TEMP",
                                                     "SWS.FAO.PREC"))

    newPivot = c(
        Pivoting(code = "geographicAreaM49", ascending = TRUE),
        Pivoting(code = "wbIndicator", ascending = TRUE),
        Pivoting(code = "timePointYears", ascending = FALSE)
    )

#     base =
#         data.table(geographicAreaM49 = character(),
#                    wbIndicator = character(),
#                    timePointYears = character(),
#                    Value = numeric())

    worldBankData = rbind(
        GetData(gdpKey, pivoting = newPivot),
        GetData(infrastructureKey, pivoting = newPivot),
        GetData(climateKey, pivoting = newPivot)
    )
    
    worldBankData = dcast.data.table(worldBankData,
                         geographicAreaM49 + timePointYears ~ wbIndicator,
                         value.var = "Value")
    setnames(worldBankData,
             old = c("IS.ROD.PAVE.ZS", "NY.GDP.MKTP.PP.KD",
                 "NY.GDP.PCAP.KD"),
             new = c("sharePavedRoad", "gdpPPP", "gdpPerCapita"))
    worldBankData[, timePointYears := as.numeric(timePointYears)]
    setkeyv(worldBankData, cols = c("geographicAreaM49", "timePointYears"))
    worldBankData
}
SWS-Methodology/faoswsImputation documentation built on May 9, 2019, 11:48 a.m.