R/processForward.R

##' Process Forward
##' 
##' A few edges in the commodity trees are labeled with an 'F' to indicate that 
##' processing is 'forward'.  The parent commodities in these edges are 
##' immediately converted into the corresponding child and then they are removed
##' from the tree (as we will standardize to the children instead).  This is a 
##' rare scenario; an example commodity is sugar.
##' 
##' Note: when commodities are processed forward like this, the final flag is
##' assigned ARBITRARILY as the first flag observed (for lack of a better
##' approach).  This should be corrected.
##' 
##' @param data The data.table containing the full dataset for standardization.
##' @param tree The commodity tree which provides the edge structure.
##' @param standParams The parameters for standardization.  These parameters 
##'   provide information about the columns of data and tree, specifying (for 
##'   example) which columns should be standardized, which columns represent 
##'   parents/children, etc.
##'   
##' @return A list of names 'data' and 'tree'.  Both objects must be returned, 
##'   as the tree is updated by pruning off some edges.
##'   

processForward = function(data, tree, standParams){
    
    ## If no forward processing edge, than don't do anything:
    if(all(tree[, get(standParams$targetVar) != "F"])){
        return(list(data = data, tree = tree))
    }
    
    cnames = colnames(data)
    
    subTree = tree[get(standParams$targetVar) == "F", ]
    level = getCommodityLevel(subTree, parentColname = standParams$parentVar,
                              childColname = standParams$childVar)
    setnames(level, c(standParams$parentVar, "level"))
    if(length(unique(tree[parentID %in% subTree[, parentID], target])) > 1){
        warning("Some parents have one edge set to be forward processed and ",
                "another edge not.  How to handle such a case is not clear, ",
                "and this may cause strange behavior.")
    }
    subTree = merge(subTree, level, by = standParams$parentVar)
    setnames(subTree, standParams$parentVar, standParams$itemVar)
    
    ## Use the mode as the flag aggregation algorithm
    Mode <- function(x) {
        ux <- unique(x)
        ux[which.max(tabulate(match(x, ux)))]
    }
    
    for(currentLevel in subTree[, sort(unique(level))]){
        dataToUpdate = merge(data, subTree[level == currentLevel, ],
                             by = standParams$itemVar, allow.cartesian = TRUE)
        ## Process the node down by first computing the availability of the
        ## parent as the balance
        dataToUpdate = dataToUpdate[, list(parentAvail = sum(Value *
            ifelse(element %in% c(standParams$exportCode, standParams$stockCode,
                                  standParams$foodCode, standParams$foodProcCode,
                                  standParams$feedCode, standParams$wasteCode,
                                  standParams$seedCode, standParams$industrialCode,
                                  standParams$touristCode, standParams$residualCode), -1,
                   ifelse(element %in% c(standParams$importCode,
                                         standParams$productionCode), 1, 0)),
            na.rm = TRUE),
            parentAvailSd = sqrt(sum(standardDeviation^2 *
                ifelse(element %in% c(standParams$exportCode, standParams$stockCode,
                        standParams$foodCode, standParams$foodProcCode,
                        standParams$feedCode, standParams$wasteCode,
                        standParams$seedCode, standParams$industrialCode,
                        standParams$touristCode, standParams$residualCode,
                        standParams$importCode, standParams$productionCode), 1, 0),
                na.rm = TRUE))),
            by = c(standParams$mergeKey, standParams$childVar,
                   standParams$extractVar)]
        dataToUpdate[, c(standParams$itemVar) := get(standParams$childVar)]
        dataToUpdate[, Value := get(standParams$extractVar) * parentAvail]
        dataToUpdate[, standardDeviation := get(standParams$extractVar) * parentAvailSd]
        dataToUpdate[, element := standParams$productionCode]
        dataToUpdate = dataToUpdate[, c(standParams$mergeKey, "element", "Value",
                                        "standardDeviation"), with = FALSE]
        ## Aggregate dataToUpdate in case there are multiple parents going into
        ## one child.
        dataToUpdate = dataToUpdate[, list(Value = sum(Value),
                    standardDeviation = sqrt(sum(standardDeviation^2))),
                by = c(standParams$mergeKey, "element")]
                
        ## Add in the new data values
        data = merge(data, dataToUpdate, by = c(standParams$mergeKey, "element"),
                     all = TRUE, suffixes = c("", ".new"))
        data[is.na(Value), c("Value", "standardDeviation") :=
                 list(Value.new, standardDeviation.new)]
        data[, c("Value.new", "standardDeviation.new") := NULL]
        
        ## Remove the values processed forward from the original data
        data = data[!get(standParams$itemVar) %in%
                        subTree[level == currentLevel, get(standParams$itemVar)]]
    }
    tree = tree[!get(standParams$targetVar) == "F", ]
    
#     ## This function may create some new commodities, and only the production of
#     ## these items will be in the data.frame.  To prevent future issues, fill in
#     ## all other elements as well.

    return(list(data = data, tree = tree))
}
SWS-Methodology/faoswsAupus documentation built on May 9, 2019, 11:45 a.m.