R/generate_stratified_model.R

Defines functions generate_stratified_model

Documented in generate_stratified_model

#' Stratify an mb model by groups
#'
#' This function takes as input a modelbuilder model and
#' expands it according to specified stratifications (e.g.
#' age groups).
#'
#' @description The model needs to adhere to the structure specified by
#'     the modelbuilder package. Models built using the modelbuilder package
#'     automatically have the right structure. A user can also build a
#'     model list structure themselves following the modelbuilder
#'     specifications. If the user provides a file name, this file needs to
#'     be an RDS file and contain a valid modelbuilder model structure.
#' @param mbmodel modelbuilder model structure, either as list object
#'     or file name
#' @param stratum_list A list of lists defining the stratification structure
#'     to be applied to the model. At a minimum, the list must contain one
#'     sublist with the following structure: \code{stratumname} a character
#'     string giving the name of the stratum (e.g., "age"); \code{names} a
#'     vector of character strings defining the names for each group within
#'     the stratum (e.g., c("child", "adult")); \code{labels} a vector of
#'     character strings defining the labels that correspond to each group
#'     within the stratum, which are applied to the model codes (e.g.,
#'     c("c", "a")); \code{comment} a character string of any comments or
#'     notes for the stratum. The \code{labels} are appended to model state
#'     variables (e.g., S becomes S_c) and parameters (e.g., b becomes b_c).
#' @param par_stratify_list A list that defines the stratification of
#'    parameters in reference to state variables. This list can be auto
#'    generated by calling the modelbuilder \code{generate_stratifier_list}.
#' @return The function returns a modelbuilder model object (a list).
#' @author Andrew Tredennick and Andreas Handel
#' @export
#' @examples
#' \dontrun{
#' mbmodel <- readRDS("auxiliary/modelfiles/Coronavirus_vaccine_model_v2.Rds")
#' strata_list <- list(
#'  stratumname = "risk",
#'  names = c("high risk", "low risk"),
#'  labels = c("h", "l"),
#'  comment = "This defines the risk structure."
#'  )
#' }

generate_stratified_model <- function(mbmodel,
                                      stratum_list,
                                      par_stratify_list)
{
  #if the model is passed in as a file name, load it
  #otherwise, it is assumed that 'mbmodel' is a list structure
  #of the right type
  if (is.character(mbmodel)) {
    mbmodel = readRDS(mbmodel)
  }
  mb <- mbmodel

  #compute number of groups in stratum
  ngroups <- length(stratum_list$names)

  #error checking
  if(!is.null(stratum_list$labels))
  {  #if not null, then the labels and names need to be the same length
    if(ngroups != length(stratum_list$labels))
    {
      stop(paste("Labels and names for",
                 stratum_list$stratumname,
                 "are not of equal length"))
    }
  } else {  #make labels if NULL
    stratum_list$labels <- stratum_list$names  #labels are just the names if NULL
  }

  #get vector of all parameter names
  strat_pars <- unlist(lapply(par_stratify_list, "[[", 1))

  #get same vector of parameters from the model itself
  mod_pars <- unlist(lapply(mbmodel$par, "[[", 1))

  #make sure all parameters are present in the strat_pars
  test <- sum(!strat_pars %in% mod_pars)
  if(test != 0) {
    stop("All parameters must have a stratification specified.")
  }

  #make dataframe of variable names
  varnames_map <- data.frame(abb = unlist(lapply(mbmodel$var, "[[", 1)),
                             name = unlist(lapply(mbmodel$var, "[[", 2)))

  strat_map <- data.frame(abb = stratum_list$labels,
                          name = stratum_list$names)

  #set up a new modelbuilder list object
  #this gets made anew (and overwritten) each iteration through
  #a stratum. newmb is always the most recent, and final, object
  #containing the appended variables, flows, and parameters.
  #the mb object is always the "to-be-stratified" object, which is
  #either the original object sent to this function or the most
  #recent iteration of newmb. see if/then/else statement above.
  newmb <- as.list(c(mb$title,
                     mb$description,
                     mb$author,
                     mb$date,
                     mb$details))
  names(newmb) <- c("title", "description", "author", "date", "details")

  #loop over all model variables/equations and apply stratum subgroups
  nvars <- length(mb$var)
  ct <- 1  #counter for state variables
  par_updates <- list()  #empty storage for new parameters
  for(v in 1:nvars) {  #open loop over variable
    var <- mb$var[[v]]
    focal_var <- var$varname

    for(g in 1:ngroups) {  #open loop over strata
      lab <- stratum_list$labels[g]
      nm <- stratum_list$names[g]

      #append group label to the variable names
      newname <- paste(var$varname, lab, sep = "")

      #loop over flows and expand as per the par_stratify_list
      nflows <- length(var$flows)
      #create an empty character object for the appended flows
      # allnewflows <- character(length = nflows)
      allnewflows <- list()
      for(f in 1:nflows) {  #open loop over each flow in the var equation
        flow <- var$flows[f]

        #extract just the variables and parameters, in order, from the flows
        flowsymbols <- get_vars_pars(flow)

        #extract just the math symbols, in order, from the flows
        flowmath <- get_math(flow, flowsymbols)

        #identify as parameter or state variable
        types <- character(length(flowsymbols))
        types[] <- "par"
        stateids <- which(substr(flowsymbols, 1, 1) %in% LETTERS)
        types[stateids] <- "var"

        #get parameter stratification mappings for the current
        #parameters of interest (those in this flow)
        these_pars <- which(lapply(par_stratify_list, "[[", 1) %in%
                              flowsymbols[types == "par"])
        par_maps <- par_stratify_list[these_pars]

        #flag if there are more than 1 state variable
        #this means we need to stratify interaction terms
        if(length(stateids) > 1) {
          inter <- TRUE
        } else {
          inter <- FALSE
        }

        if(inter) {
          #create data frame matching names and subscripts
          #this is all possible combinations of vars/pars and strata subscripts
          expansion <- expand.grid(flowsymbols,
                                   stratum_list$labels,
                                   stringsAsFactors = FALSE)
          names(expansion) <- c("original_name", "group")

          #set all parameters (non state variables) to have no subscript
          #this gets updated later based on the stratification mapping
          #between state variables and parameters
          expansion[!expansion$original_name %in% flowsymbols[stateids], "group"] <- ""

          #flows are getting replicated for each stratum, so we number
          #them here to keep track
          expansion$flow_num <- rep(1:ngroups, each = length(flowsymbols))

          #add id for whether a parameter or a state variable
          expansion$type <- rep(types, times = ngroups)

          #if the flow is out of the compartment, then we just need to
          #make sure that the focal variable receives the focal subscript
          #in all flows
          if(unlist(strsplit(flowmath, ""))[1] == "-") {
            expansion[expansion$original_name == var$varname, "group"] <- lab
          }

          #if the flow is into the compartment and includes and interaction,
          #then we just need to make sure that "S" receives the focal subscript
          #because all donations come from "S"
          #NOTE: THIS ONLY WORKS FOR SPECIFIC SIR STYLE MODELS
          #      I THINK WE CAN GENERALIZE BY ADDING A "DONOR"
          #      ELEMENT TO THE MBOBJECT
          if(unlist(strsplit(flowmath, ""))[1] == "+") {
            ids_to_update <- which(substr(expansion$original_name, 1, 1) == "S")
            expansion[ids_to_update, "group"] <- lab
          }
        } else {
          expansion <- expand.grid(flowsymbols,
                                   lab,
                                   stringsAsFactors = FALSE)
          names(expansion) <- c("original_name", "group")
          expansion[!expansion$original_name %in% flowsymbols[stateids], "group"] <- ""
          expansion$flow_num <- 1
          expansion$type <- types
        }

        #loop over parameter names and apply mapping
        npars <- length(par_maps)
        for(p in 1:npars) {
          map <- par_maps[[p]]
          if(length(map$stratify_by) == 1) {
            varstrat <- map$stratify_by
            parlab <- paste0(varstrat, lab)
            expansion[expansion$original_name == map$parname, "group"] <- parlab
          } else if(length(map$stratify_by) == 0) {
            expansion[expansion$original_name == map$parname, "group"] <- ""
          } else {
            #loop over flow replicates and apply parameter mapping
            for(fn in 1:length(unique(expansion$flow_num))) {
              par_name <- map$parname
              map_vars <- map$stratify_by
              tmp <- expansion[expansion$flow_num == fn &
                                 expansion$original_name %in% map_vars, ]
              tmp2 <- tmp[ ,c("original_name","group")]
              new_sub <- paste(apply(tmp2, 1, paste, collapse = ""), collapse = "_")
              expansion[expansion$flow_num == fn &
                          expansion$original_name == par_name, "group"] <- new_sub
            }
          }
        }

        #loop over flow components and append subscripts accordingly
        new_flowsymbols <- character(length = nrow(expansion))
        for(fid in 1:nrow(expansion)) {
          tmpc <- expansion[fid,]
          if(tmpc["group"] == "") {
            #no appendage if numeric
            new_flowsymbols[fid] <- tmpc["original_name"]
          } else if(tmpc["type"] == "par") {
            #paste the two columns with _ for parameters
            #first, check for variable-mapping in the group name
            #if there, then subset the original_name character string to
            #just the characters before the first underscore
            #this is done because the appending for interactions already
            #includes the full complement of strata
            varcheck <- substr(tmpc["group"], 1, 1) %in% LETTERS
            if(varcheck) {
              origpar <- unlist(strsplit(as.character(tmpc["original_name"]), "_"))[1]
              tmpc["original_name"] <- origpar
            }
            new_flowsymbols[fid] <- apply(tmpc[,c("original_name","group")], 1, paste, collapse = "_")
          } else {
            #paste subscript for state variables, with no underscore
            new_flowsymbols[fid] <- apply(tmpc[,c("original_name","group")], 1, paste, collapse = "")
          }
        }

        #reassemble the flows using the math and the newflow parameters
        #and variables with appended labels for the group
        #start flowmath first followed by all but the first element of
        #the flow parameters and variables. this ensures correct ordering.
        #note that this assumes that all flows start with a math symbol.
        flowid <- expansion$flow_num
        newvarflows <- character(length(unique(flowid)))
        for(fid in 1:length(unique(flowid))) {
          newvarflows[fid] <- paste(flowmath,
                                    unlist(new_flowsymbols[which(flowid==fid)]),
                                    collapse = "")
        }

        #replace all the white space to get a flow in the correct
        #formate for modelbuilder
        newvarflows <- gsub(" ", "", newvarflows, fixed = TRUE)

        #store the new flow in the empty object
        allnewflows[[f]] <- newvarflows

        #update parameters data frame
        tmp_pars <- expansion
        tmp_pars$new_par <- unlist(new_flowsymbols)
        num_flag <- suppressWarnings(is.na(as.numeric(tmp_pars$original_name)))
        tmp_pars <- tmp_pars[tmp_pars$type == "par" & num_flag, ]
        tmp_pars$value <- NA
        tmp_pars$text <- NA
        tmp_pars$strat_by <- NA

        #loop over parameters and fill in values
        for(pn in 1:nrow(tmp_pars)) {
          dopar <- tmp_pars[pn, "original_name"]
          orig_par <- mb$par[[which(lapply(mb$par, "[[", 1) == dopar)]]
          strat_by <- par_stratify_list[[which(lapply(par_stratify_list, "[[", 1) == dopar)]]
          tmp_pars[pn, "value"] <- orig_par$parval
          tmp_group_names <- unlist(strsplit(tmp_pars[pn, "group"], "_"))
          tofrom_name <- substr(tmp_group_names, 1, (nchar(tmp_group_names)-1))
          tofrom_group <- substr(tmp_group_names, nchar(tmp_group_names), nchar(tmp_group_names))
          tofrom_group_names <- strat_map[match(tofrom_group, strat_map$abb), "name"]
          tofrom <- paste(tolower(varnames_map[which(varnames_map$abb %in% tofrom_name), "name"]), tofrom_group_names)
          origtext <- strsplit(orig_par$partext, ":")[[1]][1]
          if(length(tofrom) == 1) {
            tmp_pars[pn, "text"] <- paste0(origtext, ": ", tofrom[1])
          } else if(length(tofrom) == 0) {
            tmp_pars[pn, "text"] <- origtext
          } else {
            tmp_pars[pn, "text"] <- paste0(origtext, ": to ", tofrom[1], " from ", tofrom[2])
          }

          if(length(strat_by$stratify_by) == 1) {
            tmp_pars[pn, "strat_by"] <- paste(strat_by$stratify_by, lab, sep = "_")
          } else {
            gr <- tmp_pars[pn, "group"]
            gr <- unlist(strsplit(gr, split = "_"))
            tmp_pars[pn, "strat_by"] <- paste(gr, collapse = "::")
          }

        }  # end parameter value loop

        par_updates <- rbind(par_updates, tmp_pars)
      }  #end of flow loop

      #define the new state variable name, no underscore
      newname <- paste(var$varname, lab, sep = "")

      #append group name to the variable text
      newtext <- paste(var$vartext, nm, sep = " ")

      #append group name to the flow names
      newflownames <- rep(paste(var$flownames, nm, sep = ", "), each = ngroups)

      #create the variable sublist with the new, appended objects
      #this follows the modelbuilder structure
      newvar <- list(newname,
                     newtext,
                     var$varval,
                     unlist(allnewflows),
                     newflownames)
      names(newvar) <- names(var)  #set to the appropriate names

      #add the appended, stratified variable list to the newmb object
      #indexed by ct
      newmb[["var"]][[ct]] <- newvar
      ct <- ct+1  #advance the state variable counter
    }  #end of stratum group loop
  }  #end of state variable loop

  #remove duplicate new parameters
  rm_ids <- duplicated(par_updates$new_par)
  new_pars <- par_updates[!rm_ids, ]

  #loop over new parameters and:
  #  1) assign to the newmb object
  #  2) update stratification list
  newmb$par <- list()
  # new_strat_list <- list()
  for(ip in 1:nrow(new_pars)) {
    #assign to newmb object
    update <- list(parname = new_pars[ip, "new_par"],
                   partext = new_pars[ip, "text"],
                   parval = new_pars[ip, "value"])
    newmb$par[[ip]] <- update
  }

  #add in times back to the new modelbuilder object
  #time does not change due to stratification, so this can come from
  #the original modelbuilder object sent to the function
  newmb$time <- mbmodel$time

  #title for model, replacing space with low dash to be used
  #in function and file names
  modeltitle <- paste0(gsub(" ", "_", mbmodel$title),"_",stratum_list$stratumname)

  #give new title to model by appending 'stratified'
  newmb$title <- modeltitle

  #return the newmb object, which is always the most recently
  #stratified object
  return(newmb)

}  #end of function
andreashandel/modelbuilder documentation built on April 16, 2024, 8:48 a.m.