R/HydeUtilities.R

Defines functions factor_reference dataframeFactors makeFactorRef validateParameters polyToPow policyMatrixValues nodeFromFunction matchVars matchLevelNumber makeJagsReady decisionOptions termName

Documented in dataframeFactors decisionOptions factor_reference makeFactorRef makeJagsReady matchLevelNumber matchVars nodeFromFunction policyMatrixValues polyToPow termName validateParameters

#' @name HydeUtilities
#' @importFrom magrittr %>%
#' @export %>%
#' 
#' @title Hyde Network Utility Functions 
#' @description The functions described below are unexported functions that 
#'   are used internally by \code{HydeNet} to prepare and modify network objects
#'   and prepare JAGS code.
#'   
#' @details 
#'   \code{termName}: In most model objects, factors in the model are 
#'   represented as [variable][level].  This utility function pulls out the 
#'   [variable] component.  This is typically called from within 
#'   \code{makeJagsReady}.
#'   
#'   \code{decisionOptions}: When compiling multiple JAGS models to evaluate the 
#'   effect of decision nodes, the options for each decision node are extracted
#'   by this utility.
#'   
#'   \code{makeJagsReady}: Manages the presence of factors in interactions and
#'   makes sure the proper numeric factor value is given to the JAGS code.  
#'   This is called from within a \code{writeJagsFormula} call.
#'   
#'   \code{matchLevelNumber}: Assigns the correct numeric value of a level to 
#'   a factor variable in a model.  This is called from within 
#'   \code{makeJagsReady}.
#'   
#'   \code{matchVars}: Given a list of existing node names, the terms of a formula
#'   are matched to the node names.  This allows functions to be used in 
#'   formula defintions.  Most commonly, \code{factor(var)} would get reduced to
#'   \code{var}, which is a node name that JAGS will understand.  There is still
#'   limited ability for translation here, and \code{matchVars} assumes that the 
#'   longest match is the desired match. If you pass a function with two node names,
#'   the longer of the two will be taken and JAGS will likely fail.
#'   
#'   \code{nodeFromFunction}: This is a utility function necessary to make 
#'   \code{modelToNode} work properly.  A node vector was needed to pass to 
#'   \code{matchVars}, and this is the mechanism to generate that vector.
#'   
#'   \code{polyToPow}: converts polynomials generated by the \code{poly} function
#'   to use the \code{pow} function in JAGS.
#'   
#'   \code{validateParameters}: Users may pass parameters to the JAGS code using the 
#'   \code{setNode} function.  If a faulty parameter is given, such as 
#'   \code{lambda = -10} for a poisson distribution (\code{lambda} must be
#'   positive in a Poisson distribution), the error returned by JAGS may not
#'   clear enough to diagnose a problem that presented several steps earlier
#'   in the code.  \code{validateParamters} allows the user to receive instant
#'   feedback on the appropriateness of the code.
#'   
#'   Logical expressions for comparisons are stored in the \code{jagsDists}
#'   data object (\code{data(jagsDists, package='Hyde')}).  This is a utility
#'   function used only within \code{setNode} and is not visible to the user.
#'   
#' @author Jarrod Dalton and Benjamin Nutter

#' @rdname HydeUtilities
#' @importFrom stringr str_extract
#'   
#' @param term Usually the \code{term} column from the output of 
#'   \code{broom::tidy()}
#' @param reg A regular expression, usually provided by \code{factorRegex}

termName <- function(term, reg)
{
  if (!is.null(reg))
  {
    sapply(X = term, 
           FUN = 
             function(t, reg)
             {
               t <- unlist(strsplit(x = t, 
                                    split = ":"))
               t <- ifelse(test = grepl(pattern = reg, 
                                        x = t),
                           yes = stringr::str_extract(string = t, 
                                                      pattern = reg),
                           no = t)
               t <- paste(t, collapse=":")
             },
           reg = reg)
  }
  else 
  {
    term
  }
}

#' @rdname HydeUtilities
#' 
#' @param node Character string indicating a node in a network
#' @param network A Hyde Network Object

decisionOptions <- function(node, network)
{
  #* In some cases, nodeFitter isn't set for a node.  When nodeFitter is NULL,
  #* we want to skip the "cpt" check and move on to other possibilities.
  #* If it isn't NULL and "cpt" is the fitter, we return dist immediately
  #* to avoid overwriting it in subsequent checks
  if (!is.null(network[["nodeFitter"]][[node]]))
  {
    if (network[["nodeFitter"]][[node]] == "cpt")
    {
      D <- 
        if (!is.null(network[["nodeData"]][[node]])) 
        {
          network[["nodeData"]][[node]][[node]] 
        }
        else 
        {
          network[["data"]][[node]]
        }
      dist <- 1:length(unique(D))
      return(dist)
    }
  }
  #* This uses a regular expression to extract the level number from
  #* the node JAGS model.  For instance
  #* pi.var[1] <- .123; pi.var[2] <- .321; ...
  #* the regular expression pulls out the numbers in between each set of [].
  if (network$nodeType[[node]] == "dcat")
  {
    dist <- writeJagsModel(network = network, 
                           node = node)[1]
    dist <- unlist(strsplit(x = dist, 
                            split = ";"))
    dist <- 
      stringr::str_extract(string = dist, 
                           pattern = stringr::regex("(?<=[\\[]).*(?=[\\]])")) %>%
      as.numeric()
  }
  else if (network$nodeType[[node]] == "dbern")
  {
    dist <- 0:1
  }
  dist
}

#' @rdname HydeUtilities 
#' @param mdl Output from \code{broom::tidy()}
#' @param factorRef A list of data frames mapping factors to levels
#' @param bern bernoulli node names.

makeJagsReady <- function(mdl, factorRef, bern)
{
  factorRef[bern] <- 
    lapply(factorRef[bern],
           function(x)
           {
             if (is.null(x)) return(NULL)
             x$level <- x$level - 1
             x
           }
    )
  
  factors <- 
    dplyr::filter(.data = mdl, 
                  !grepl(":", level) & !is.na(level) & level != "") %>%
    dplyr::distinct(term_plain) %$%
    term_plain
  
  mdl
  
  mdl$jagsVar <- 
    mapply(
      function(term, level, factors, factorRef)
      {
        val <- 
          mapply(
            function(fr, term, level)
            {
              val <- fr[[term]]$level[fr[[term]]$label == level]
              if (is.null(val)) val <- NA
              val
            },
            term = term,
            level = level,
            MoreArgs = list(fr = factorRef),
            SIMPLIFY = FALSE
          ) %>%
            unlist()
          
        ifelse(term %in% factors,
               sprintf("(%s == %s)", 
                       term, 
                       val),
               term)
      },
      term = stringr::str_split(mdl$term_plain, ":"),
      level = stringr::str_split(mdl$level, ":"),
      MoreArgs = list(factors = factors, 
                      factorRef = factorRef),
      SIMPLIFY = FALSE
    ) %>%
    vapply(
      paste0,
      character(1),
      collapse = "*"
    )
  
  mdl
}

#' @rdname HydeUtilities
#' @param t Usually the \code{term_name} column generated within 
#'   \code{makeJagsReady}
#' @param lev usually the \code{level_value} column generated within
#'   \code{makeJagsReady}

matchLevelNumber <- function(t, lev)
{
  t <- unlist(strsplit(x = t, 
                       split = ":"))
  l <- unlist(strsplit(x = as.character(lev), 
                       split = ":"))
  for (i in 1:length(t))
  {
    t[i] <- 
      if (is.na(l[i]))
      {
        t[i]
      }
      else 
      {
        paste0("(", t[i], " == ", l[i], ")")
      }
  }
  paste0(t, collapse="*")
}

#' @rdname HydeUtilities
#' @param terms A vector of term names, usually from a \code{broom::tidy} object.
#' @param vnames A vector of term names, usually from \code{network$nodes}.

matchVars <- function(terms, vnames)
{
  Matches <- sapply(X = vnames, 
                    FUN = function(p) stringr::str_extract(string = terms, 
                                                           pattern = p))
  Matches <- apply(X = as.matrix(Matches), 
                   MARGIN = 1, 
                   FUN = function(s) ifelse(test = is.na(s), 
                                            yes = "", 
                                            no = s))
  Matches <- apply(X = as.matrix(Matches), 
                   MARGIN = 2, 
                   FUN = function(s) s[which.max(nchar(s))])
  Matches[which(grepl(pattern = "Intercept", x = terms))] <- 
    terms[which(grepl(pattern = "Intercept", x = terms))]
  
  Matches
}

#' @rdname HydeUtilities
#' @param node_fn A character string representing a function passed in a model formula

nodeFromFunction <- function(node_fn)
{
  node <- stringr::str_extract(string = node_fn, 
                               pattern = "(?<=[(]).+?(?=[)])")
  node <- gsub(pattern = "([*]|[,]|[/]|\\^)[[:print:]]+", 
               replacement = "", 
               x = node)
  ifelse(test = is.na(node), 
         yes = node_fn, 
         no = node)
}

#' @rdname HydeUtilities
#' 

policyMatrixValues <- function(node, network)
{
  policy <- network[["nodePolicyValues"]][[node]]
  if (!is.numeric(policy))
  {
    policy <- seq_along(policy)
  }
  
  if (network$nodeType[[node]] == "dbern")
  {
    policy <- policy - 1
  }
  
  policy
}

#' @rdname HydeUtilities
#' @param poly A single term for which the polynomial components should be 
#'   converted to the JAGS pow() function.

polyToPow <- function(poly)
{
  poly <- unlist(strsplit(x = poly, 
                          split = "[*]"))
  poly <- gsub(pattern = "poly[(]", 
               replacement = "pow(", 
               x = poly)
  poly <- ifelse(test = grepl("pow[(]", poly),
                 yes = gsub(pattern = "\\d{1,2}[)]", 
                            replacement = "", 
                            x = poly),
                 no = poly)
  poly <- ifelse(test = grepl(pattern = "pow[(]", 
                              x = poly),
                 yes = paste0(poly, ")"),
                 no = poly)
  
  poly <- paste0(poly, collapse="*")
  
  poly
}

  
#' @rdname HydeUtilities
#' @param params The list of parameters given in the \code{...} argument of 
#'   \code{setNode}
#' @param dist The JAGS distribution function name.  Appropriate names are
#'   in the \code{FnName} column of the \code{jagsDists} data object.

validateParameters <- function(params, dist)
{
  expr <- jagsDists[["paramLogic"]][jagsDists[["FnName"]] == dist]
  valid <- sapply(X = expr, 
                  FUN = function(e) with(params, eval(parse(text=e))))  
  valid[sapply(X = params, 
               FUN = function(p) p %in% c("fromData", "fromFormula"))] <- TRUE
  valid
}

#' @rdname HydeUtilities
#' 
makeFactorRef <- function(network)
{
  network_factors <- 
    names(network[["factorLevels"]])[!vapply(X = network[["factorLevels"]], 
                                             FUN = is.null, 
                                             FUN.VALUE = logical(1))]
  
  if (length(network_factors) == 0)
  {
    return(NULL)
  }
  
  Ref <- 
    lapply(X = network_factors,
           FUN = 
             function(f)
             {
               data.frame(value = 1:length(network[["factorLevels"]][[f]]),
                          label = network[["factorLevels"]][[f]],
                          stringsAsFactors = FALSE)
             }
    )
  names(Ref) <- network_factors
  
  types <- unlist(network[["nodeType"]][network_factors])
  types <- types[types %in% "dbern"]
  
  Ref[names(types)] <- 
    lapply(X = Ref[names(types)], 
           FUN = 
             function(f)
             {
               f$value <- f$value - 1
               f
             }
    )
  
  Ref[unique(names(Ref))]
}

#' @rdname HydeUtilities
#' @param dataframe A data frame.  The data frame will be searched for factors and
#'   a reference table (another data frame) is returned.
#'   
dataframeFactors <- function(dataframe)
{
  if (is.null(dataframe))
  {
    return(NULL)
  }
  
  factor_vars <- names(dataframe)[sapply(X = dataframe, 
                                         FUN = class) == "factor"]
  reference_list <- 
    lapply(X = factor_vars,
           FUN = function(f) data.frame(value = sort(unique(as.numeric(dataframe[[f]]))),
                                        label = levels(dataframe[[f]]),
                                        stringsAsFactors=FALSE))
  names(reference_list) <- factor_vars
  reference_list
}

#' @rdname HydeUtilities
#' @param data A data frame.  

factor_reference <- function(data)
{
  Ref <- 
    lapply(data,
           function(x)
           {
             if (is.factor(x)) data.frame(level = seq_along(levels(x)),
                                          label = levels(x))
             else NULL
           }
    )
  
  Ref[!vapply(Ref, is.null, logical(1))]
}

utils::globalVariables("level")
nutterb/HydeNet documentation built on July 13, 2020, 5:21 p.m.