R/make_models.R

Defines functions make_parameters_df make_model

Documented in make_model make_parameters_df

#' Make a model
#'
#' \code{make_model} uses \link{dagitty} syntax and functionality to
#' specify nodes and edges of a graph. Implied causal types are calculated
#' and default priors are provided under the assumption of no confounding.
#' Models can be updated with specification of a parameter matrix, \code{P}, by
#' providing restrictions on causal types, and/or by providing informative
#' priors on parameters. The default setting for a causal model have flat
#' (uniform) priors and parameters putting equal weight on each parameter
#' within each parameter set. These can be adjust with \code{set_priors}
#' and \code{set_parameters}
#'
#' @param statement A character. Statement describing causal
#'   relations using \link{dagitty} syntax. Only directed relations are
#'   permitted. For instance "X -> Y" or  "X1 -> Y <- X2; X1 -> X2".
#' @param add_causal_types Logical. Whether to create and attach causal
#'   types to \code{model}. Defaults to `TRUE`.
#' @param nodal_types List of nodal types associated with model nodes
#' @export
#'
#' @return An object of class \code{causal_model}.
#'
#' An object of class \code{"causal_model"} is a list containing at least the
#' following components:
#' \item{statement}{A character vector of the statement that defines the model}
#' \item{dag}{A \code{data.frame} with columns `parent`and `children`
#'   indicating how nodes relate to each other.}
#' \item{nodes}{A named \code{list} with the nodes in the model}
#' \item{parents_df}{A \code{data.frame} listing nodes, whether they are
#'   root nodes or not, and the number of parents they have}
#' \item{nodal_types}{Optional: A named \code{list} with the nodal types in
#'   the model. List should be ordered according to the causal ordering of
#'   nodes. If NULL nodal types are generated. If FALSE, a parameters data
#'   frame is not generated.}
#' \item{parameters_df}{A \code{data.frame} with descriptive information
#'   of the parameters in the model}
#' \item{causal_types}{A \code{data.frame} listing causal types and the
#'   nodal types that produce them}
#'
#'
#' @seealso \code{\link{summary.causal_model}} provides summary method for
#'   output objects of class \code{causal_model}
#'
#'
#' @examples
#' make_model(statement = "X -> Y")
#' modelXKY <- make_model("X -> K -> Y; X -> Y")
#'
#' # Example where cyclicaly dag attempted
#' \dontrun{
#'  modelXKX <- make_model("X -> K -> X")
#' }
#'
#' # Examples with confounding
#' model <- make_model("X->Y; X <-> Y")
#' model$P
#' model <- make_model("Y2 <- X -> Y1; X <-> Y1; X <-> Y2")
#' dim(model$P)
#' model$P
#' model <- make_model("X1 -> Y <- X2; X1 <-> Y; X2 <-> Y")
#' dim(model$P)
#' model$parameters_df
#'
#' # A single node graph is also possible
#' model <- make_model("X")
#'
#' # Unconnected nodes not allowed
#' \dontrun{
#'  model <- make_model("X <-> Y")
#' }
#'
#' nodal_types <-
#'   list(
#'     A = c("0","1"),
#'     B = c("0","1"),
#'     C = c("0","1"),
#'     D = c("0","1"),
#'     E = c("0","1"),
#'     Y = c(
#'       "00000000000000000000000000000000",
#'       "01010101010101010101010101010101",
#'       "00110011001100110011001100110011",
#'       "00001111000011110000111100001111",
#'       "00000000111111110000000011111111",
#'       "00000000000000001111111111111111",
#'       "11111111111111111111111111111111" ))
#'
#' make_model("A -> Y; B ->Y; C->Y; D->Y; E->Y",
#'           nodal_types = nodal_types)$parameters_df
#'
#' nodal_types = list(Y = c("01", "10"), Z = c("0", "1"))
#' make_model("Z -> Y", nodal_types = nodal_types)$parameters_df
#' make_model("Z -> Y", nodal_types = FALSE)$parents_df

make_model <- function(statement,
                       add_causal_types = TRUE,
                       nodal_types = NULL) {

  parent <- NULL

  if (length(statement) != 1) {
    stop(
      paste(
        "The length of the character vector of the statement",
        "is unequal to 1. Please provide only 1 causal model."
      )
    )
  }

  if (!(is.character(statement))) {
    stop("The model statement should be of type character.")
  }

  # names and allowable names
  node_names <- strsplit(statement,
                         paste0(c("->", "<->", "<-", ";"), collapse = "|"),
                         perl = TRUE) |>
    unlist() |>
    trimws() |>
    unique()

  if (any(grepl("-", 	node_names))) {
    stop("No hyphens in varnames please; try dots?")
  }

  if (any(grepl("_", node_names))) {
    stop("No underscores in varnames please; try dots?")
  }

  # generate DAG
  x <-
    dagitty::edges(dagitty::dagitty(paste0("dag{", statement, "}"))) %>%
    data.frame(stringsAsFactors = FALSE)

  if (nrow(x) == 0) {
    dag <- data.frame(v = statement, w = NA)
  } else {
    dag  <- x %>%
      dplyr::filter(e == "->") %>%
      dplyr::select(v, w)
  }

  if (length(x) > 0 &&
      any(!(unlist(x[, 1:2]) %in% unlist(dag)))) {
    stop("Graph should not contain isolates.")
  }

  names(dag) <- c("parent", "children")

  # Procedure for unique ordering of nodes; ties broken by alphabet
  if (all(dag$parent %in% dag$children)) {
    stop("No root nodes provided.")
  }

  gen <- rep(NA, nrow(dag))
  j <- 1
  # assign 1 to exogenous nodes
  gen[!(dag$parent %in% dag$children)] <- j
  while (sum(is.na(gen)) > 0) {
    j <- j + 1
    xx <- (dag$parent %in% dag$children[is.na(gen)])
    if (all(xx[is.na(gen)])) {
      stop(paste("Cycling at generation", j))
    }
    gen[!xx & is.na(gen)] <- j
  }

  # dag is now given a causal order which is preserved in the parameters_df
  dag <- dag[order(gen, dag[, 1], dag[, 2]),]

  endog_node <- as.character(rev(unique(rev(dag$children))))
  if (all(is.na(endog_node))) {
    endog_node <- NULL
  }
  .exog_node <- as.character(rev(unique(rev(dag$parent))))
  exog_node  <- .exog_node[!(.exog_node %in% endog_node)]

  # ordered nodes
  nodes <- c(exog_node, endog_node)

  # parent count df
  parents_df <-
    data.frame(node = nodes, root = nodes %in% exog_node) |>
    dplyr::mutate(parents = vapply(node, function(n) {
      dag |>
        dplyr::filter(children == n) |>
        nrow()
    }, numeric(1))) |>
    dplyr::mutate(parent_nodes = sapply(node, function(n) {
      dag |>
        dplyr::filter(children == n) |>
        dplyr::pull(parent) |>
        paste(collapse = ", ")
    }))

  # Model is a list
  model <-
    list(
      statement = statement,
      dag = dag,
      nodes = nodes,
      parents_df = parents_df
    )

  # Nodal types
  # Check nodal types map to nodes in model
  if ((!is.null(nodal_types)) && (!all(names(nodal_types) %in% nodes))) {
    stop("Check provided nodal_types are nodes in the model")
  }

  # Check ordering and completeness
  if (!is.null(nodal_types) && !is.logical(nodal_types)) {
    if (!all(sort(names(nodal_types)) == sort(nodes))) {
      stop(
        paste(
          "Model not properly defined: If you provide nodal types you should",
          "do so for all nodes in model: ",
          paste(nodes, collapse = ", ")
        )
      )
    }

    if (!all(names(nodal_types) == nodes)) {
      message(paste(
        "Ordering of provided nodal types is being altered to",
        "match generation"
      ))
      nodal_types <- lapply(nodes, function(n)
        nodal_types[[n]])
      names(nodal_types) <- nodes
    }
  }

  if (is.logical(nodal_types)) {
    add_causal_types <- FALSE
    message(
      paste(
        "Model not properly defined: nodal_types should be NULL or specified",
        "for all nodes in model: ",
        paste(nodes, collapse = ", ")
      )
    )
  }

  if (is.null(nodal_types)) {
    nodal_types <- get_nodal_types(model, collapse = TRUE)
  }

  # Add nodal types to model
  model$nodal_types <- nodal_types

  # Add nodal type interpretation
  if (is.null(attr(nodal_types, "interpret"))) {
    attr(model$nodal_types, "interpret") <- interpret_type(model)
  }

  # Parameters dataframe
  if (!is.logical(nodal_types)) {
    model$parameters_df <- make_parameters_df(nodal_types)
  }

  # Add class
  class(model) <- "causal_model"

  # Add causal types
  if (add_causal_types) {
    model$causal_types <- update_causal_types(model)
  }

  # Add confounds if any provided

  if (any(x$e == "<->")) {
    confounds <- NULL

    z  <- x |>
      dplyr::filter(e == "<->") |>
      dplyr::select(v, w)
    z$v <- as.character(z$v)
    z$w <- as.character(z$w)

    # Reorder by reverse causal order (thus in X -> Y we have
    # type_Y conditional on type_X)
    for (i in seq_len(nrow(z))) {
      z[i,] <- rev(nodes[nodes %in% as.character(z[i, ])])
    }
    # Generate confounds list
    confounds <- as.list(as.character(z$w))
    names(confounds) <- z$v

    # Check on ineligible confound statements
    if (any(!(c(z$v, z$w) %in% nodes))) {
      stop(paste(
        "Confound relations (<->) must be between",
        "nodes contained in the dag"
      ))
    }
    model <- set_confound(model, confounds)
  }

  # Prep for export
  attr(model, "nonroot_nodes") <- endog_node
  attr(model, "root_nodes")  <- exog_node


  # assign classes
  class(model$dag) <- c("dag", "data.frame")
  class(model$statement) <- c("statement", "character")
  class(model$nodes) <- c("nodes", "character")
  class(model$parents_df) <- c("parents", "data.frame")
  class(model$nodal_types) <- c("nodal_types", "list")

  return(model)

}


#' function to make a parameters_df from nodal types
#' @param nodal_types a list of nodal types
#' @export
#' @keywords internal
#' @examples
#'
#' make_parameters_df(list(X = "1", Y = c("01", "10")))

make_parameters_df <- function(nodal_types){
  pdf <- data.frame(node = rep(names(nodal_types), lapply(nodal_types, length)),
                    nodal_type = nodal_types %>% unlist) |>
    dplyr::mutate(param_set = node,
                  given = "",
                  priors = 1,
                  param_names = paste0(node, ".", nodal_type)) |>
    dplyr::group_by(param_set) |>
    dplyr::mutate(param_value = 1/n(), gen =  cur_group_id()) |>
    dplyr::ungroup() |>
    dplyr::mutate(gen = match(node, names(nodal_types))) |>
    dplyr::select(param_names, node, gen, param_set, nodal_type,
                  given, param_value, priors)

  class(pdf) <- c("parameters_df", "data.frame")
  return(pdf)
}
macartan/gbiqq documentation built on April 28, 2024, 10:07 p.m.