Nothing
#' Set confound
#'
#' Adjust parameter matrix to allow confounding.
#'
#'
#' Confounding between X and Y arises when the nodal types for X and Y are not
#' independently distributed. In the X -> Y graph, for instance, there are 2
#' nodal types for X and 4 for Y. There are thus 8 joint nodal types:
#' \preformatted{
#' | | t^X | | |
#' |-----|----|--------------------|--------------------|-----------|
#' | | | 0 | 1 | Sum |
#' |-----|----|--------------------|--------------------|-----------|
#' | t^Y | 00 | Pr(t^X=0 & t^Y=00) | Pr(t^X=1 & t^Y=00) | Pr(t^Y=00)|
#' | | 10 | . | . | . |
#' | | 01 | . | . | . |
#' | | 11 | . | . | . |
#' |-----|----|--------------------|--------------------|-----------|
#' | |Sum | Pr(t^X=0) | Pr(t^X=1) | 1 |
#' }
#'
#' This table has 8 interior elements and so an unconstrained joint
#' distribution would have 7 degrees of freedom. A no confounding assumption
#' means that Pr(t^X | t^Y) = Pr(t^X), or Pr(t^X, t^Y) = Pr(t^X)Pr(t^Y).
#' In this case there would be 3 degrees of freedom for Y and 1 for X,
#' totaling 4 rather than 7.
#'
#' \code{set_confound} lets you relax this assumption by increasing the
#' number of parameters characterizing the joint distribution. Using the fact
#' that P(A,B) = P(A)P(B|A) new parameters are introduced to capture P(B|A=a)
#' rather than simply P(B). For instance here two parameters
#' (and one degree of freedom) govern the distribution of types X and four
#' parameters (with 3 degrees of freedom) govern the types for Y given the
#' type of X for a total of 1+3+3 = 7 degrees of freedom.
#'
#'
#' @inheritParams CausalQueries_internal_inherit_params
#' @param confound A \code{list} of statements indicating pairs of nodes whose
#' types are jointly distributed (e.g. list("A <-> B", "C <-> D")).
#' @return An object of class \code{causal_model} with updated parameters_df
#' and parameter matrix.
#' @export
#' @examples
#'
#' make_model('X -> Y; X <-> Y') |>
#' grab("parameters")
#'
#' make_model('X -> M -> Y; X <-> Y') |>
#' grab("parameters")
#'
#' model <- make_model('X -> M -> Y; X <-> Y; M <-> Y')
#' model$parameters_df
#'
#' # Example where set_confound is implemented after restrictions
#'make_model("A -> B -> C") |>
#' set_restrictions(increasing("A", "B")) |>
#' set_confound("B <-> C") |>
#' grab("parameters")
#'
#' # Example where two parents are confounded
#' make_model('A -> B <- C; A <-> C') |>
#' set_parameters(node = "C", c(0.05, .95, .95, 0.05)) |>
#' make_data(n = 50) |>
#' cor()
#'
#' # Example with two confounds, added sequentially
#' model <- make_model('A -> B -> C') |>
#' set_confound(list("A <-> B", "B <-> C"))
#' model$statement
#' # plot(model)
set_confound <- function(model, confound = NULL) {
# handle global variables
x <- NULL
is_a_model(model)
if (any(confound |> lapply(function(k)
grepl(";", k)) |> unlist())) {
stop("Please provide multipe confounds as a list")
}
given <- gen <- NULL
if (!(all(model$parameters_df$given == ""))) {
stop(paste(
"Confounds have already been declared.",
"Please declare confounds only once."
))
}
# extract the node from the nodal type name
node_from_type <- function(type) {
vapply(strsplit(type, "\\."), function(x) {
x[[1]]
}, character(1))
}
# extract the conditions from the nodal type name
conditions_from_type <- function(type) {
lapply(strsplit(type, "_"), function(x)
gsub('\\.', '', unique(x)))
}
# Housekeeping
if (is.null(confound)) {
message("No confound provided")
return(model)
}
if (is.null(model$P)) {
model <- set_parameter_matrix(model)
}
# Turn A <-> B format to lists
for (j in seq_along(confound)) {
if (grepl("<->", confound[[j]])) {
z <- vapply(strsplit(confound[[j]], "<->"), trimws, character(2))
z <- rev(model$nodes[model$nodes %in% as.character(z)])
confound[j] <- as.character(z[2])
names(confound)[j] <- z[1]
}
}
# Add confounds to model statement
model$statement <-
paste0(model$statement,
"; ",
paste(names(confound), "<->", confound, collapse = "; "))
# Expand parameters_df ------------------------------------------------------
names_P <- names(model$P)
for (i in seq_along(confound)) {
from_nodal_types <-
model$parameters_df |>
dplyr::filter(node == confound[i]) |>
dplyr::mutate(x = paste0(node, ".", nodal_type)) |>
dplyr::pull(x) |>
unique()
to_add <-
lapply(from_nodal_types, function(j)
model$parameters_df |>
dplyr::filter(node == names(confound)[i]) |>
dplyr::mutate(
given = ifelse(given == "", j, paste0(given, ", ", j)),
param_names = paste0(param_names, "_", j),
param_set = paste0(param_set, ".", j)
)) |>
dplyr::bind_rows()
model$parameters_df <-
rbind(filter(model$parameters_df, node != names(confound)[i]),
to_add) |>
dplyr::arrange(gen, param_set)
}
# P matrix expand -----------------------------------------------------------
for (i in seq_along(confound)) {
from_nodal_types <-
model$parameters_df |>
dplyr::filter(node == confound[i]) |>
dplyr::mutate(x = paste0(node, ".", nodal_type)) |>
dplyr::pull(x) |>
unique()
to_add <-
lapply(from_nodal_types, function(j) {
newP <- model$P |>
dplyr::filter(node_from_type(rownames(model$P)) == names(confound)[i])
rownames(newP) <- paste0(rownames(newP), "_", j)
# delete relevant entries: need to figure if *all* conditioning
# nodes are in observed data. Sometimes never: eg:"X.1_Y.00_X.0"
# ie. (Z| (X1), (Y00|X0)
row_elements <- conditions_from_type(rownames(newP))
for (k in seq_along(row_elements)) {
to_zero <- vapply(row_elements[k][[1]], function(nd) {
vapply(nd, function(ndd) {
grepl(ndd, names_P)
}, logical(length(names_P)))
}, logical(length(names_P))) |>
apply(1, prod)
newP[k, to_zero == 0] <- 0
}
newP
}) |>
dplyr::bind_rows()
# Remove impossible rows with all zeros
to_add <- filter(to_add, apply(to_add, 1, sum) != 0)
# Add in
model$P <- model$P |>
dplyr::filter(node_from_type(rownames(model$P)) != names(confound)[i]) |>
rbind(to_add)
}
# Clean up ------------------------------------------------------------------
# Remove existing distributions
# Distributions are no longer valid
if (!is.null(model$prior_distribution)) {
model$prior_distribution <- NULL
}
if (!is.null(model$posterior_distribution)) {
model$posterior_distribution <- NULL
}
if (!is.null(model$stan_objects)) {
model$stan_objects <- NULL
}
# P reorder
model$parameters_df <- model$parameters_df |>
dplyr::filter(param_names %in% row.names(model$P))
model$P <-
model$P[match(model$parameters_df$param_names, rownames(model$P)), ]
class(model$P) <- c("parameter_matrix", "data.frame")
rownames(model$parameters_df) <- NULL
# Export
attr(model$P, "param_set") <- unique(model$parameters_df$param_set)
return(model)
}
set_confounds <- set_confound
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.