Nothing
#' Make data
#'
#' @inheritParams CausalQueries_internal_inherit_params
#' @param param_type A character. String specifying type of parameters to
#' make ("flat", "prior_mean", "posterior_mean", "prior_draw",
#' "posterior_draw", "define"). With param_type set to \code{define} use
#' arguments to be passed to \code{make_priors}; otherwise \code{flat} sets
#' equal probabilities on each nodal type in each parameter set;
#' \code{prior_mean}, \code{prior_draw}, \code{posterior_mean},
#' \code{posterior_draw} take parameters as the means or as draws
#' from the prior or posterior.
#' @param n Non negative integer. Number of observations.
#' If not provided it is inferred from the largest n_step.
#' @param n_steps A \code{list}. Number of observations to be
#' observed at each step
#' @param given A string specifying known values on nodes, e.g. "X==1 & Y==1"
#' @param nodes A \code{list}. Which nodes to be observed at each step.
#' If NULL all nodes are observed.
#' @param probs A \code{list}. Observation probabilities at each step
#' @param subsets A \code{list}. Strata within which observations are to be
#' observed at each step. TRUE for all, otherwise an expression that
#' evaluates to a logical condition.
#' @param complete_data A \code{data.frame}. Dataset with complete
#' observations. Optional.
#' @param verbose Logical. If TRUE prints step schedule.
#' @param ... additional arguments that can be passed to
#' \code{link{make_parameters}}
#' @return A \code{data.frame} with simulated data.
#' @export
#' @details
#' Note that default behavior is not to take account of whether a node has
#' already been observed when determining whether to select or not. One can
#' however specifically request observation of nodes that have not been
#' previously observed.
#' @examples
#'
#' # Simple draws
#' model <- make_model("X -> M -> Y")
#' make_data(model)
#' make_data(model, n = 3, nodes = c("X","Y"))
#' make_data(model, n = 3, param_type = "prior_draw")
#' make_data(model, n = 10, param_type = "define", parameters = 0:9)
#'
#' # Data Strategies
#' # A strategy in which X, Y are observed for sure and M is observed
#' # with 50% probability for X=1, Y=0 cases
#'
#' model <- make_model("X -> M -> Y")
#' make_data(
#' model,
#' n = 8,
#' nodes = list(c("X", "Y"), "M"),
#' probs = list(1, .5),
#' subsets = list(TRUE, "X==1 & Y==0"))
#'
#'# n not provided but inferred from largest n_step (not from sum of n_steps)
#' make_data(
#' model,
#' nodes = list(c("X", "Y"), "M"),
#' n_steps = list(5, 2))
#'
#' # Wide then deep
#' make_data(
#' model,
#' n = 8,
#' nodes = list(c("X", "Y"), "M"),
#' subsets = list(TRUE, "!is.na(X) & !is.na(Y)"),
#' n_steps = list(6, 2))
#'
# # Look for X only where X has not already been observed
#'
#' make_data(
#' model,
#' n = 8,
#' nodes = list(c("X", "Y"), c("X", "M")),
#' subsets = list(TRUE, "is.na(X)"),
#' n_steps = list(3, 2))
#'
#'# Example with probabilities at each step
#'
#'make_data(
#' model,
#' n = 8,
#' nodes = list(c("X", "Y"), c("X", "M")),
#' subsets = list(TRUE, "is.na(X)"),
#' probs = list(.5, .2))
#'
#'# Example with given data
#' make_data(model, given = "X==1 & Y==1", n = 5)
make_data <- function(
model,
n = NULL,
parameters = NULL,
param_type = NULL,
nodes = NULL,
n_steps = NULL,
probs = NULL,
subsets = TRUE,
complete_data = NULL,
given = NULL,
verbose = TRUE,
...){
# n_step, n consistency
if(!is.null(n)) {
n_check(n)
}
if(!is.null(n_steps) & !is.null(n)) {
if(max(n_steps %>% unlist) > n) {
stop("n_step larger than n")
}
}
if(!is.null(n_steps) & is.null(n)) {
n <- max(n_steps %>% unlist)
}
if(is.null(n_steps) & is.null(n)) {
n <- 1
}
# n_steps and probs reconciliation
if(!is.null(n_steps) & !is.null(probs)) {
warning("Both `n_steps` and `prob` specified. `n_steps` overrides `probs`.")
}
if(is.null(n_steps) & is.null(probs)) {
n_steps <- n
}
if(is.null(n_steps)) {
n_steps <- NA
}
if(is.null(probs)) {
probs <- 1
}
# Check that parameters sum to 1 in each param_set
if(!is.null(parameters)) {
parameters <- clean_param_vector(model, parameters)
}
# If parameters not provided, make or take from model
if (is.null(parameters)) {
if (!is.null(param_type)) {
parameters <- make_parameters(model, param_type = param_type, ...)
} else {
parameters <- get_parameters(model)
}
}
# Check nodes
if(is.null(nodes)) {
nodes <- list(model$nodes)
}
if(!is.list(nodes)) {
nodes <- list(nodes) # Lets one step nodes be provided as vector
}
if(!(all(unlist(nodes) %in% model$nodes))) {
stop("All listed nodes should be in model")
}
# Complete data
if(is.null(complete_data)) {
complete_data <- make_data_single(model,
n = n,
parameters = parameters,
given = given)
}
# Default behavior is to return complete data --
# triggered if all data and all nodes sought in step 1
if(all(model$nodes %in% nodes[[1]])) {
if(probs[1]==1 || n_steps[1]==n) {
return(complete_data)
}
}
# Otherwise, gradually reveal
# Check length consistency
roster <-
tibble(
node_names = lapply(nodes, paste, collapse = ", ") %>% unlist,
nodes = nodes,
n_steps = n_steps %>% unlist,
probs = probs %>% unlist,
subsets = subsets %>% unlist
)
if (verbose) {
print(roster)
}
observed <- complete_data
observed[,] <- FALSE
# Go step by step
j <- 1
while(j <= length(nodes)) {
pars <- roster[j, ]
observed <- observe_data(
complete_data,
observed = observed,
nodes_to_observe = pars$nodes %>% unlist,
prob = pars$probs,
m = pars$n_steps,
subset = pars$subsets)
j <- j + 1
}
observed_data <- complete_data
observed_data[!observed] <- NA
return(observed_data)
}
#' Observe data, given a strategy
#'
#' @param complete_data A \code{data.frame}. Data observed and unobserved.
#' @param observed A \code{data.frame}. Data observed.
#' @param nodes_to_observe A list. Nodes to observe.
#' @param prob A scalar. Observation probability.
#' @param m A integer. Number of units to observe; if specified, \code{m}
#' overrides \code{prob}.
#' @param subset A character. Logical statement that can be applied to rows
#' of complete data. For instance observation for some nodes might depend on
#' observed values of other nodes; or observation may only be sought if
#' data not already observed!
#' @return A \code{data.frame} with logical values indicating which nodes
#' to observe in each row of `complete_data`.
#' @importFrom stats runif
#' @importFrom dplyr tibble
#' @export
#' @examples
#' model <- make_model("X -> Y")
#' df <- make_data(model, n = 8)
#' # Observe X values only
#' observe_data(complete_data = df, nodes_to_observe = "X")
#' # Observe half the Y values for cases with observed X = 1
#' observe_data(complete_data = df,
#' observed = observe_data(complete_data = df, nodes_to_observe = "X"),
#' nodes_to_observe = "Y", prob = .5,
#' subset = "X==1")
# A strategy consists of a. names of types to reveal b. number of these
# to reveal c. subset from which to reveal them
observe_data <- function(complete_data,
observed = NULL,
nodes_to_observe = NULL,
prob = 1,
m = NULL,
subset = TRUE){
if(is.null(observed)) {
observed <- complete_data; observed[,] <- FALSE
}
if(is.null(nodes_to_observe)) {
nodes_to_observe <- names(complete_data)
}
if(is.null(m)) {
m <- NA
}
# Prep observed data dataframe
observed_data <- complete_data
observed_data[!observed] <- NA
# Within which subset to reveal?
if(!is.logical(subset) & subset != "TRUE") {
sub <- with(observed_data, eval(parse(text = subset)))
} else {
sub <- rep(TRUE, nrow(observed_data))
}
if(!any(sub)) {
message("Empty subset")
}
# Target to reveal
if(is.na(m)) {
E <- prob*sum(sub) # Expected number selected
m <- floor(E) + (runif(1) < E - floor(E)) # Get best m
}
observed[sample((seq_along(sub))[sub], m), nodes_to_observe] <- TRUE
return(observed)
}
#' Generate full dataset
#'
#' @inheritParams CausalQueries_internal_inherit_params
#' @param n An integer. Number of observations.
#' @param given A string specifying known values on nodes, e.g. "X==1 & Y==1"
#' @param parameters A numeric vector. Values of parameters may be specified.
#' By default, parameters is drawn from priors.
#' @param param_type A character. String specifying type of parameters to make
#' ("flat", "prior_mean", "posterior_mean", "prior_draw",
#' "posterior_draw", "define). With param_type set to \code{define} use
#' arguments to be passed to \code{make_priors}; otherwise \code{flat} sets
#' equal probabilities on each nodal type in each parameter set;
#' \code{prior_mean}, \code{prior_draw}, \code{posterior_mean},
#' \code{posterior_draw} take parameters as the means or as draws
#' from the prior or posterior.
#' @param w Vector of event probabilities can be provided directly.
#' This is useful for speed for repeated data draws.
#' @param P A \code{matrix}. Parameter matrix that can be used to
#' generate w if w is not provided
#' @param A A \code{matrix}. Ambiguity matrix that can be used
#' to generate w if w is not provided
#' @return A \code{data.frame} of simulated data.
#' @keywords internal
#'
#' @examples
#'
#' model <- make_model("X -> Y")
#'
#' # Simplest behavior uses by default the parameter vector contained in model
#' CausalQueries:::make_data_single(model, n = 5)
#'
#' CausalQueries:::make_data_single(model, n = 5, param_type = "prior_draw")
#'
#' # Simulate multiple datasets. This is fastest if
#' # event probabilities (w) are provided
#' w <- get_event_probabilities(model)
#' replicate(5, CausalQueries:::make_data_single(model, n = 5, w = w))
#'
make_data_single <- function(
model,
n = 1,
parameters = NULL,
param_type = NULL,
given = NULL,
w = NULL,
P = NULL,
A = NULL) {
# Check that parameters sum to 1 in each param_set
# if(!is.null(parameters)) parameters <- clean_param_vector(model, parameters)
# If parameters not provided, take from model
if(is.null(parameters)) {
if(!is.null(param_type)) {
parameters <- make_parameters(model, param_type = param_type)
} else {
parameters <- get_parameters(model)
}
}
# Generate event probabilities w if missing
if(is.null(w)) {
w <- get_event_probabilities(
model,
parameters = parameters,
A = A,
P = P,
given = given)
}
# Data drawn here
make_events(model, n = n, parameters = parameters, w = w) |>
expand_data(model)
}
#' simulate_data is an alias for make_data
#' @param ... arguments for \code{\link{make_model}}
#' @return A \code{data.frame} with simulated data.
#' @export
#' @examples
#' simulate_data(make_model("X->Y"))
simulate_data <- function(...) make_data(...)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.