#' Declare a structural model
#'
#' Declare a set of exogenous and endogenous variables and a set of functions,
#' \code{var_functions}, indicating how variables are affected by each other.
#'
#' @param var_functions a list of structural equations. Each element in the list must be a function specifying how each variable relates to its direct causes. Causes's functions should be specified before effects' functions i.e. if \code{X} causes \code{Y} then \code{f_X} should be specified before \code{f_Y}.
#' @param var_names an optional list of node names.
#' @param P a function to generate a context. The joint probability distribution over the exogenous variables, \code{U}.
#' @return a list containing the declared model.
#' @export
#' @examples
#' pt_model()
pt_model <- function(
var_functions =
list( f_S = function(U,V) U[1] < .5,
f_X = function(U,V) U[2] < .5,
f_C = function(U,V) (1- (V[1]*V[2]))*(U[3]>.2) +(U[3]<.1),
f_R = function(U,V) V[2]*V[3]*(U[4]>.2) + (U[4]<.1),
f_Y = function(U,V) V[3]*V[4]*(U[5]>.2)+ (U[5]<.1)
),
var_names = c("S", "X","C", "R", "Y"),
P = function() runif(length(var_functions))){
model <- list(var_names = var_names, var_functions = var_functions, P = P)
return(model)
}
#' Function to generate a world (values for V) from a structural model
#'
#'
#' @param model a model made using \code{\link{pt_model}}.
#' @param U a context. A realization of the exogenous variables in the structural model. Generally generated using \code{model$P()}.
#' @param do an optional list of do operations on nodes. An intervention.
#' @return a world.
#' @export
#' @examples
#' M <- pt_model()
#' pt_world(M)
#' pt_world(M, U = runif(5))
pt_world <- function(model, U = NULL, do = NULL){
# Get U if needed
if(is.null(U)){U <- model$P()}
# Tidy U if needed
if(!is.data.frame(U)) U <- data.frame(t(U))
names(U) <- paste0("U_",model$var_names)
# Add V
n <- length(model$var_names)
V <- rep(NA, n)
if(!is.null(do)) for(i in 1:n){if(!is.na(do[i])) {
model$var_functions[[i]] <- function(U,V) do[[i]]}}
for(i in 1:n) V[[i]] <- model$var_functions[[i]](U,V)
V <- data.frame(t(V))
names(V) <- model$var_names
cbind(V,U)
}
#' Function to generate a dataset of possible worlds from a structural model with V provided
#'
#'
#' @param model a model made using \code{\link{pt_model}}.
#' @param N Number of simulations
#' @param U a sim*n_nodes dataframe .
#' @param do an optional list of do operations on nodes. An intervention.
#' @return a dataframe with worlds
#' @export
#' @examples
#' M <- pt_model()
#' pt_worlds(M, N = 4)
#' pt_worlds(M, N = 4)
#' U = data.frame(matrix(runif(10), 2, 5))
#' pt_worlds(M, U = U)
pt_worlds <- function(model, N = NULL, do = NULL, U = NULL){
if(!is.null(U) & !is.null(N)) if(nrow(U)!=N) stop("U should have N rows")
if(is.null(U) & is.null(N)) stop("Please provide N or U")
if(is.null(U) & !is.null(N)) U <- rep(NULL, N)
if(is.null(N) & !is.null(U)) N <- nrow(U)
x <- pt_world(model, do = do, U = U[1,])
if(N>1) {for(j in 2:N) x <- rbind(x, pt_world(model, do = do, U = U[j,]))}
x
}
#' A function to figure out which worlds satisfy some query
#'
#' The output is a list that contains a matrix of contexts, U, the outcomes associated with them, V, and an indicator for whether the query is satisfied or not in each context, A.
#'
#' @param model A model made using \code{\link{pt_model}}.
#' @param U A matrix of contexts (worlds). If missing this is generated based on sims.
#' @param ops A set of operations.
#' @param query A query defined on outcomes on observables, V.
#' @param sims Optional number of simulations for draws; defaults to 500 if U not defined.
#'
#' @export
#'
#' @examples
#' pt_which(model = pt_model(),
#' ops = list(c(0, NA, NA, NA, NA), c(1, NA, NA, NA, NA)),
#' query = function(x) (x[[1]][5] == 1) & (x[[2]][5] == 0),
#' sims = 3)
#'
pt_which <- function(
model, # the model
U = NULL, # dimensionality: sims * n_nodes
ops,
query,
sims = 500) {
# U
if(is.null(U)) {
U <- data.frame(t(replicate(sims, model$P())))
names(U) <- paste0("U_", model$var_names)
}
# V
V <- pt_worlds(model, U = U)[model$var_names]
# A
k <- length(ops)
f <- function(u) { # For each context, apply operations, and take result
x <- list()
for(i in 1:k){x[[i]] <- pt_world(model, u, do = ops[[i]])}
query(x)}
list(U = U, A = apply(U, 1, f), V = V)
}
#' A function to plot worlds that satisfy a query
#'
#' @param which_worlds A worlds evaluation object generated by `pt_which`.
#' @param col1 Color parameter for query = TRUE
#' @param col2 Color parameter for query = FALSE
#'
#' @export
#'
#' @examples
#' qw <- pt_which(pt_model(),
#' ops = list(c(0, NA, NA, NA, NA), c(1, NA, NA, NA, NA)),
#' query = function(x) (x[[1]][5] == 1) & (x[[2]][5] == 0))
#' pt_worlds_plot(qw)
pt_worlds_plot <- function(which_worlds, col1 = "black", col2 = "pink") {
pairs(which_worlds$U,
col = ifelse(which_worlds$A, col1, col2),
pch = ".")}
#'
#' #'
#' #' Calculate the Probability that a binary clue K = 1, given W.
#' #' K enters as a vector of the form c(TRUE, FALSE, ...)
#' #'
#' #' @param model A model, made using pt_model
#' #' @param ops A set of operations
#' #' @param query A query defined on outcomes on observables, V
#' #' @param sims optional number of simulations for draws; defaults to 500 if U not defined
#' #' @param W Preexisting data
#' #' @param K Clues being looked for
#' #' @export
#' #' @examples
#' #' pKw(model = pt_model(),
#' #' ops = list(c(0, NA, NA, NA, NA), c(1, NA, NA, NA, NA)),
#' #' query = function(x) (x[[1]][5] == 1) & (x[[2]][5] == 0),
#' #' sims = 3,
#' #' W = matrix(c(0,1,1,0,0), 1, 5),
#' #' K = matrix(c(1,0,0,0,0), 1, 5))
#' pKw <- function(model, ops, query, sims=NULL, U=NULL, W, K){
#'
#' if(is.null(dim(K))) K <- matrix(K, 1)
#' if(!is.matrix(K)) stop("Ks should be provide as a matrix with as many columns as elements in V")
#' if(sum(K)>1) stop("Include only one clue please")
#' if(is.null(U)) U <- pt_worlds(model, N = sims, include_V = FALSE)
#'
#' pt_worlds(model, U = U)
#' V <- pt_worlds[model$var_names]
#' # sims <- nrow(U)
#'
#' # result <- pt_which(model, U=U[paste0("U_", model$var_names)], ops = ops, query = query)
#'
#' in_W <- sapply(1:sims, function(i) !(0 %in% (1*(result$V[i,] == W))))
#'
#' # Two cases: when W is not possible and when it is:
#' if(sum(in_W) ==0 ) {out <- NA
#' } else { out <- mean(result$V[in_W,K] == 1) } # ie
#' out
#' }
#' Calculate what is learned from a set of observations, Ks, given existing data, W
#'
#' Defaults to the posterior variance on lambda_b - lambda_a for simple pt model
#' @param model A model, made using pt_model
#' @param ops A set of operations
#' @param query A query defined on outcomes on observables, V
#' @param W Observed variables: Vector indicating what is known on each node. Values NA, 0, 1 etc.
#' @param Ks TRUE/FALSE matrix with one column per node indicating which clues to be sought.
#' @param sims optional number of simulations for draws; defaults to 500 if U not defined
#' @param f any function; defaults to f=var, posterior variance. For posterior mean set f = mean
#' @param detail defaults to FALSE. When TRUE output includes listing of possible patterns and probability of each
#' @export
#' @examples
#' model <- pt_model()
#' ops = list(c(0, NA, NA, NA, NA), c(1, NA, NA, NA, NA))
#' query = function(x) (x[[1]][5] ==1) & (x[[2]][5] == 0)
#' Ks = rbind(c(TRUE, FALSE, FALSE, FALSE, FALSE),
#' c(TRUE, TRUE, FALSE, FALSE, FALSE),
#' c(TRUE, TRUE, FALSE, TRUE, FALSE),
#' c(TRUE, TRUE, TRUE, TRUE, TRUE))
#' pt_learning(model, ops, query,
#' sims = 100,
#' Ks = Ks)
#' pt_learning(model, ops, query,
#' sims = 100,
#' W = c(0, NA, NA, NA, NA),
#' Ks = Ks)
#' U <- as.matrix(t(replicate(100, model$P())))
#' pt_learning(model, ops, query,
#' U = U,
#' W = c(0, NA, NA, NA, NA),
#' Ks = Ks)
#'
pt_learning <- function(
model,
ops,
query,
sims = 2000,
U = NULL,
W = NULL,
Ks = NULL, # Or a matrix with one column per node indicating whether to be sought or not
f = var,
add_means = FALSE,
detail = FALSE, # provide detailed output on clue probabilities
detailround = 5 # Digits on rounding for detailed output
){
if(is.null(W)) W <- rep(NA, length(model$var_names))
if(!is.null(Ks) & is.null(dim(Ks))) Ks <- matrix(Ks, 1)
if(!is.null(Ks) & !is.matrix(Ks)) stop("Ks should be provided as a matrix with as many columns as elements in V")
if(is.null(U)) U <- as.matrix(t(replicate(sims, model$P())))
sims <- nrow(U)
V <- pt_worlds(model, U=U)[model$var_names]
A <- pt_which(model, U=U, ops = ops, query = query)$A
prior <- f(A) # First result
in_W <- sapply(1:sims, function(i) !(0 %in% (1*(V[i,] == W))))
# Two cases:
# Impossible W:
if(all(!in_W)) { print("W is 0 probability event")
} else {
# Possible W
posterior_given_W <- f(A[in_W]) # Second result
# Now go through each of the K patterns given in Ks, check all implied permutations
# of possible findings, get prob for each and post loss function (f, var) for each
# Expected probability and variance
# Expected probability and variance
epv <- function(K, detail = FALSE) {
Kranges <- apply(as.matrix(V[,K]), 2, function(j) length(unique(j)))
if(is.null(Ks) | !(length(Kranges)>0)) return(posterior_given_W)
# All permutations of the clues that are sought
perms <- perm(Kranges)
patterns <- matrix(NA, nrow(perms), ncol(V))
patterns[, K] <- perms
n_patterns <- nrow(patterns)
# Expected prob and variance over all the combinations of K
prob_var <- sapply(1:n_patterns, function(j) {
in_K <- sapply(1:sims, function(i) !(0 %in% (1*(V[i,] == patterns[j,]))))
p_Kv <- mean(in_K[in_W]) # probability of K pattern, given W
V_given_WK <- f(A[in_W & in_K]) # 0 var for 0 prob event
if(sum(in_W & in_K) <= 1) V_given_WK <- 0
c(prob = p_Kv, variance = V_given_WK)
})
# If requestd this segment takes and returns the detailed weights and variance
if(detail) {
pattern <- sapply(1:n_patterns, function(i) paste(patterns[i,], collapse = "-"))
x <- rbind(pattern, round(prob_var,detailround))
rownames(x) <- c("K-pattern", "prob pattern", "var|pattern");
return(x) }
return(sum(prob_var[1,]*prob_var[2,]))
}
# Expected variance for each strategy
e_p_v <- apply(Ks, 1, epv)
}
# Output
##################################################################################
if(all(in_W == 0)){ posterior_given_W <- NA
e_p_v <- rep(NA, nrow(Ks))
}
if(is.null(Ks)) return(c(prior = prior, v_w = posterior_given_W))
# Main output
out <- c(prior, posterior_given_W, e_p_v)
names(out) <- c("Prior v", "v |W", sapply(1:nrow(Ks), function(r) {paste(1*Ks[r,], collapse = "")} ))
# Detail option
if(detail) out <- list(out, apply(Ks, 1, epv, detail = TRUE))
out
}
#' Get matrix of possible Ws: the set of possible data for a case when data is sought for *up to* m out of n nodes
#'
#' @param model a fitted pt model
#' @param m Number of W variations to make
#' @export
#' @examples
#' possible_W(3, 2)
#' possible_W(3, 1)
possible_W <- function(n, m) {
W <- perm(rep(3, n))
W[W==2] <- NA
W <- W[apply(!is.na(W), 1, sum) <= m,]
W}
#' Get matrix of possible Ks observation possibilities ( is observed or it is not observed) on up to m out of n nodes
#'
#' @param model a fitted pt model
#' @export
#' @examples
#' possible_K(3,1)
#'
possible_K <- function(n, m) {
K <- perm(rep(2, n))
K <- K[rowSums(K) <= m,]
K==1}
#' Asssess expected learning from a host of strategies.
#'
#' @param model a fitted pt model
#' @param m_w Number of Ws to look at
#' @param m_k Number of Ks to look at
#' @export
#' @examples
#' model <- pt_model()
#' ops = list(c(0, NA, NA, NA, NA), c(1, NA, NA, NA, NA))
#' query = function(x) (x[[1]][5] ==1) & (x[[2]][5] == 0)
#' pt_strategies(model = model, ops = ops, query = query, sims = 100)
pt_strategies <- function(model,ops, query,
m_W = 1,
m_K = 1,
n = length(model$var_names),
possible_Ws = possible_W(n, m_W),
possible_Ks = possible_K(n, m_K),
sims = 2000,
round = 2,
Rsq = FALSE,
f = var,
U = NULL,
symbol_unknown ="?"){
if(!is.matrix(possible_Ws)) stop("possible_Ws should be a matrix with a column for each V")
if(!is.matrix(possible_Ks)) stop("possible_Ks should be a matrix with a column for each V")
if(is.null(U)) U <- as.matrix(t(replicate(sims, model$P())))
nW <- nrow(possible_Ws)
nK <- nrow(possible_Ks)
x <- sapply(1:nW, function(i) {
pt_learning(model, ops, query, U = U, W = possible_Ws[i,], Ks = possible_Ks, f=f)})
if(Rsq) {
x2 <- 1+t((t(x) - x[2,])/x[2,])
x[3:nrow(x)] <- x2[3:nrow(x)]}
x <- round(t(x), round)
# Add rownames that show the W data pattern
wn <- possible_Ws
wn[is.na(wn)] <- symbol_unknown
rownames(x) <- sapply(1:nrow(wn), function(r) {paste(wn[r,], collapse = "")} )
x
}
#' Declare a process tracing design from a causal model
#'
#' Warning -- do not use redesign with design produced from this designer
#'
#' @param true_model A model: not available to applied researcher
#' @param estimation_model A model: as supposed by applied researcher
#' @param ops Operations that form part of the query
#' @param query The query
#' @param strategy Data gathering strateg -- a vector of TRUES or FALSES indicating which nodes are observed
#' @param N Population for which inferences might be made. Estimand averages over these.
#' @param sims Precision for posterior calculation
#' @export
#' @examples
#' model <- pt_model()
#' ops = list(c(0, NA, NA, NA, NA), c(1, NA, NA, NA, NA))
#' query = function(x) (x[[1]][5] ==1) & (x[[2]][5] == 0)
#' design <- pt_designer(model, ops = ops, query = query, sims = 500)
#' draw_estimates(design)
pt_designer <- function(true_model,
estimation_model = true_model,
ops,
query,
strategy = TRUE, # Nodes that get observed
N = 1, # Population for which inferences might be made
sims = 2000 # Precision for posterior calculation
){
believed_distn <- pt_which(estimation_model, ops = ops, query = query, sims = sims)
estimand_function <- function(data, label) {
U <- data[paste0("U_", true_model$var_names)]
x <- mean(pt_which(true_model, ops = ops, query = query, U = U)$A)
data.frame(estimand_label = label, estimand = x, stringsAsFactors = FALSE)}
# function to get the mean posterior estimate over a set of cases
pt_est = function(data, believed_distn, strategy){
# Actual data
Ks <- data[true_model$var_names]
# Observed data
Ks[!strategy] <- NA
# Possible data, consistent with observed Ks
possible <- apply(believed_distn$V, 1, function(x) !(0 %in% ((Ks == x))))
# Posteriors condition on possible
data.frame(estimate = mean(believed_distn$A[possible]),
prior_var = var(believed_distn$A),
prior = mean(believed_distn$A),
posterior_var = var(believed_distn$A[possible]))}
diagnosands <- declare_diagnosands(
bias = mean(estimate - estimand, na.rm = TRUE),
rmse = sqrt(mean((estimate - estimand)^2, na.rm = TRUE)),
mean_estimand = mean(estimand, na.rm = TRUE),
mean_prior_var = mean(prior_var, na.rm = TRUE),
mean_posterior_var = mean(posterior_var, na.rm = TRUE),
failed_estimation = mean(is.na(estimate)),
keep_defaults = FALSE)
design <-
declare_population(data = pt_worlds(true_model, N = N)) +
declare_estimand(handler = estimand_function, label = "Q") +
declare_sampling(n = 1) +
declare_estimator(handler = tidy_estimator(pt_est),
believed_distn = believed_distn,
strategy = strategy,
estimand = "Q")
set_diagnosands(design, diagnosands)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.