inst/extdata/scripts/alarm_analysis_2.R

# Repeating the experiments on alarm data
library(bninfo)
library(dplyr)
library(magrittr)
library(combinat)
library(parallel)

data(alarm)
####################################################
# Use the ALARM network because it has a causal interpretation,
# but reduce it to a subset of 4 nodes containing one v-structure.
####################################################
full_net <- empty.graph(names(alarm))
modelstring(full_net) <- paste("[HIST|LVF][CVP|LVV][PCWP|LVV][HYP][LVV|HYP:LVF]",
                         "[LVF][STKV|HYP:LVF][ERLO][HRBP|ERLO:HR][HREK|ERCA:HR][ERCA]",
                         "[HRSA|ERCA:HR][ANES][APL][TPR|APL][ECO2|ACO2:VLNG][KINK]",
                         "[MINV|INT:VLNG][FIO2][PVS|FIO2:VALV][SAO2|PVS:SHNT][PAP|PMB][PMB]",
                         "[SHNT|INT:PMB][INT][PRSS|INT:KINK:VTUB][DISC][MVS][VMCH|MVS]",
                         "[VTUB|DISC:VMCH][VLNG|INT:KINK:VTUB][VALV|INT:VLNG][ACO2|VALV]",
                         "[CCHL|ACO2:ANES:SAO2:TPR][HR|CCHL][CO|HR:STKV][BP|CO:TPR]", sep = "")
.subset <- c("INT", "PRSS", "VLNG", "MINV")
ground_truth <- bnlearn::subgraph(full_net, .subset)
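# Quick sanity check of the 4-node ground truth (the exact arc set is
# whatever bnlearn::subgraph returns for this subset).
modelstring(ground_truth)
arcs(ground_truth)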
####################################################
# Simulate data from the network structure so that the data is
# definitely faithful to the graph structure.
####################################################
n <- 100000
.data <- bn.fit(ground_truth, alarm[, .subset], method = "bayes") %>%
  rbn(n)


###############################################################
# Make an isachs-like dataset with faithful interventions.
###############################################################
set.seed(63)
obs <- cbind(rbn(ground_truth, 10000, .data, fit = "bayes"), intervention = 0)
ialarm <- lapply(names(.data), function(node){
  # Fix each intervention target at its most common observational level,
  # then simulate from the mutilated network.
  dist <- table(filter(obs, intervention == 0)[[node]])
  level <- names(dist)[which.max(dist)]
  ground_truth %>%
    bn.fit(.data, method = "bayes") %>%
    mutilated(evidence = structure(list(level), names = node)) %>%
    rbn(10000) %>%
    mutate(intervention = which(names(.) == node))
  }) %>%
  {do.call("rbind", .)} %>%
  {rbind(obs, .)}

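# Sanity checks: under intervention k, the k-th node should be pinned to a
# single level, while the observational rows (intervention == 0) keep the
# full distribution.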
table(filter(ialarm, intervention == 0)$INT)
table(filter(ialarm, intervention == 1)$INT)
table(filter(ialarm, intervention == 0)$PRSS)
table(filter(ialarm, intervention == 2)$PRSS)
table(filter(ialarm, intervention == 0)$VLNG)
table(filter(ialarm, intervention == 3)$VLNG)
table(filter(ialarm, intervention == 0)$MINV)
table(filter(ialarm, intervention == 4)$MINV)
###############################################################
# Check the strength of edges in the bootstrap model averaging of the
# observational data. This is the baseline against which the entropy of
# the interventional results is compared below.
###############################################################
# Set cpdag to FALSE so that v-structures can provide causal direction
# information.
set.seed(85)
boot <- boot.strength(.data, algorithm = "tabu",
                      algorithm.args = list(tabu = 50,
                                            score = "bde"),
                      cpdag = FALSE)
reduce_averaging(boot)
# from   to strength direction
# 1  INT PRSS        1     0.830
# 2  INT VLNG        1     0.450
# 3  INT MINV        1     0.435
# 4 PRSS VLNG        0     0.000
# 5 PRSS MINV        0     0.000
# 6 VLNG MINV        1     0.445

# Ideal results: all of the edges are recovered, but their directions
# are still quite ambiguous.

###############################################################
# Next, make sure the mutilated (interventional) data gives reasonable
# results without the prior.
###############################################################
int <- lapply(1:4, function(x) {which(ialarm$intervention == x)})
names(int) <- names(ialarm)[1:4]
library(parallel)
cl <- makeCluster(4)
set.seed(100)
boot_mbde <- boot.strength(ialarm[, 1:4], cl, algorithm = "tabu",
                           algorithm.args = list(score = "mbde",
                                                 exp = int,
                                                 tabu = 50), cpdag = FALSE)
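# Inspect the averaged arcs from the interventional run (no prior yet).
reduce_averaging(boot_mbde)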


###############################################################
# Sanity check to make sure mbde works with CS prior.
# There should be less entropy in the model averaging results.
###############################################################

mean(averaging_entropy(boot)$entropy)
# initial entropy is .6651
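# For intuition only (not the implementation used here): a rough sketch of a
# per-edge binary entropy computed from an averaging table, assuming the
# relevant probability is p = strength * direction. bninfo's
# averaging_entropy() is the function actually used in this script and may
# differ in its details.
edge_entropy_sketch <- function(averaging){
  p <- averaging$strength * averaging$direction
  ifelse(p <= 0 | p >= 1, 0, -(p * log2(p) + (1 - p) * log2(1 - p)))
}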

beta <- boot %>%
  mutate(prob = strength * direction) %>%
  select(from, to, prob)

INT <- lapply(1:4, function(x) {which(ialarm$intervention == x)})
names(INT) <- names(ialarm)[1:4]
library(parallel)
cl <- makeCluster(4)
set.seed(100)
boot_mbde <- boot.strength(ialarm[, 1:4], cl, algorithm = "tabu",
                           algorithm.args = list(score = "mbde",
                                                 exp = INT, prior = "cs",
                                                 # beta = beta,
                                                 # iss = 2000000000,
                                                 tabu = 50), cpdag = FALSE)
plot(beta$prob, (boot_mbde$strength * boot_mbde$direction),
     ylim = c(0,1), xlim = c(0,1))


boot_mbde %>%
  reduce_averaging
#   from   to strength direction
# 1  INT PRSS        1     0.830
# 2  INT VLNG        1     0.450
# 3  INT MINV        1     0.435
# 4 PRSS VLNG        0     0.000
# 5 PRSS MINV        0     0.000
# 6 VLNG MINV        1     0.445

# from   to strength  direction
# 1  INT PRSS    0.835 0.02380952
# 2  INT VLNG    0.875 0.09725159
# 3  INT MINV    0.825 0.03011364
# 4 PRSS VLNG    0.640 0.69059832
# 5 PRSS MINV    0.635 0.36841737
# 6 VLNG MINV    0.870 0.28111472


# The prior was constructed from a dataset of 100,000 observations, so
# additional purely observational data should not be more informative
# than the same amount of interventional data.
obs <- rbn(ground_truth, nrow(ialarm), .data, fit = "bayes")
boot_bde <- boot.strength(obs, cl, algorithm = "tabu",
                          algorithm.args = list(score = "bde",
                                                prior = "cs",
                                                beta = beta,
                                                iss = nrow(.data),
                                                tabu = 50), cpdag = FALSE)
abs(mean(averaging_entropy(boot_bde)$entropy) -
      mean(averaging_entropy(boot_mbde)$entropy)) > .01




#    from   to  boot_s boot_d bde_s bde_d mbde_s mbde_d
# 1   PMB  PAP  ...

################################################################################
## Algorithm for evaluating an ordering of interventions
################################################################################
# For a given ordering of intervention targets, sequentially add each
# target's interventional data, rerun model averaging using the previous
# averaging as a CS prior, and record the mean edge entropy at each step.
.new_ordering <- function(ordering, starting_boot, wl = NULL, bl = NULL){
  current_beta <- starting_boot %>%
    mutate(prob = strength * direction) %>%
    select(from, to, prob)
  entropies <- mean(averaging_entropy(starting_boot)$entropy)
  initial_sample_size <- nrow(.data)
  prior_sample_size <- initial_sample_size
  node_names <- names(ialarm)[1:4]
  for(i in seq_along(ordering)){
    print(ordering[1:i])
    int_dex <- c(0, which(node_names %in% ordering[1:i]))
    int_data <- filter(ialarm, intervention %in% int_dex)
    INT <- lapply(1:4, function(x) {which(int_data$intervention == x)})
    names(INT) <- node_names
    boot_i <- boot.strength(int_data[, 1:4], cl, algorithm = "tabu",
                            algorithm.args = list(score = "mbde",
                                                  exp = INT, prior = "cs",
                                                  beta = current_beta,
                                                  whitelist = wl,
                                                  blacklist = bl,
                                                  iss = prior_sample_size,
                                                  tabu = 50), cpdag = FALSE)
    entropies <- c(entropies, mean(averaging_entropy(boot_i)$entropy))
    # Set up next iteration
    current_beta <- boot_i %>%
      mutate(prob = strength * direction) %>%
      select(from, to, prob)
    prior_sample_size <- nrow(int_data) + initial_sample_size
  }
  names(entropies) <- c("obs", ordering)
  entropies
}

new_ordering <- function(ordering, starting_boot, wl = NULL, bl = NULL){
  # This little hack should send errors out as messages,
  # and just retry the algorithm until successful rather than
  # stopping.
  entropies <- "error"
  while(identical(entropies, "error")){
    entropies <- tryCatch(.new_ordering(ordering, starting_boot,
                                        wl = wl, bl = bl),
                          error = function(e){
                            message(e)
                            "error"
                          })
  }
  entropies
}
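# Example of a single run (the full sweep over all permutations follows below):
# new_ordering(c("INT", "PRSS", "VLNG", "MINV"), starting_boot = boot)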
################################################################################
## Evaluate every intervention ordering, starting from the observational averaging
################################################################################
cl <- makeCluster(4)
int_targets <- c("INT", "PRSS", "VLNG", "MINV")
permutations <- permn(int_targets)
averaging_entropies_vanilla <- lapply(permutations, new_ordering, starting_boot = boot)
save(averaging_entropies_vanilla, file = "averaging_entropies_vanilla_alarm_2-10-16.Rdata")
################################################################################
## Algorithm for the intervention-selection simulation ("stim sim")
################################################################################
# Greedy simulation: at each step, estimate the information gain of each
# remaining candidate intervention, select the top candidate, and record its
# predicted entropy, information gain, and p-value against a null
# information-gain distribution.
do_sim <- function(model_averaging){
  candidates <- int_targets
  selected <- NULL
  entropies <- NULL
  info_gains <- NULL
  p_vals <- NULL
  while(length(candidates) > 0){
    null_dist <- null_IG_distribution(.data, model_averaging, selected,
                                      k = 30, debug = TRUE)
    sim_results <- select_next_inhibition(.data, model_averaging, selected,
                                          candidates, k = 30, debug = TRUE)
    next_inh <- sim_results$top_candidate
    next_entropy <- sim_results$predicted_entropy
    next_gain <- sim_results$predicted_gain
    entropies <- c(entropies, next_entropy)
    info_gains <- c(info_gains, next_gain)
    p_val <- right_side_pval(null_dist, next_gain)
    p_vals <- c(p_vals, p_val)
    candidates <- setdiff(candidates, next_inh)
    selected <- c(selected, next_inh)
    message("null dist when ", selected, " is selected looks like ",
            paste(round(head(null_dist), 4), collapse = ", "))
    print(p_val)
    print(selected)
    if(length(candidates) == 0) break
  }
}
################################################################################
## Run the intervention-selection simulation using only observational data (no prior)
################################################################################
no_prior_result <- do_sim(averaging_entropy(boot))
save(no_prior_result, file = "no_prior_result_alarm_2-10-16.Rdata")