##############################################################
# Bayesian Networks
# 4/4/2021
# created by T. Gärtner (thomas.gaertner@student.hpi.de)
##############################################################
#' Prepare Dag for BN Methods
#' @description This function reformat a `DAGitty` to `bn.learn` DAG. This is required for further methods.
#' @param dag DAG in a DAGitty format
#' @return bnlearn graph based on DAGitty graph
#' @examples
#' bn.prep.dag(simpatdag)
#' @export
bn.prep.dag <- function(dag){
exposure <- dagitty::exposures(dag)
outcome <- dagitty::outcomes(dag)
names <- names(dag)
bn.dag <- bnlearn::empty.graph(nodes = names(dag))
edges <- dagitty::edges(dag)
edges$from <- edges$v
edges$to <- edges$w
edges[] <- lapply(edges,as.character)
bnlearn::arcs(bn.dag) = edges[,c("from","to")]
plot(bn.dag, main = "Transformed Bn DAG")
return(list(dag = bn.dag, exposure = exposure, outcome = outcome, variables=names))
}
#' Bayesian Network Analysis
#' @description This function is an Interface to the fitting function for Bayesian Networks from bnlearn
#' @param dag DAG generated by
#' @param data R data frame to fit the Bayesian network
#' @param method Method used to analyze. Default is `bayes`. Alternative is `mle`
#' @return bn.fit object
#' @examples
#' # specify column names
#' id <- "patient_id"
#' time_col <- "day"
#'
#' # Load data
#' load("data/simpatdag.rda")
#' load("data/simpatdat.rda")
#'
#' # Dag preprocessing
#' bn.dag <- bn.prep.dag(simpatdag)
#'
#' # Data Preprocessing (Factorization)
#' simpatdat$Uncertain_Low_Back_Pain <- as.factor(simpatdat$Uncertain_Low_Back_Pain)
#' simpatdat$Activity <- cut(simpatdat$Activity, 3, labels=c("low Activity", "middle Activity", "high Activity"))
#' bn.data <- bn.prep.data(bn.dag, simpatdat, id, time_col)
#' bn.fit.dag(bn.data, bn.dag, method="bayes")
#' @export
bn.fit.dag <- function(data, dag, method = "bayes"){
fit <- bnlearn::bn.fit(dag$dag, data, method = method)
return(fit)
}
#' Preprocess data
#' @description This function is preprocessing a dataframe to be used for bn.fit. For that, columns were transformed into factors and lags were created if required.
#' @param dag result from bn.prep.dag
#' @param data R data frame to fit the Bayesian network
#' @param id column name of patient identifier (only needed when lag column)
#' @param time_col column name of time (integer expected, only needed when lag column)
#' @param factorize specify weather all columns should be factors or not
#' @return data frame
#' @examples
#' # specify column names
#' id <- "patient_id"
#' time_col <- "day"
#'
#' # Load data
#' load("data/simpatdag.rda")
#' load("data/simpatdat.rda")
#'
#' # Dag preprocessing
#' bn.dag <- bn.prep.dag(simpatdag)
#'
#' # Data Preprocessing (Factorization)
#' simpatdat$Uncertain_Low_Back_Pain <- as.factor(simpatdat$Uncertain_Low_Back_Pain)
#' simpatdat$Activity <- cut(simpatdat$Activity, 3, labels=c("low Activity", "middle Activity", "high Activity"))
#' bn.data <- bn.prep.data(bn.dag, simpatdat, id, time_col)
#' @export
bn.prep.data <- function(dag, data, id="patient_id", time_col="day", factorize=FALSE, normalize=FALSE){
library(doParallel)
# get relevant columns
names <- dag$variables
# calculate lag
for(name in names){
# test, if lag is included
if(grepl(".lag_", name, fixed=TRUE)){
# extract lag and col information
res <- stringr::str_split(name, ".lag_", n = 2, simplify = TRUE)
var_name <- res[1]
lag <- as.numeric(res[2])
# create lag column
no_cores <- detectCores() - 1
registerDoParallel(cores=no_cores)
cl <- makeCluster(no_cores, type="FORK")
data <- foreach(id=unique(data[,"patient_id"]), .combine=rbind) %dopar% {
dat <- data[data[,"patient_id"]==id,]
dat[,name] <- c(rep(NA, lag), dat[,var_name])[1:nrow(dat)]
dat}
stopCluster(cl)
#for(row in c(1:nrow(data))){
# data_point <- data[data[,id] == data[row,id] & data[,time_col] == data[row,time_col]-lag,]
# if (!nrow(data_point)==0){
# data[row,name] <- data_point[1,var_name]
# }else{
# data[row,name] <- NA
# }
#}
if(lapply(data, class)[[var_name]]=="factor"){
data[,name] <- as.factor(data[,name])
levels(data[,name]) <- levels(data[,var_name])
}
}
# factorize
if(normalize){
data[,name] <- prep.normalize(data = data[,name], scale="uni")
}
if(factorize & name != dag$outcome){
data[,name] <- as.factor(data[,name])
}
}
# filter column names
data <- data[,names]
# return dataframe
return(data)
}
#' Calculate Results
#' @description This function calculates for each exposure the mean outcome on a sample and was implemented to deliver fast results. Nevertheless, the function should not used extensively for giving results as they are not reproducable.
#' @param dag result from bn.prep.dag
#' @param fitted.model bn.fitted model
#' @param exposure_levels levels to test exposure for
#' @param verbose Identify if the function should print the results.
#' @return data frame
#' @examples
#' bn.results.samples(fitted.bn, bn.dag, exposure_levels, FALSE)
#' @export
bn.results.samples <- function(fitted.model, dag, exposure_levels, verbose=TRUE){
# Function to calculate standard error
standard.error <- function(x){
return(sd(x)/sqrt(length(x)))
}
results <- data.frame(Exposure = rep(NA,0), Mean = rep(NA,0), Std.Error = rep(NA,0))
data <- list()
# Loop over all exposures
for(exp_level in exposure_levels){
exp <- list(exp = exp_level)
names(exp) <- dag$exposure
sim.outcome <- na.omit(bnlearn::cpdist(fitted.model, node = c(dag$outcome), evidence = exp, method = "lw"))
sim.mean <- mean(sim.outcome[,dag$outcome])
sim.err <- standard.error(sim.outcome[,dag$outcome])
results[nrow(results)+1,] <- c(exp, sim.mean, sim.err)
if(verbose){
lines <- c(exp_level,
paste("Mean", sim.mean),
paste("Std.Error", sim.err))
writeLines(paste(lines, sep="\n", collapse = "\n"))
}
data[exp_level] <- sim.outcome
}
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.