vignettes/baker_demo.R

## ----setup,echo=FALSE,results="hide"---------------------------
suppressPackageStartupMessages({
library(rjags)
library(R2jags)
library(baker)
library(binom)
library(knitr)
})

## ----lkd-------------------------------------------------------
fname = file.path(
  "example_data", 
  "pathogen_category_simulation.csv")
fname = system.file(
  fname, 
  package = "baker")
demodat = read.csv(fname, stringsAsFactors = FALSE)
kable(head(demodat))

## ----doit------------------------------------------------------
# Note: the example will only run 100 Gibbs sampling steps to save computing time.
# To produce useful posterior inferences, please modify "mcmc_options" as follows
#                      "n.itermcmc" to 50000
#                      "n.burnin"   to 10000,
#                      "n.thin"     to 40,


working_dir <- tempdir() # <-- create a temporary directory.

K.true  <- 2   # no. of latent subclasses in actual simulation. 
# If eta = c(1,0), K.true is effectively 1.
J       <- 6   # no. of pathogens.
N       <- 250 # no. of cases/controls.

# case subclass weight (five values):
subclass_mix_seq <- c(0,0.25,0.5,0.75,1)


NREP   <- 100
MYGRID <- expand.grid(list(rep   = 1:NREP, # data replication.
                           iter  = seq_along(subclass_mix_seq),# mixing weights.
                           k_fit = c(1,2), # model being fitted: 1 for pLCM; >1 for npLCM.
                           scn   = 3:1)    # index for different truth; see "scn_collection.R".
)

n_seed   <- nrow(unique(MYGRID[,-3]))
seed_seq <- rep(1:n_seed,times=length(unique(MYGRID[,3])))

SEG   <- 1 # The value could be 1 to nrow(MYGRID)=3000; here we just simulate one data set.
scn   <- MYGRID$scn[SEG]
k_fit <- 2#MYGRID$k_fit[SEG] 
iter  <- MYGRID$iter[SEG] 
rep   <- MYGRID$rep[SEG] 


# current parameters:
curr_mix <- subclass_mix_seq[iter]
lambda   <- c(0.5,0.5) #c(curr_mix,1-curr_mix)
eta      <- c(curr_mix,1-curr_mix) 

# set fixed simulation sequence:
seed_start <- 20161215  
set.seed(seed_start+seed_seq[SEG])

if (scn == 3){
  ThetaBS_withNA <- cbind(c(0.95,0.95,0.55,0.95,0.95,0.95),#subclass 1.
                          c(0.95,0.55,0.95,0.55,0.55,0.55))#subclass 2.
  PsiBS_withNA   <- cbind(c(0.4,0.4,0.05,0.2,0.2,0.2),     #subclass 1.
                          c(0.05,0.05,0.4,0.05,0.05,0.05)) #subclass 2.
}

if (scn == 2){
  ThetaBS_withNA <- cbind(c(0.95,0.9,0.85,0.9,0.9,0.9),   #subclass 1.
                          c(0.95,0.9,0.95,0.9,0.9,0.9))   #subclass 2.
  PsiBS_withNA   <- cbind(c(0.3,0.3,0.15,0.2,0.2,0.2),    #subclass 1.
                          c(0.15,0.15,0.3,0.05,0.05,0.05))#subclass 2.
}

if (scn == 1){
  ThetaBS_withNA <- cbind(c(0.95,0.9,0.9,0.9,0.9,0.9),#subclass 1.
                          c(0.95,0.9,0.9,0.9,0.9,0.9))#subclass 2.
  PsiBS_withNA   <- cbind(c(0.25,0.25,0.2,0.15,0.15,0.15),#subclass 1.
                          c(0.2,0.2,0.25,0.1,0.1,0.1))    #subclass 2.
}



# the following paramter names are set using names in the 'baker' package:
set_parameter <- list(
  cause_list      = c(LETTERS[1:J]),
  etiology        = c(0.5,0.2,0.15,0.05,0.05,0.05),# same length as cause_list.
  pathogen_BrS    = LETTERS[1:J],
  meas_nm         = list(MBS = c("MBS1")), # a single source of Bronze Standard (BrS) data.
  Lambda          = lambda,              #ctrl mix (subclass weights).
  Eta             = t(replicate(J,eta)), #case mix; # of rows equals length(cause_list).
  PsiBS           = PsiBS_withNA,
  ThetaBS         = ThetaBS_withNA,
  Nu      =     N, # control sample size.
  Nd      =     N  # case sample size.
)

# # visualize pairwise log odds ratios for cases and controls when eta changes 
# # from 0 to 1. In the following simulation, we just use one value: eta=0.
# example("compute_logOR_single_cause")

simu_out   <- simulate_nplcm(set_parameter)
data_nplcm <- simu_out$data_nplcm

## ----expl, fig.height=10, fig.width=10-------------------------

# specify cause list:
cause_list <- set_parameter$cause_list

# specify measurements:
# bronze-standard measurements:
patho_BrS_MBS1      <- set_parameter$pathogen_BrS
BrS_object_1        <- make_meas_object(patho_BrS_MBS1,"MBS","1","BrS",cause_list)
   # please use ?make_meas_object to see the measurement standards.



# pairwise log odds ratio plot:
pathogen_display <- BrS_object_1$patho
plot_logORmat(data_nplcm,pathogen_display,1)

## ----domod-----------------------------------------------------
m_opt1 <- list(likelihood   = list(cause_list = cause_list,               # <---- fitted causes.
                                   k_subclass = k_fit,                    # <---- no. of subclasses.
                                   Eti_formula = "~ 0",                   # <---- only apply FPR formula to specified slice of measurements; if not default to the first slice.
                                   FPR_formula = list(MBS1 = "~0")),      # <---- etiology regression formula.
               use_measurements = c("BrS"),                               # <---- which measurements to use to inform etiology
               prior        = list(Eti_prior   = overall_uniform(1, cause_list) ,                       # <--- etiology prior. 
                                   TPR_prior   = list(
                                     BrS  = list(info  = "informative",
                                                 input = "direct_beta_param",
                                                 val   = list(
                                                          MBS1 = list(alpha = list(rep(6,length(set_parameter$pathogen_BrS))),
                                                                      beta  = list(rep(2,length(set_parameter$pathogen_BrS)))
                                                                     )
                                                 )
                                                 
                                     )
                                     
                                   )# <---- TPR prior.
               )
)                       

model_options <- m_opt1
assign_model(model_options,data_nplcm)

## ----dofit, eval=FALSE-----------------------------------------
#  # date stamp for analysis:
#  Date     <- gsub("-", "_", Sys.Date())
#  # include stratification information in file name:
#  dated_strat_name    <- file.path(working_dir,
#                                   paste0("scn_",scn,"_mixiter_",iter))
#  if (dir.exists(dated_strat_name)) {
#    unlink(dated_strat_name, force = TRUE)
#  }
#  
#  # create folder
#  dir.create(dated_strat_name)
#  fullname <- dated_strat_name
#  
#  # for finer scenarios, e.g., different types of analysis applicable to the
#  # same data set. Here we just perform one analysis:
#  result_folder <- file.path(
#    fullname,
#    paste0("rep_", rep, "_kfit_",
#           model_options$likelihood$k_subclass))
#  dir.create(result_folder)
#  
#  
#  # options for MCMC chains:
#  mcmc_options <- list(
#    individual.pred = !TRUE,
#    ppd             = TRUE,
#    n.chains   = 1,
#    n.itermcmc = as.integer(200), #50000
#    n.burnin   = as.integer(100), #10000
#    n.thin     = 1, #50
#    result.folder = result_folder,
#    bugsmodel.dir = result_folder
#  )
#  
#  # Record the settings of current analysis:
#  # data clean options:
#  fname = file.path(
#    "example_data",
#    "pathogen_category_simulation.csv")
#  fname = system.file(
#    fname,
#    package = "baker")
#  global_patho_taxo_dir = fname
#  
#  clean_options <- list(
#    BrS_objects        =  make_list(BrS_object_1),         # <---- all bronze-standard measurements.
#    patho_taxo_dir = global_patho_taxo_dir,
#    allow_missing      = FALSE)
#  
#  # place the nplcm data and cleaning options into the results folder
#  dput(data_nplcm,file.path(mcmc_options$result.folder,"data_nplcm.txt"))
#  dput(clean_options,file.path(mcmc_options$result.folder,"data_clean_options.txt"))
#  
#  gs <- nplcm(data_nplcm, model_options, mcmc_options)

## ----dovizsetup,eval=FALSE-------------------------------------
#  result_folder <- mcmc_options$result.folder

## ----lkpan,fig.height=10, fig.width=10, eval=FALSE-------------
#  suppressWarnings(plot(gs, bg_color = NULL))

## ----lkpan2,fig.height=10, fig.width=10, eval=FALSE------------
#  plot(gs, bg_color = NULL, select_latent = c("A","C"), exact = TRUE)

## ----doSLORD,fig.height=10, fig.width=10, eval=FALSE-----------
#  plot_check_pairwise_SLORD(result_folder, slice=1)

## ----doobsf,fig.height=10, fig.width=10, eval=FALSE------------
#  dir_list <- as.list(c(result_folder))
#  plot_check_common_pattern(dir_list,slice_vec =c(1,1))
zhenkewu/baker documentation built on Feb. 7, 2024, 4:20 p.m.