R/fitting.R
In EMC2: Bayesian Hierarchical Analysis of Cognitive Models of Choice

Documented in make_emc run_emc

get_stop_criteria <- function(stage, stop_criteria, type){
  if(is.null(stop_criteria)){
    if(stage == "preburn"){
      stop_criteria$iter <- 50
    }
    if(stage == "burn"){
      stop_criteria$mean_gd <- 1.1
      stop_criteria$omit_mpsrf <- TRUE
      if(type != "single"){
        stop_criteria$selection <- c("alpha", "mu")
      } else{
        stop_criteria$selection <- c("alpha")
      }

    }
    if(stage == "adapt"){
      stop_criteria$min_unique <- 150
    }
    if(stage == "sample"){
      stop_criteria$max_gd <- 1.1
      stop_criteria$omit_mpsrf <- TRUE
      if(type != "single"){
        stop_criteria$selection <- c("alpha", "mu")
      } else{
        stop_criteria$selection <- c("alpha")
      }
    }
  }
  if(!is.null(stop_criteria$max_gd) || !is.null(stop_criteria$mean_gd)){
    if(is.null(stop_criteria$selection)) stop_criteria$selection <- c('alpha', 'mu')
    if(is.null(stop_criteria$omit_mpsrf)) stop_criteria$omit_mpsrf <- TRUE
  }
  if(stage == "adapt" & is.null(stop_criteria$min_unique)) stop_criteria$min_unique <- 600
  if(stage != "adapt" & !is.null(stop_criteria$min_unique)) stop("min_unique only applicable for adapt stage, try min_es instead.")
  return(stop_criteria)
}

#' Fine-Tuned Model Estimation
#'
#' Although typically users will rely on ``fit``, this function can be used for more fine-tuned specification of estimation needs.
#' The function will throw an error if a stage is skipped,
#' the stages have to be run in order ("preburn", "burn", "adapt", "sample").
#' More details can be found in the ``fit`` help files (``?fit``).
#'
#' @param emc An emc object
#' @param stage A string. Indicates which stage is to be run, either `preburn`, `burn`, `adapt` or `sample`
#' @param search_width A double. Tunes target acceptance probability of the MCMC process.
#' This fine-tunes the width of the search space to obtain the desired acceptance probability.
#' 1 is the default width, increases lead to broader search.
#' @param step_size An integer. After each step, the stopping requirements as
#' specified by `stop_criteria` are checked and proposal distributions are updated. Defaults to 100.
#' @param verbose Logical. Whether to print messages between each step with the current status regarding the stop_criteria.
#' @param verboseProgress Logical. Whether to print a progress bar within each step or not. Will print one progress bar for each chain and only if cores_for_chains = 1.
#' @param fileName A string. If specified will autosave emc at this location on every iteration.
#' @param particle_factor An integer. `particle_factor` multiplied by the square root of the number of sampled parameters determines the number of particles used.
#' @param cores_per_chain An integer. How many cores to use per chain.
#' Parallelizes across participant calculations. Only available on Linux or Mac OS.
#' For Windows, only parallelization across chains (``cores_for_chains``) is available.
#' @param cores_for_chains An integer. How many cores to use across chains.
#' Defaults to the number of chains. the total number of cores used is equal to ``cores_per_chain`` * ``cores_for_chains``.
#' @param max_tries An integer. How many times should it try to meet the finish
#' conditions as specified by stop_criteria? Defaults to 20. max_tries is ignored if the required number of iterations has not been reached yet.
#' @param n_blocks An integer. Number of blocks. Will block the parameter chains such that they are updated in blocks. This can be helpful in extremely tough models with a large number of parameters.
#' @param stop_criteria A list. Defines the stopping criteria and for which types of parameters these should hold. See ``?fit``.
#' @param thin A boolean. If `TRUE` will automatically thin the MCMC samples, closely matched to the ESS.
#' Can also be set to a double, in which case 1/thin of the chain will be removed (does not have to be an integer).
#' @param trim A boolean. If `TRUE` will automatically remove redundant samples (i.e. from preburn, burn, adapt).
#' @export
#' @return An emc object
#' @examples \donttest{
#' # First define a design
#' design_in <- design(data = forstmann,model=DDM,
#'                            formula =list(v~0+S,a~E, t0~1, s~1, Z~1),
#'                            constants=c(s=log(1)))
#' # Then make the emc, we've omitted a prior here for brevity so default priors will be used.
#' emc <- make_emc(forstmann, design_in)
#'
#' # Now for example we can specify that we only want to run the "preburn" phase
#' # for MCMC 10 iterations
#' emc <- run_emc(emc, stage = "preburn", stop_criteria = list(iter = 10), cores_for_chains = 1)
#'}

run_emc <- function(emc, stage, stop_criteria,
                    search_width = 1, step_size = 100, verbose = FALSE, verboseProgress = FALSE,
                    fileName = NULL,particle_factor=50, cores_per_chain = 1,
                    cores_for_chains = length(emc), max_tries = 20, n_blocks = 1,
                    thin = FALSE, trim = TRUE){
  emc <- restore_duplicates(emc)
  if(Sys.info()[1] == "Windows" & cores_per_chain > 1) stop("only cores_for_chains can be set on Windows")
  if (verbose) message(paste0("Running ", stage, " stage"))
  total_iters_stage <- chain_n(emc)[,stage][1]
  if(stage != "preburn"){
    iter <- stop_criteria[["iter"]] + total_iters_stage
  } else{
    iter <- stop_criteria[["iter"]]
  }
  progress <- check_progress(emc, stage, iter, stop_criteria, max_tries, step_size, cores_per_chain*cores_for_chains, verbose, n_blocks = n_blocks)
  emc <- progress$emc
  progress <- progress[!names(progress) == 'emc']
  # We need to multiply step_size by thin to make an accurate guess for good step_size.
  cur_thin <- ifelse(is.numeric(thin), thin, 1)
  while(!progress$done){
    emc <- reset_pm_settings(emc, stage)
    # Remove redundant samples
    if(trim){
      emc <- fit_remove_samples(emc)
    }
    if(!is.null(progress$n_blocks)) n_blocks <- progress$n_blocks
    emc <- add_proposals(emc, stage, cores_per_chain*cores_for_chains, n_blocks)
    last_stage <- get_last_stage(emc)
    if(stage == "preburn"){
      sub_emc <- emc
    } else if(stage != last_stage){
      sub_emc <- subset(emc, filter = chain_n(emc)[1,last_stage]-1, stage = last_stage)
    } else{
      sub_emc <- subset(emc, filter = chain_n(emc)[1,stage] - 1, stage = stage)
    }
    # Actual sampling
    sub_emc <- auto_mclapply(sub_emc,run_stages, stage = stage, iter= progress$step_size*max(1,cur_thin),
                             verbose=verbose,  verboseProgress = verboseProgress,
                             particle_factor=particle_factor,search_width=search_width,
                             n_cores=cores_per_chain, mc.cores = cores_for_chains)

    class(sub_emc) <- "emc"
    if(stage != 'preburn'){
      if(is.numeric(thin)){
        sub_emc <- subset(sub_emc, stage = c("preburn", "burn", "adapt", "sample"), thin = thin)
      } else if(thin){
        sub_emc <- auto_thin(sub_emc, stage = c("preburn", "burn", "adapt", "sample"))
        # Update current rough guess for thinning:
        cur_thin <- progress$step_size/chain_n(sub_emc)[1,stage]
      }
      emc <- concat_emc(emc, sub_emc, progress$step_size, stage)
    } else{
      emc <- sub_emc
    }
    progress <- check_progress(emc, stage, iter, stop_criteria, max_tries, step_size, cores_per_chain*cores_for_chains,
                               verbose, progress,n_blocks)
    emc <- progress$emc
    progress <- progress[!names(progress) == 'emc']
    if(!is.null(fileName)){
      emc <- strip_duplicates(emc)
      fileName <- fix_fileName(fileName)
      class(emc) <- "emc"
      save(emc, file = fileName)
      emc <- restore_duplicates(emc)
    }
  }
  emc <- strip_duplicates(emc)
  class(emc) <- "emc"
  return(emc)
}

run_stages <- function(sampler, stage = "preburn", iter=0, verbose = TRUE, verboseProgress = TRUE,
                       particle_factor=50, search_width= NULL, n_cores=1)
{

  max_pars <- max(table(attr(sampler[[1]], "components")))
  particles <- round(particle_factor*sqrt(max_pars))
  if (!sampler$init) {
    sampler <- init(sampler, n_cores = n_cores)
  }
  if (iter == 0) return(sampler)
  tune <- list(search_width = search_width)
  sampler <- run_stage(sampler, stage = stage,iter = iter, particles = particles,
                       n_cores = n_cores, tune = tune, verbose = verbose, verboseProgress = verboseProgress)
  return(sampler)
}

add_proposals <- function(emc, stage, n_cores, n_blocks){
  if(stage != "preburn"){
    # if(!is.null(emc[[1]]$g_map_fixed)){
    #   emc <- create_chain_proposals_lm(emc)
    # } else{    }
    emc <- create_chain_proposals(emc, do_block = stage != "sample")
    if(!is.null(n_blocks)){
      if(n_blocks > 1){
        components <- sub_blocking(emc, n_blocks)
        for(i in 1:length(emc)){
          attr(emc[[i]]$data, "components") <- components
        }
      }
    }
  }
  if(stage == "sample"){
    # if(!is.null(emc[[1]]$g_map_fixed)){
    #   emc <- create_eff_proposals_lm(emc, n_cores)
    # } else{    }
    emc <- create_eff_proposals(emc, n_cores)
  }
  return(emc)
}

check_progress <- function (emc, stage, iter, stop_criteria,
                            max_tries, step_size, n_cores, verbose, progress = NULL,
                            n_blocks)
{
  min_es <- stop_criteria$min_es
  if(is.null(min_es)) min_es <- 0
  selection <- stop_criteria$selection
  min_unique <- stop_criteria$min_unique
  total_iters_stage <- chain_n(emc)[, stage][1]
  if (is.null(progress)) {
    iters_total <- 0
    trys <- 0
  }
  else {
    iters_total <- progress$iters_total + step_size
    trys <- progress$trys + 1
    if (verbose)
      message(trys, ": Iterations ", stage, " = ", total_iters_stage)
  }
  gd <- check_gd(emc, stage, stop_criteria[["max_gd"]], stop_criteria[["mean_gd"]], trys, verbose,
                 iter = total_iters_stage, selection, omit_mpsrf = stop_criteria[["omit_mpsrf"]],
                 n_blocks)
  iter_done <- ifelse(is.null(iter) || length(iter) == 0, TRUE, total_iters_stage >= iter)
  if (min_es == 0) {
    es_done <- TRUE
  } else if (total_iters_stage != 0) {
    class(emc) <- "emc"
    curr_min_es <- Inf
    for(select in selection){
      curr_min_es <- min(c(ess_summary(emc, selection = select,
                                                stage = stage, stat_only = TRUE), curr_min_es))
    }
    if (verbose)
      message("Smallest effective size = ", round(curr_min_es))
    es_done <- ifelse(!emc[[1]]$init, FALSE, curr_min_es >
                        min_es)
  }
  else {
    es_done <- FALSE
  }
  trys_done <- ifelse(is.null(max_tries), FALSE, trys >= max_tries)
  if (stage == "adapt") {
    samples_merged <- merge_chains(emc)
    test_samples <- extract_samples(samples_merged, stage = "adapt",
                                    samples_merged$samples$idx, n_chains = length(emc))
    # if(!is.null(emc[[1]]$g_map_fixed)){
    #   adapted <- test_adapted_lm(emc[[1]], test_samples, min_unique, n_cores, verbose)
    # } else{    }
    adapted <- test_adapted(emc[[1]], test_samples,
                            min_unique, n_cores, verbose)

  }
  else {
    adapted <- TRUE
  }
  done <- (es_done & iter_done & gd$gd_done & adapted) | (trys_done & iter_done)
  if(es_done & gd$gd_done & adapted & !iter_done){
    step_size <- min(step_size, abs(iter - total_iters_stage))[1]
  }
  if (trys_done & iter_done) {
    if(!(es_done & gd$gd_done & adapted)){
      warning("Max tries reached. If this happens in burn-in while trying to get
            gelman diagnostics small enough, you might have a particularly hard model.
            Make sure your model is well specified. If so, you can run adapt and
            sample, if run for long enough, sample usually converges eventually.")
    }
  }
  return(list(emc = gd$emc, done = done, step_size = step_size,
              trys = trys, n_blocks = gd$n_blocks))
}

check_gd <- function(emc, stage, max_gd, mean_gd, omit_mpsrf, trys, verbose,
                     selection, iter, n_blocks = 1)
{
  get_gds <- function(emc,omit_mpsrf, selection, stage) {
    gd_out <- c()
    alpha <- NULL
    for(select in selection){
      gd <- unlist(gd_summary.emc(emc, selection = select, stage = stage,
                                  omit_mpsrf = omit_mpsrf, stat = NULL))
      gd_out <- c(gd_out, c(gd))
      if(select == "alpha"){
        alpha <- gd
      }
    }
    gd_out[is.na(gd_out)] <- Inf
    return(list(gd = gd_out, alpha = alpha))
  }
  if(is.null(max_gd) & is.null(mean_gd)) return(list(gd_done = TRUE, emc = emc))
  if(!emc[[1]]$init | !stage %in% emc[[1]]$samples$stage)
    return(list(gd_done = FALSE, emc = emc))
  if(is.null(omit_mpsrf)) omit_mpsrf <- TRUE
  gd <- get_gds(emc,omit_mpsrf,selection, stage)
  alpha_gd <- gd$alpha
  gd <- gd$gd
  if(!is.null(max_gd)){
    ok_max_gd <- ifelse(all(is.finite(gd)), all(gd < max_gd), FALSE)
  } else{
    ok_max_gd <- TRUE
  }
  if(!is.null(mean_gd)){
    ok_mean_gd <- ifelse(all(is.finite(gd)), mean(gd) < mean_gd, FALSE)
  } else{
    ok_mean_gd <- TRUE
  }

  ok_gd <- ok_mean_gd & ok_max_gd
  if(!ok_gd) {
    n_remove <- round(chain_n(emc)[,stage][1]/3)
    samplers_short <- subset.emc(emc, filter=n_remove,stage=stage, keep_stages = TRUE)
    if (is(samplers_short,"try-error")){
      gd_short <- Inf
    } else{
      gd_short <- get_gds(samplers_short,omit_mpsrf,selection, stage)
      alpha_gd_short <- gd_short$alpha
      gd_short <- gd_short$gd
    }
    if (is.null(max_gd) & (mean(gd_short) < mean(gd)) | (!is.null(max_gd) & (max(gd_short) < max(gd)))) {
      gd <- gd_short
      alpha_gd <- alpha_gd_short
      emc <- samplers_short
    }
    if(!is.null(max_gd)){
      ok_max_gd <- ifelse(all(is.finite(gd)), all(gd < max_gd), FALSE)
    } else{
      ok_max_gd <- TRUE
    }
    if(!is.null(mean_gd)){
      ok_mean_gd <- ifelse(all(is.finite(gd)), mean(gd) < mean_gd, FALSE)
    } else{
      ok_mean_gd <- TRUE
    }
    ok_gd <- ok_mean_gd & ok_max_gd
  }
  if(verbose) {
    type <- "Rhat"
    if (!is.null(mean_gd)) message("Mean ",type," = ",round(mean(gd),3)) else
      if (!is.null(max_gd)) message("Max ",type," = ",round(max(gd),3))
  }
  emc <- set_tune_ess(emc, alpha_gd, mean_gd, max_gd)
  class(emc) <- "emc"
  return(list(gd = gd, gd_done = ok_gd, emc = emc))
}

set_tune_ess <- function(emc, alpha_gd = NULL, mean_gd = NULL, max_gd = NULL){
  if(is.null(alpha_gd)){
    return(emc)
  }
  alpha_gd <- matrix(alpha_gd, emc[[1]]$n_subjects)
  if(!is.null(mean_gd)){
    mean_alpha_ok <- rowMeans(alpha_gd) < 1+(mean_gd-1)*.5
  } else{
    mean_alpha_ok <- rep(T, nrow(alpha_gd))
  }
  if(!is.null(max_gd)){
    max_alpha_ok <- apply(alpha_gd, 1, max) < 1+(max_gd-1)*.5
  } else{
    max_alpha_ok <- rep(T, nrow(alpha_gd))
  }
  alpha_ok <- max_alpha_ok & mean_alpha_ok
  emc <- lapply(emc, function(x){
    pm_settings <- attr(x$samples, "pm_settings")
    pm_settings <- mapply(pm_settings, alpha_ok, FUN = function(y, z){
      for(i in 1:length(y)){
        y[[i]]$gd_good <- z
      }
      return(list(y))
    })
    attr(x$samples, "pm_settings") <- pm_settings
    return(x)
  })
  return(emc)
}


create_eff_proposals <- function(emc, n_cores){
  samples_merged <- merge_chains(emc)
  test_samples <- extract_samples(samples_merged, stage = c("adapt", "sample"), max_n_sample = 750, n_chains = length(emc))
  type <- emc[[1]]$type
  components <- attr(emc[[1]]$data, "components")
  for(i in 1:length(emc)){
    iteration = round(test_samples$iteration * i/length(emc))
    n_pars <- emc[[1]]$n_pars
    nuisance <- emc[[1]]$nuisance
    n_subjects <- emc[[1]]$n_subjects
    eff_mu <- matrix(0, nrow = n_pars, ncol = n_subjects)
    eff_var <- array(0, dim = c(n_pars, n_pars, n_subjects))
    for(comp in unique(components)){
      idx <- comp == components
      nuis_idx <- nuisance[idx]
      if(any(nuis_idx)){
        type <- samples_merged$sampler_nuis$type
        conditionals <- auto_mclapply(X = 1:n_subjects,
                                      FUN = get_conditionals, samples = test_samples,
                                      n_pars = sum(idx[!nuisance]), iteration =  iteration, idx = idx[!nuisance],
                                      type = type ,
                                      mc.cores = n_cores)
        conditionals_nuis <- auto_mclapply(X = 1:n_subjects,
                                           FUN = get_conditionals, samples = test_samples$nuisance,
                                           n_pars = sum(idx[nuisance]), iteration =  iteration, idx = idx[nuisance],
                                           type = type,
                                           mc.cores = n_cores)
        conditionals <- array(unlist(conditionals), dim = c(sum(idx[!nuisance]), sum(idx[!nuisance]) + 1, n_subjects))
        conditionals_nuis <- array(unlist(conditionals_nuis), dim = c(sum(idx[nuisance]), sum(idx[nuisance]) + 1, n_subjects))
        eff_mu[idx & !nuisance,] <- conditionals[,1,]
        eff_var[idx & !nuisance, idx & !nuisance,] <- conditionals[,2:(sum(idx[!nuisance])+1),]
        eff_mu[idx & nuisance,] <- conditionals_nuis[,1,]
        eff_var[idx & nuisance,idx & nuisance,] <- conditionals_nuis[,2:(sum(idx[nuisance])+1),]
      } else{
        conditionals <- auto_mclapply(X = 1:n_subjects,
                                      FUN = get_conditionals, samples = test_samples,
                                      n_pars = sum(idx[!nuisance]), iteration =  iteration, idx = idx[!nuisance],
                                      type = type,
                                      mc.cores = n_cores)
        conditionals <- array(unlist(conditionals), dim = c(sum(idx[!nuisance]), sum(idx[!nuisance]) + 1, n_subjects))
        eff_mu[idx & !nuisance,] <- conditionals[,1,]
        eff_var[idx & !nuisance,idx & !nuisance,] <- conditionals[,2:(sum(idx[!nuisance])+1),]
      }

    }
    # eff_mu <- lapply(conditionals, FUN = function(x) x$eff_mu)
    # eff_var <- lapply(conditionals, FUN = function(x) x$eff_var)
    # eff_alpha <- lapply(conditionals, FUN = function(x) x$eff_alpha)
    # eff_tau <- lapply(conditionals, FUN = function(x) x$eff_tau)

    eff_mu <- split(eff_mu, col(eff_mu))
    eff_var <- apply(eff_var, 3, identity, simplify = F)
    emc[[i]]$eff_mu <- eff_mu
    emc[[i]]$eff_var <- eff_var
    # attr(emc[[i]], "eff_alpha") <- eff_alpha
    # attr(emc[[i]], "eff_tau") <- eff_tau
  }
  return(emc)
}


sub_blocking <- function(emc, n_blocks){
  covs <- lapply(emc, FUN = function(x){return(x$chains_var)})
  out <- array(0, dim = dim(covs[[1]][[1]]))
  for(i in 1:length(covs)){
    cov_tmp <- covs[[1]]
    for(j in 1:length(cov_tmp)){
      out <- out + cov2cor(cov_tmp[[1]])
    }
  }
  shared_ll_idx <- attr(emc[[1]]$data, "shared_ll_idx")
  min_comp <- 0
  components <- c()
  for(ll in unique(shared_ll_idx)){
    idx <- ll == shared_ll_idx
    distance <-as.dist(1- abs(out[idx, idx]/out[1,1]))
    clusts <- hclust(distance)
    sub_comps <- min_comp + cutree(clusts, k = n_blocks) # This could go wrong if one group has just one member
    min_comp <- max(sub_comps)
    components <- c(components, sub_comps)
  }
  return(components)
}

create_chain_proposals <- function(emc, samples_idx = NULL, do_block = TRUE){
  n_subjects <- emc[[1]]$n_subjects
  n_chains <- length(emc)
  n_pars <- emc[[1]]$n_pars
  stage <- emc[[1]]$samples$stage[length(emc[[1]]$samples$stage)]
  if(is.null(samples_idx)){
    idx_subtract <- min(250, emc[[1]]$samples$idx/1.5)
    samples_idx <- round(emc[[1]]$samples$idx - idx_subtract):emc[[1]]$samples$idx
  }
  LL <- get_pars(emc, filter = samples_idx-1, selection = "LL",
                 stage = c('preburn', 'burn', 'adapt', 'sample'),
                 merge_chains = T, return_mcmc = F, remove_constants = F,
                 remove_dup = F)
  alpha <- get_pars(emc, filter = samples_idx-1, selection = "alpha",
                    stage = c('preburn', 'burn', 'adapt', 'sample'),
                    by_subject = T, merge_chains = T, return_mcmc = F,
                    remove_dup = F, remove_constants = F)

  components <- attr(emc[[1]]$data, "components")
  block_idx <- block_variance_idx(components)
  for(j in 1:n_chains){
    # Take only a random half of all the samples for each chain
    rnd_index <- sample(1:ncol(LL), round(ncol(LL)/2))
    chains_var <- vector("list", n_subjects)
    chains_mu <- vector("list", n_subjects)
    for(sub in 1:n_subjects){
      moments <- weighted_moments(alpha[,sub,rnd_index], LL[sub, rnd_index])
      emp_covs <- moments$w_cov
      chains_mu[[sub]] <- moments$w_mu
      if(do_block) emp_covs[block_idx] <- 0
      if(is.negative.semi.definite(emp_covs)){
        # If negative semi definite, do not use it
        next
      } else{
        chains_var[[sub]] <- emp_covs
      }
    }
    # Instead use the mean of all other subjects
    null_idx <- sapply(chains_var, is.null)
    if(all(null_idx)){
      # If all subjects are NULL just come up with something
      mean_chains_var <- diag(n_pars) * .5
    } else{
      mean_chains_var <- Reduce(`+`, chains_var[!null_idx]) / sum(!null_idx)
    }
    if(any(null_idx)){
      for(q in 1:n_subjects){
        if(null_idx[q]) chains_var[[q]] <- mean_chains_var
      }
    }

    new_prop_var <- mean(diag(mean_chains_var))
    if(is.null(attr(emc[[j]], "prop_var"))){
      # This is in preburn case, group-level proposals are wider
      # So we scale the epsilon a bit to account for narrow individual proposals
      prop_var_ratio <- 2
    } else{
      prop_var_ratio <- attr(emc[[j]], "prop_var")/new_prop_var
    }
    if(stage != "sample"){
      emc[[j]] <- update_epsilon_scale(emc[[j]], prop_var_ratio)
    }
    attr(emc[[j]], "prop_var") <- new_prop_var
    emc[[j]]$chains_var <- chains_var
    emc[[j]]$chains_mu <- chains_mu
  }
  return(emc)
}

reset_pm_settings <- function(emc, stage){
  if(stage != get_last_stage(emc) || stage == "burn"){ # In this case we're running a new stage
    for(i in 1:length(emc)){
      pm_settings <- attr(emc[[i]]$samples, "pm_settings")
      attr(emc[[i]]$samples, "pm_settings") <- lapply(pm_settings, function(x){
        for(i in 1:length(x)){
          x[[i]]$proposal_counts <- rep(0, length(x[[i]]$proposal_counts))
          x[[i]]$acc_counts <- rep(0, length(x[[i]]$proposal_counts))
          if(stage != get_last_stage(emc)){
            x[[i]]$iter <- 25
            x[[i]]$mix <- NULL
          }
        }
        return(x)
      })
    }
  }
  return(emc)
}

update_epsilon_scale <- function(pmwgs, prop_var_ratio){
  pm_settings <- attr(pmwgs$samples, "pm_settings")
  pm_settings <- lapply(pm_settings, function(x){
    for(i in 1:length(x)){
      x[[i]]$epsilon <- x[[i]]$epsilon * prop_var_ratio
    }
    return(x)
  })
  attr(pmwgs$samples, "pm_settings") <- pm_settings
  return(pmwgs)
}

test_adapted <- function(sampler, test_samples, min_unique, n_cores_conditional = 1,
                         verbose = FALSE)
{
  # Function used by run_adapt to check whether we can create the conditional.

  # Only need to check uniqueness for one parameter
  first_par <- as.matrix(test_samples$alpha[1, , ])
  if(ncol(first_par) == 1) first_par <- t(first_par)
  # Split the matrix into a list of vectors by subject
  # all subjects is greater than unq_vals
  n_unique_sub <- apply(first_par, 1, FUN = function(x) return(length(unique(x))))
  n_pars <- sampler$n_pars
  components <- attr(sampler$data, "components")
  nuisance <- sampler$nuisance
  if (length(n_unique_sub) != 0 & all(n_unique_sub > min_unique)) {
    if(verbose){
      message("Enough unique values detected: ", min_unique)
      message("Testing proposal distribution creation")
    }
    attempt <- tryCatch({
      for(comp in unique(components)){
        idx <- comp == components
        nuis_idx <- nuisance[idx]
        if(any(nuis_idx)){
          type <- sampler$sampler_nuis$type
          auto_mclapply(X = 1:sampler$n_subjects,
                        FUN = get_conditionals, samples = test_samples$nuisance,
                        n_pars = sum(idx[nuisance]), idx = idx[nuisance],
                        type = type,
                        mc.cores = n_cores_conditional)
        }
        auto_mclapply(X = 1:sampler$n_subjects,FUN = get_conditionals,samples = test_samples,
                      n_pars = sum(idx[!nuisance]), idx = idx[!nuisance], type = sampler$type,
                      mc.cores = n_cores_conditional)
      }
    },error=function(e) e, warning=function(w) w)
    if (any(class(attempt) %in% c("warning", "error", "try-error"))) {
      if(verbose){
        message("Can't create efficient distribution yet")
        message("Increasing required unique values and continuing adaptation")
      }
      return(FALSE)
    }
    else {
      if(verbose) message("Successfully adapted - stopping adaptation")
      return(TRUE)
    }
  } else{
    return(FALSE) # Not enough unique particles found
  }
}

loadRData <- function(fileName){
  #loads an RData file, and returns it
  load(fileName)
  get(ls()[ls() != "fileName"])
}


#' Make an emc Object
#'
#' Creates an emc object by combining the data, prior,
#' and model specification into a `emc` object that is needed in `fit()`.
#'
#' @param data A data frame, or a list of data frames. Needs to have the variable `subjects` as participant identifier.
#' @param design A list with a pre-specified design, the output of `design()`.
#' @param model A model list. If none is supplied, the model specified in `design()` is used.
#' @param type A string indicating whether to run a `standard` group-level, `blocked`, `diagonal`, `factor`, or `single` (i.e., non-hierarchical) model.
#' @param n_chains An integer. Specifies the number of mcmc chains to be run (has to be more than 1 to compute `rhat`).
#' @param compress A Boolean, if `TRUE` (i.e., the default), the data is compressed to speed up likelihood calculations.
#' @param rt_resolution A double. Used for compression, response times will be binned based on this resolution.
#' @param par_groups A vector. Only to be specified with type `blocked`, e.g., `c(1,1,1,2,2)` means the covariances
#' of the first three and of the last two parameters are estimated as two separate blocks.
#' @param prior_list A named list containing the prior. Default prior created if `NULL`. For the default priors, see `?get_prior_{type}`.
#' @param ... Additional, optional arguments.
#' @return An uninitialized emc object
#' @examples dat <- forstmann
#'
#' # function that takes the lR factor (named diff in the following function) and
#' # returns a logical defining the correct response for each stimulus. In this
#' # case the match is simply such that the S factor equals the latent response factor.
#' matchfun <- function(d)d$S==d$lR
#'
#' # design an "average and difference" contrast matrix
#' ADmat <- matrix(c(-1/2,1/2),ncol=1,dimnames=list(NULL,"diff"))
#'
#' # specify design
#' design_LBABE <- design(data = dat,model=LBA,matchfun=matchfun,
#' formula=list(v~lM,sv~lM,B~E+lR,A~1,t0~1),
#' contrasts=list(v=list(lM=ADmat)),constants=c(sv=log(1)))
#'
#' # specify priors
#' pmean <- c(v=1,v_lMdiff=1,sv_lMTRUE=log(.5), B=log(.5),B_Eneutral=log(1.5),
#'            B_Eaccuracy=log(2),B_lRright=0, A=log(0.25),t0=log(.2))
#' psd <- c(v=1,v_lMdiff=0.5,sv_lMTRUE=.5,
#'          B=0.3,B_Eneutral=0.3,B_Eaccuracy=0.3,B_lRright=0.3,A=0.4,t0=.5)
#' prior_LBABE <- prior(design_LBABE, type = 'standard',pmean=pmean,psd=psd)
#'
#' # create emc object
#' LBABE <- make_emc(dat,design_LBABE,type="standard",  prior=prior_LBABE)
#'
#' @export

make_emc <- function(data,design,model=NULL,
                          type="standard",
                          n_chains=3,compress=TRUE,rt_resolution=0.02,
                          prior_list = NULL,
                          par_groups=NULL, ...){
  # arguments for future compatibility
  n_factors <- NULL
  formula <- NULL
  Lambda_mat <- NULL
  B_mat <- NULL
  K_mat <- NULL
  G_mat <- NULL
  covariates <- NULL
  nuisance <- NULL
  nuisance_non_hyper <- NULL
  # overwrite those that were supplied
  optionals <- list(...)
  for (name in names(optionals) ) {
    assign(name, optionals[[name]])
  }
  if(!is.null(prior_list) & !is.null(prior_list$theta_mu_mean)){
    prior_list <- list(prior_list)
  }
  if(!is.null(prior_list)){
    type <- attr(prior_list[[1]], "type")
  }
  if(type != "single" && length(unique(data$subjects)) == 1){
    stop("can only use type = `single` if there's only one subject in the data")
  }

  if (!(type %in% c("standard","diagonal","blocked","factor","single", "lm", "infnt_factor", "SEM", "diagonal-gamma")))
    stop("type must be one of: standard,diagonal,blocked,factor,infnt_factor, lm, single")

  if(!is.null(nuisance) & !is.null(nuisance_non_hyper)){
    stop("You can only specify nuisance OR nuisance_non_hyper")
  }
  if (is(data, "data.frame")) data <- list(data)

  data <- lapply(data,function(d){
    if (!is.factor(d$subjects)) d$subjects <- factor(d$subjects)
    d <- d[order(d$subjects),]
    LC <- attr(d,"LC")
    UC <- attr(d,"UC")
    LT <- attr(d,"LT")
    UT <- attr(d,"UT")
    d <- add_trials(d)
    attr(d,"LC") <- LC
    attr(d,"UC") <- UC
    attr(d,"LT") <- LT
    attr(d,"UT") <- UT
    d
  })
  if (!is.null(names(design)[1]) && names(design)[1]=="Flist"){
    design <- list(design)
  }
  checks <- sapply(design, function(x) is(x, "emc.design"))
  if (!all(checks)) stop("design must be a list of emc.design objects")
  if (length(design)!=length(data)){
    design <- rep(design,length(data))
  }
  if (is.null(model)) model <- lapply(design,function(x){x$model})
  if (any(unlist(lapply(model,is.null))))
    stop("Must supply model if model is not in all design components")
  if (!is.null(names(model)[1]) && names(model)[1]=="type")
    model <- list(model)
  if (length(model)!=length(data))
    model <- rep(model,length(data))

  dadm_list <- vector(mode="list",length=length(data))
  rt_resolution <- rep(rt_resolution,length.out=length(data))
  for (i in 1:length(dadm_list)) {
    message("Processing data set ",i)
    # if (!is.null(design[[i]]$Ffunctions)) {
    #   pars <- attr(data[[i]],"pars")
    #   data[[i]] <- cbind.data.frame(data[[i]],data.frame(lapply(
    #     design[[i]]$Ffunctions,function(f){f(data[[i]])})))
    #   if (!is.null(pars)) attr(data[[i]],"pars") <- pars
    # }
    if(is.null(attr(design[[i]], "custom_ll"))){
      dadm_list[[i]] <- design_model(data=data[[i]],design=design[[i]],
                                     compress=compress,model=model[[i]],rt_resolution=rt_resolution[i])
      sampled_p_names <- names(attr(design[[i]],"p_vector"))
    } else{
      dadm_list[[i]] <- design_model_custom_ll(data = data[[i]],
                                               design = design[[i]],model=model[[i]])
      sampled_p_names <- attr(design[[i]],"sampled_p_names")
    }
    if(length(prior_list) == length(data)){
      if(!is.null(prior_list[[i]])){
        prior_list[[i]] <- check_prior(prior_list[[i]], sampled_p_names)
      }
    }
    # create a design model
  }
  # Make sure class retains following changes
  class(design) <- "emc.design"
  prior_in <- merge_priors(prior_list)
  prior_in <- prior(design, type, update = prior_in, ...)
  attr(dadm_list[[1]], "prior") <- prior_in

  # if(!is.null(subject_covariates)) attr(dadm_list, "subject_covariates") <- subject_covariates
  if (type %in% c("standard", "single", "diagonal", "infnt_factor", "diagonal-gamma")) {
    out <- pmwgs(dadm_list, type, nuisance = nuisance,
                 nuisance_non_hyper = nuisance_non_hyper,
                 n_factors = n_factors)
  } else if (type == "blocked") {
    if (is.null(par_groups)) stop("Must specify par_groups for blocked type")
    out <- pmwgs(dadm_list, type, par_groups=par_groups,
                 nuisance = nuisance,
                 nuisance_non_hyper = nuisance_non_hyper)
  } else if (type == "factor") {
    out <- pmwgs(dadm_list, type, n_factors = n_factors,
                 nuisance = nuisance,
                 nuisance_non_hyper = nuisance_non_hyper,
                 Lambda_mat = Lambda_mat)
  } else if (type == "SEM"){
    out <- pmwgs(dadm_list, type, Lambda_mat = Lambda_mat,
                 B_mat = B_mat, K_mat = K_mat, G_mat = G_mat,
                 covariates = covariates, nuisance = nuisance,
                 nuisance_non_hyper = nuisance_non_hyper)
  }
  out$model <- lapply(design, function(x) x$model)
  # Only for joint models we need to keep a list of functions
  if(length(out$model) == 1) out$model <- out$model[[1]]
  # replicate chains
  dadm_lists <- rep(list(out),n_chains)
  # For post predict
  class(dadm_lists) <- "emc"
  return(dadm_lists)
}

fix_fileName <- function(x){
  ext <- substr(x, nchar(x)-5, nchar(x))
  if(ext != ".RData" & ext != ".Rdata"){
    return(paste0(x, ".RData"))
  } else{
    return(x)
  }
}



extractDadms <- function(dadms, names = 1:length(dadms)){
  N_models <- length(dadms)
  pars <- attr(dadms[[1]], "sampled_p_names")
  prior <- attr(dadms[[1]], "prior")
  subjects <- unique(factor(sapply(dadms, FUN = function(x) levels(x$subjects))))
  dadm_list <- dm_list(dadms[[1]])
  components <- rep(1, length(pars))
  if(N_models > 1){
    total_dadm_list <- vector("list", length = N_models)
    k <- 1
    pars <- paste(names[1], pars, sep = "|")
    dadm_list[as.character(which(!subjects %in% unique(dadms[[1]]$subjects)))] <- NA
    total_dadm_list[[1]] <- dadm_list
    for(dadm in dadms[-1]){
      k <- k + 1
      tmp_list <- vector("list", length = length(subjects))
      tmp_list[as.numeric(unique(dadm$subjects))] <- dm_list(dadm)
      total_dadm_list[[k]] <- tmp_list
      curr_pars <- attr(dadm, "sampled_p_names")
      components <- c(components, rep(k, length(curr_pars)))
      pars <- c(pars, paste(names[k], curr_pars, sep = "|"))
    }
    dadm_list <- do.call(mapply, c(list, total_dadm_list, SIMPLIFY = F))
  }
  # subject_covariates_ok <- unlist(lapply(subject_covariates, FUN = function(x) length(x) == length(subjects)))
  # if(!is.null(subject_covariates_ok)) if(any(!subject_covariates_ok)) stop("subject_covariates must be as long as the number of subjects")
  attr(dadm_list, "components") <- components
  attr(dadm_list, "shared_ll_idx") <- components
  return(list(pars = pars, prior = prior,
              dadm_list = dadm_list, subjects = subjects))
}

auto_mclapply <- function(X, FUN, mc.cores, ...){
  if(Sys.info()[1] == "Windows"){
    cluster <- parallel::makeCluster(mc.cores)
    list_out <- parallel::parLapply(cl = cluster, X,FUN, ...)
    parallel::stopCluster(cluster)
  } else{
    list_out <- parallel::mclapply(X, FUN, mc.cores = mc.cores, ...)
  }
  return(list_out)
}

#' Strip all entries except samples from EMC list entries
#' @param emc A list of EMC objects
#' @return The same list with everything but samples removed from all but first entry
#' @noRd
strip_duplicates <- function(emc, incl_props = TRUE) {
  # Keep only samples in non-first entries
  for (i in 2:length(emc)) {
    samples <- emc[[i]]$samples
    prop_var <- attr(emc[[i]], "prop_var")
    emc[[i]] <- list(samples = samples)
    attr(emc[[i]], "prop_var") <- prop_var
  }
  if(incl_props){
    # Also remove eff_mu, eff_var, chains_cov
    for (i in 1:length(emc)) {
      emc[[i]]$eff_mu <- NULL
      emc[[i]]$eff_var <- NULL
      emc[[i]]$chains_cov <- NULL
      emc[[i]]$chains_mu <- NULL
    }
  }
  return(emc)
}

get_posterior_weights <- function(ll){
  max_ll <- max(ll)
  weights <- exp(ll - max_ll)
  weights <- weights / sum(weights)
  return(weights)
}

weighted_moments <- function(chain, ll = NULL) {
  # chain: a matrix with each col as a sample, and each row as a parameter
  # weights: an optional vector of weights. If not provided, equal weighting is assumed.
  n <- ncol(chain)
  d <- nrow(chain)

  # Use equal weights if none are provided.
  if (is.null(ll)) {
    weights <- rep(1 / n, n)
  } else {
    weights <- get_posterior_weights(ll)
  }

  # Compute the weighted mean of the chain.
  weighted_mean <- colSums(apply(chain, 1, function(x) x*weights))
  # NIEK THIS CAUSES ERRORS
  # # Compute the weighted covariance matrix.
  # cov_matrix <- matrix(0, nrow = d, ncol = d)
  # for (i in 1:n) {
  #   diff <- chain[,i] - weighted_mean
  #   cov_matrix <- cov_matrix + weights[i] * (diff %*% t(diff))
  # }
  cov_matrix <- cov(t(chain))
  return(list(w_cov = cov_matrix, w_mu = weighted_mean))
}



#' Restore all entries to EMC list entries from first entry
#' @param emc A list of EMC objects with stripped duplicates
#' @return The same list with all fields restored from first entry except samples
#' @noRd
restore_duplicates <- function(emc) {
  # Restore everything except samples from first entry
  for (i in 2:length(emc)) {
    samples <- emc[[i]]$samples
    emc[[i]] <- emc[[1]]
    emc[[i]]$samples <- samples
  }
  return(emc)
}