MALECOT: Joint estimation of COI and population structure for malaria genetic data

#------------------------------------------------
# The following commands ensure that package dependencies are listed in the
# NAMESPACE file.

#' @useDynLib MALECOT
#' @import parallel
#' @import coda
#' @import ggplot2
#' @import gridExtra
#' @importFrom RColorBrewer brewer.pal
#' @importFrom Rcpp sourceCpp
#' @importFrom grDevices colorRampPalette grey
#' @import stats
#' @import utils
#' @import graphics
NULL

#------------------------------------------------
#' @title Check that MALECOT package has loaded successfully
#'
#' @description Simple function to check that MALECOT package has loaded 
#'   successfully. Prints "MALECOT loaded successfully!" if so.
#'
#' @export

check_MALECOT_loaded <- function() {
  message("MALECOT loaded successfully!")
}

#------------------------------------------------
#' @title Bind bi-allelic data to project
#'
#' @description Bind data in bi-allelic format to MALECOT project. Data should
#'   be formatted as a dataframe with samples in rows and loci in columns.
#'   Genetic data should be coded as 1 (homozygote REF allele), 0 (homozygote
#'   ALT allele), or 0.5 (heterozygote). Additional meta-data columns can be
#'   specified, including a column for sample IDs and a column for sampling
#'   population.
#'
#' @param project a MALECOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param df a dataframe containing genetic information and optional meta-data
#' @param ID_col which column of the input data contains the sample IDs. If NULL
#'   then IDs must be defined seperately through the \code{ID} argument
#' @param pop_col which column of the input data contains the ostensible 
#'   population of the samples. If NULL then populations must be defined 
#'   seperately through the \code{pop} argument
#' @param data_cols which columns of the input data contain genetic information.
#'   Defaults to all remaining columns of the data once meta-data columns have 
#'   been accounted for
#' @param ID sample IDs. Ignored if using the \code{ID_col} option
#' @param pop ostensible populations. Ignored if using the \code{pop_col} option
#' @param missing_data what value represents missing data. Defaults to -9. Must 
#'   be a positive or negative integer, and cannot equal 0 or 1 as these are 
#'   reserved for genetic data.
#' @param name optional name of the data set to aid in record keeping
#' @param check_delete_output whether to prompt the user before overwriting 
#'   existing data
#'
#' @export
#' @examples
#' # TODO

bind_data_biallelic <- function(project, df, ID_col = 1, pop_col = NULL, data_cols = NULL, ID = NULL, pop = NULL, missing_data = -9, name = NULL, check_delete_output = TRUE) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  assert_dataframe(df)
  n <- nrow(df)
  if (!is.null(ID_col)) {
    assert_single_pos_int(ID_col, zero_allowed = FALSE)
    assert_leq(ID_col, ncol(df))
    ID <- as.character(df[,ID_col])
  }
  if (!is.null(pop_col)) {
    assert_single_pos_int(pop_col, zero_allowed = FALSE)
    assert_leq(pop_col, ncol(df))
    pop <- df[,pop_col]
  }
  if (is.null(data_cols)) {
    data_cols <- setdiff(1:ncol(df), c(ID_col, pop_col))
  } else {
    assert_pos_int(data_cols, zero_allowed = FALSE)
    assert_leq(data_cols, ncol(df))
    assert_noduplicates(data_cols)
  }
  ID <- define_default(ID, paste0("sample", zero_pad_simple(1:n, nchar(n))))
  assert_string(ID)
  assert_length(ID, n)
  pop <- define_default(pop, rep(1,n))
  assert_pos_int(pop)
  assert_length(pop,n)
  assert_single_int(missing_data)
  assert_not_in(missing_data, c(0, 1))
  if (!is.null(name)) {
    assert_single_string(name)
  }
  assert_single_logical(check_delete_output)
  
  # check before overwriting existing output
  if (project$active_set>0 && check_delete_output) {
    
    # ask before overwriting. On abort, return original project
    if (!user_yes_no("All existing output and parameter sets for this project will be lost. Continue? (Y/N): ")) {
      return(project)
    }
    
    # replace old project with fresh empty version
    project <- malecot_project()
  }
  
  # extract genetic data
  dat <- as.matrix(df[, data_cols, drop = FALSE])
  L <- ncol(dat)
  locus_names <- colnames(dat)
  apply(dat, 1, function(x) {
    assert_in(x, c(0, 0.5, 1, missing_data), name_x = "genetic data")
  })
  
  # process genetic data
  w <- which(dat == missing_data)
  dat[dat == 1] <- 1
  dat[dat == 0.5] <- 2
  dat[dat == 0] <- 3
  dat[w] <- 0
  
  # check for invariant loci
  invariant <- apply(dat, 2, function(x) {
                      all(x == 1) || all(x == 3)
                    })
  if (any(invariant)) {
    stop("data cannot contain invariant loci (i.e. loci for which the same single haplotype is observed in every sample)")
  }
  
  # create dat_processed list
  dat_processed <- list(data = dat,
                        n = nrow(dat),
                        L = ncol(dat),
                        ID = ID,
                        pop = pop,
                        alleles = rep(2,L),
                        locus_names = locus_names,
                        data_format = "biallelic",
                        name = name)
  
  # add data to project
  project$data <- df
  project$data_processed <- dat_processed
  
  return(project)
}

#------------------------------------------------
#' @title Bind multi-allelic format data to project
#'
#' @description Bind data in multi-allelic format to MALECOT project. Data 
#'   should be formatted as a dataframe with three columns: "sample_ID", "locus"
#'   and "haplotype". Each row of this dataframe specifies a haplotype that was 
#'   observed at that locus in that individual. Haplotypes should be coded as
#'   positive integers.
#'
#' @param project a MALECOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param df a dataframe with three columns, as decribed above
#' @param pop ostensible populations of the samples
#' @param missing_data what value represents missing data. Defaults to -9. Must
#'   be a positive or negative integer
#' @param alleles the number of alleles at each locus. If scalar then the same 
#'   number of alleles is assumed at all loci. If NULL then the number of
#'   alleles is inferred directly from data as the maximum observed value per
#'   locus
#' @param name optional name of the data set to aid in record keeping
#' @param check_delete_output whether to prompt the user before overwriting 
#'   existing data
#'
#' @export
#' @examples
#' # TODO

bind_data_multiallelic <- function(project, df, pop = NULL, missing_data = -9, alleles = NULL, name = NULL, check_delete_output = TRUE) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  assert_dataframe(df)
  n <- nrow(df)
  pop <- define_default(pop, rep(1,n))
  assert_pos_int(pop)
  assert_length(pop,n)
  assert_single_int(missing_data)
  if (!is.null(alleles)) {
    assert_pos_int(alleles, zero_allowed = FALSE)
  }
  if (!is.null(name)) {
    assert_single_string(name)
  }
  assert_single_logical(check_delete_output)
  
  # check before overwriting existing output
  if (project$active_set > 0 && check_delete_output) {
    
    # ask before overwriting. On abort, return original project
    if (!user_yes_no("All existing output and parameter sets for this project will be lost. Continue? (Y/N): ")) {
      return(project)
    }
    
    # replace old project with fresh empty version
    project <- malecot_project()
  }
  
  # check data format
  assert_ncol(df, 3)
  assert_eq(names(df), c("sample_ID", "locus", "haplotype"))
  
  # check sample_ID column
  assert_string(df$sample_ID)
  ID <- unique(df$sample)
  n <- length(ID)
  
  # check locus column
  assert_pos_int(df$locus)
  locus_names <- unique(subset(df, df$sample_ID == df$sample_ID[1])$locus)
  L <- length(locus_names)
  good_loci <- mapply(function(x) {
                        return(isTRUE(all.equal(unique(x), locus_names)))
                      }, split(df$locus, f = df$sample_ID))
  if (!all(good_loci)) {
    stop(sprintf("all samples must contain loci in the same range, i.e. 1:%s", L))
  }
  
  # expand alleles to vector if specified as scalar
  if (!is.null(alleles)) {
    if (length(alleles) == 1) {
      alleles <- rep(alleles, L)
    }
  }
  
  # check haplotype column
  if (any(df$haplotype <= 0 & df$haplotype != missing_data)) {
    stop("for the multi-allelic format haplotypes must be coded as positive integers or as missing data")
  }
  observed_n_alleles <- mapply(function(x) {
                                  length(unique(x[!is.na(x)]))
                                }, split(df$haplotype, f = df$locus))
  if (any(observed_n_alleles == 1)) {
    stop("data cannot contain invariant loci (i.e. loci for which the same single haplotype is observed in every sample)")
  }
  if (all(observed_n_alleles == 2)) {
    message("Note: data contains two alleles at every locus. Consider reformatting in bi-allelic format to speed up MCMC")
  }
  observed_max_alleles <- mapply(function(x) {
                                    max(x[!is.na(x)])
                                  }, split(df$haplotype, f = df$locus))
  if (is.null(alleles)) {
    alleles <- observed_max_alleles
  }
  if (any(observed_max_alleles > alleles)) {
    stop("observed number of alleles exceeds 'alleles' argument at some loci")
  }
  
  # process genetic data. Re-factor loci as increasing integers, and haplotypes
  # as increasing integers with 0 indicating missing data
  df_processed <- df
  df_processed$locus <- match(df_processed$locus, locus_names)
  df_processed$haplotype[df_processed$haplotype == missing_data] <- NA
  haplotype_uniques <- mapply(function(x) {
                                sort(unique(x[!is.na(x)]))
                              }, split(df_processed$haplotype, f = df_processed$locus), SIMPLIFY = FALSE)
  new_haplotype <- mapply(function(x,y) {
                            match(y, haplotype_uniques[[x]])
                          }, df_processed$locus, df_processed$haplotype)
  df_processed$haplotype <- new_haplotype
  df_processed$haplotype[is.na(df_processed$haplotype)] <- 0
  
  # reformat into list over samples and loci
  data_processed <- mapply(function(x) {
    tmp <- mapply(function(y) y$haplotype, split(x, f = x$locus), SIMPLIFY = FALSE)
    names(tmp) <- NULL
    tmp
  }, split(df_processed, f = df_processed$sample_ID), SIMPLIFY = FALSE)
  names(data_processed) <- NULL
  
  # add data to project
  project$data <- df
  project$data_processed <- list(data = data_processed,
                                 n = n,
                                 L = L,
                                 ID = ID,
                                 pop = pop,
                                 alleles = alleles,
                                 locus_names = locus_names,
                                 data_format = "multiallelic",
                                 name = name)
  
  return(project)
}

#------------------------------------------------
#' @title Create new MALECOT parameter set
#'
#' @description TODO
#'
#' @details TODO
#'
#' @param project a MALECOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param name the name of the parameter set
#' @param lambda the shape parameter(s) of the prior on allele frequencies. This
#'   prior is Beta in the bi-allelic case, and Dirichlet in the multi-allelic 
#'   case. \code{lambda} can be:
#'   \itemize{
#'     \item{a single scalar value, in which case the same value is used for 
#'     every allele and every locus (i.e. the prior is symmetric)}
#'     \item{a vector of values, in which case the same vector is used for every
#'     locus. Only works if the same number of alleles applies at every locus}
#'     \item{a list of vectors specifying the shape parameter separately for
#'     each allele of each locus. The list must of length \code{L}, and must
#'     contain vectors of length equal to the number of alleles at that locus}
#'   }
#' @param COI_model the type of prior on COI. Must be one of "uniform",
#'   "poisson", or "nb" (negative binomial)
#' @param COI_max the maximum COI allowed for any given sample
#' @param COI_manual A vector of length n (where n is the number of samples)
#'   allowing the COI to be specified manually. Positive values indicate fixed
#'   COIs that should not be updated as part of the MCMC, while -1 values
#'   indicate that COIs should be estimated. Defaults to \code{rep(-1,n)},
#'   meaning all COIs will be esimated
#' @param estimate_COI_mean whether the mean COI should be estimated for each 
#'   subpopulation as part of the MCMC, otherwise the value \code{COI_mean} is 
#'   used for all subpopulations. Defaults to \code{TRUE}. Note that mean COI 
#'   estimation is only possible under the Poisson and negative binomial models
#'   (see \code{COI_model})
#' @param COI_mean single scalar value specifying the mean COI for all
#'   subpopulations (see \code{estimate_COI_mean} above)
#' @param COI_dispersion  the ratio of the variance to the mean of the prior on
#'   COI. Only applies under the negative binomial model. Must be >1, as a ratio
#'   of 1 can be achieved by using the Poisson distribution
#' @param estimate_error whether to estimate error probabilities \code{e1} and
#'   \code{e2}
#' @param e1 the probability of a true homozygote being incorrectly called as a
#'   heterozygote
#' @param e2 the probability of a true heterozygote being incorrectly called as a
#'   homozygote
#' @param e1_max the maximum possible value of \code{e1}
#' @param e2_max the maximum possible value of \code{e2}
#'
#' @export
#' @examples
#' # TODO

new_set <- function(project, name = "(no name)", lambda = 1.0, COI_model = "poisson", COI_max = 20, COI_manual = NULL, estimate_COI_mean = TRUE, COI_mean = 3.0, COI_dispersion = 2.0, estimate_error = FALSE, e1 = 0.0, e2 = 0.0, e1_max = 0.2, e2_max = 0.2) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  n <- project$data_processed$n
  L <- project$data_processed$L
  data_format <- project$data_processed$data_format
  assert_single_string(name)
  
  assert_pos(unlist(lambda), zero_allowed = FALSE)
  if (is.list(lambda)) {
    assert_length(lambda, L)
    lambda_nalleles <- mapply(length, lambda)
    if (!all(lambda_nalleles == project$data_processed$alleles)) {
      stop("when lambda is a list it must contain one entry per allele at every locus")
    }
  } else {
    if (length(lambda) > 1) {
      if (!all(project$data_processed$alleles == length(lambda))) {
        stop("when lambda is a vector its length must equal the number of alleles at every locus")
      }
    }
  }
  
  if (is.list(lambda)) {
    assert_length(lambda,L)
    assert_pos(unlist(lambda), zero_allowed = FALSE)
  } else {
    assert_pos(lambda, zero_allowed = FALSE)
  }
  
  assert_single_string(COI_model)
  assert_in(COI_model, c("uniform", "poisson", "nb"))
  assert_single_pos_int(COI_max, zero_allowed = FALSE)
  COI_manual <- define_default(COI_manual, rep(-1,n))
  assert_vector(COI_manual)
  assert_length(COI_manual, n)
  assert_int(COI_manual)
  assert_bounded(COI_manual[COI_manual != -1], left = 1, right = COI_max, inclusive_left = TRUE, inclusive_right = TRUE)
  assert_single_logical(estimate_COI_mean)
  if (estimate_COI_mean) {
    if (COI_model != "uniform") {
      assert_single_pos(COI_mean)
      assert_gr(COI_mean, 1)
      if (COI_model == "nb") {
        assert_single_pos(COI_dispersion)
        assert_gr(COI_dispersion, 1)
      }
    }
  }
  assert_single_logical(estimate_error)
  assert_single_numeric(e1)
  assert_bounded(e1, left = 0.0, right = 1.0, inclusive_left = TRUE, inclusive_right = TRUE)
  assert_single_numeric(e2)
  assert_bounded(e2, left = 0.0, right = 1.0, inclusive_left = TRUE, inclusive_right = TRUE)
  assert_bounded(e1_max)
  assert_bounded(e2_max)
  if (estimate_error) {
    assert_bounded(e1, right = e1_max)
    assert_bounded(e2, right = e2_max)
  }
  
  # check that lambda specified correctly
  if (is.list(lambda)) {
    lambda_length <- mapply(length, lambda)
    assert_eq(project$data_processed$alleles, lambda_length, message = sprintf("when in list form, lambda must have one value for every allele at every locus."))
  }
  
  # count current parameter sets and add one
  s <- length(project$parameter_sets) + 1
  
  # make new set active
  project$active_set <- s
  
  # create new parameter set
  project$parameter_sets[[s]] <- list(name = name,
                                      lambda = lambda,
                                      COI_model = COI_model,
                                      COI_max = COI_max,
                                      COI_manual = COI_manual,
                                      estimate_COI_mean = estimate_COI_mean,
                                      COI_mean = COI_mean,
                                      COI_dispersion = COI_dispersion,
                                      estimate_error = estimate_error,
                                      e1 = e1,
                                      e2 = e2,
                                      e1_max = e1_max,
                                      e2_max = e2_max)
  
  # name parameter set
  names(project$parameter_sets)[s] <- paste0("set", s)
  
  # create new output at all_K level
  GTI_logevidence <- data.frame(K = numeric(),
                                mean = numeric(),
                                SE = numeric())
  class(GTI_logevidence) <- "malecot_GTI_logevidence"
  
  GTI_posterior <- data.frame(K = numeric(),
                              Q2.5 = numeric(),
                              Q50 = numeric(),
                              Q97.5 = numeric())
  class(GTI_posterior) <- "malecot_GTI_posterior"
  
  project$output$single_set[[s]] <- list(single_K = list(),
                                         all_K = list(GTI_logevidence = GTI_logevidence,
                                                      GTI_posterior = GTI_posterior))
  
  # name new output
  names(project$output$single_set) <- paste0("set", 1:length(project$output$single_set))
  
  # expand summary output over all parameter sets
  GTI_logevidence_model <- rbind(project$output$all_sets$GTI_logevidence_model, data.frame(set = s, name = name, mean = NA, SE = NA, stringsAsFactors = FALSE))
  class(GTI_logevidence_model) <- "malecot_GTI_logevidence_model"
  project$output$all_sets$GTI_logevidence_model <- GTI_logevidence_model
  
  GTI_posterior_model <- rbind(project$output$all_sets$GTI_posterior_model, data.frame(set = s, name = name, Q2.5 = NA, Q50 = NA, Q97.5 = NA, stringsAsFactors = FALSE))
  class(GTI_posterior_model) <- "malecot_GTI_posterior_model"
  project$output$all_sets$GTI_posterior_model <- GTI_posterior_model
  
  # return invisibly
  invisible(project)
}

#------------------------------------------------
#' @title Change the active set of a MALECOT project
#'
#' @description Change the active set of a MALECOT project
#'
#' @param project a MALECOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param set the new active set
#'
#' @export
#' @examples
#' # TODO

active_set <- function(project, set) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  assert_single_pos_int(set)
  assert_leq(set, length(project$parameter_sets), message = "chosen set outside range")
  
  # change active set
  project$active_set <- set
  message(sprintf("active set = %s", set))
  
  # return invisibly
  invisible(project)
}

#------------------------------------------------
#' @title Delete parameter set
#'   
#' @description Delete a given parameter set from a MALECOT project.
#'
#' @param project a MALECOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param set which set to delete. Defaults to the current active set
#' @param check_delete_output whether to prompt the user before deleting any 
#'   existing output
#'
#' @export
#' @examples
#' # TODO

delete_set <- function(project, set = NULL, check_delete_output = TRUE) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  assert_single_logical(check_delete_output)
  
  # set index to active_set by default
  set <- define_default(set, project$active_set)
  
  # further checks
  assert_single_pos_int(set, zero_allowed = FALSE)
  assert_leq(set, length(project$parameter_sets))
  
  # check before overwriting existing output
  if (project$active_set>0 & check_delete_output) {
    
    # ask before overwriting. On abort, return original project
    if (!user_yes_no(sprintf("Any existing output for set %s will be deleted. Continue? (Y/N): ", set))) {
      return(project)
    }
  }
  
  # drop chosen parameter set
  project$parameter_sets[[set]] <- NULL
  
  # drop chosen output
  project$output$single_set[[set]] <- NULL
  
  GTI_logevidence_model <- as.data.frame(unclass(project$output$all_sets$GTI_logevidence_model))[-set,]
  class(GTI_logevidence_model) <- "malecot_GTI_logevidence_model"
  project$output$all_sets$GTI_logevidence_model <- GTI_logevidence_model
  
  GTI_posterior_model <- as.data.frame(unclass(project$output$all_sets$GTI_posterior_model))[-set,]
  class(GTI_posterior_model) <- "malecot_GTI_posterior_model"
  project$output$all_sets$GTI_posterior_model <- GTI_posterior_model
  
  # make new final set active
  project$active_set <- length(project$parameter_sets)
  
  # recalculate evidence over sets if needed
  if (project$active_set > 0) {
    project <- recalculate_evidence(project)
  }
  
  # return invisibly
  invisible(project)
}

#------------------------------------------------
#' @title Run main MCMC
#'
#' @description Run the main MALECOT MCMC. Model parameters are taken from the 
#'   current active parameter set, and MCMC parameters are passed in as 
#'   arguments. All output is stored within the project.
#'   
#' @param project a MALECOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param K the values of K that the MCMC will explore
#' @param precision the level of precision at which allele frequencies are 
#'   represented in the bi-allelic case. This allows the use of look-up tables
#'   for the likelihood, which significantly speeds up the MCMC. Set to 0 to use
#'   exact values (up to C++ "double" precision) rather than using look-up 
#'   tables
#' @param burnin the number of burn-in iterations
#' @param samples the number of sampling iterations
#' @param rungs the number of temperature rungs
#' @param GTI_pow the power used in the generalised thermodynamic integration 
#'   method. Must be greater than 1.1
#' @param auto_converge whether convergence should be assessed automatically 
#'   every \code{converge_test} iterations, leading to termination of the 
#'   burn-in phase. If \code{FALSE} then the full \code{burnin} iterations are 
#'   used
#' @param converge_test test for convergence every \code{convergence_test} 
#'   iterations if \code{auto_converge} is being used
#' @param solve_label_switching_on whether to implement the Stevens' solution to
#'   the label-switching problem. If turned off then Q-matrix output will no 
#'   longer be correct, although evidence estimates will be unaffected.
#' @param coupling_on whether to implement Metropolis-coupling over temperature 
#'   rungs
#' @param cluster option to pass in a cluster environment (see package 
#'   "parallel")
#' @param pb_markdown whether to run progress bars in markdown mode, in which 
#'   case they are updated once at the end to avoid large amounts of output
#' @param store_acceptance whether to store acceptance rates for all parameters
#'   updated by Metropolis-Hastings. Proposal distributions are tuned adaptively
#'   with a target acceptance rate of 23\%
#' @param store_raw whether to store raw MCMC output in addition to summary 
#'   output. Setting to FALSE can considerably reduce output size in memory
#' @param silent whether to suppress all console output
#'
#' @export
#' @examples
#' # TODO

run_mcmc <- function(project, K = NULL, precision = 0.01, burnin = 1e3, samples = 1e3, rungs = 1, GTI_pow = 3, auto_converge = TRUE, converge_test = 1e2, solve_label_switching_on = TRUE, coupling_on = TRUE, cluster = NULL, pb_markdown = FALSE, store_acceptance = TRUE, store_raw = TRUE, silent = FALSE) {
  
  # start timer
  t0 <- Sys.time()
  
  # ---------- check inputs ----------
  
  assert_custom_class(project, "malecot_project")
  assert_pos_int(K, zero_allowed = FALSE)
  assert_single_pos(precision, zero_allowed = TRUE)
  assert_single_pos_int(burnin, zero_allowed = FALSE)
  assert_single_pos_int(samples, zero_allowed = FALSE)
  assert_single_pos_int(rungs, zero_allowed = FALSE)
  assert_single_pos(GTI_pow)
  assert_gr(GTI_pow, 1.1)
  assert_single_logical(auto_converge)
  assert_single_pos_int(converge_test, zero_allowed = FALSE)
  assert_single_logical(solve_label_switching_on)
  assert_single_logical(coupling_on)
  if (!is.null(cluster)) {
    assert_custom_class(project, "cluster")
  }
  assert_single_logical(pb_markdown)
  assert_single_logical(store_acceptance)
  assert_single_logical(silent)
  
  # get active set
  s <- project$active_set
  if (s==0) {
    stop("no active parameter set")
  }
  
  # check that precision is either zero or leads to simple sequence
  if (precision != 0) {
    if(!((1/precision)%%1) == 0) {
      stop("1/precision must be an integer")
    }
  }
  
  # get useful quantities
  n <- project$data_processed$n
  L <- project$data_processed$L
  data_format <- project$data_processed$data_format
  dat <- project$data_processed$data
  if (data_format == "biallelic") {
    dat = mat_to_rcpp(dat)
  }
  
  # ---------- create argument lists ----------
  
  # data list
  args_data <- list(data = dat,
                    n = n,
                    L = L,
                    alleles = project$data_processed$alleles)
  
  # input arguments list
  args_inputs <- list(precision = precision,
                      burnin = burnin,
                      samples = samples,
                      rungs = rungs,
                      GTI_pow = GTI_pow,
                      auto_converge = auto_converge,
                      converge_test = converge_test,
                      solve_label_switching_on = solve_label_switching_on,
                      coupling_on = coupling_on,
                      pb_markdown = pb_markdown,
                      store_acceptance = store_acceptance,
                      silent = !is.null(cluster))
  
  # combine model parameters list with input arguments
  args_model <- c(project$parameter_sets[[s]], args_inputs)
  
  # get COI_model in numeric form
  args_model$COI_model_numeric <- match(args_model$COI_model, c("uniform", "poisson" ,"nb"))
  
  # record type of lambda (scalar, vector, list)
  lambda_type <- 1
  if (!is.list(args_model$lambda)) {
    if (length(args_model$lambda) != 1) {
      lambda_type <- 2
    }
  } else {
    lambda_type <- 3
  }
  args_model$lambda_type <- lambda_type
  
  # R functions to pass to Rcpp
  args_functions <- list(test_convergence = test_convergence,
                         update_progress = update_progress)
  
  # define final argument list over all K
  parallel_args <- list()
  for (i in 1:length(K)) {
    
    # create progress bars
    pb_burnin <- txtProgressBar(min = 0, max = burnin, initial = NA, style = 3)
    pb_samples <- txtProgressBar(min = 0, max = samples, initial = NA, style = 3)
    args_progress <- list(pb_burnin = pb_burnin,
                          pb_samples = pb_samples)
    
    # incporporate arguments unique to this K
    args_model$K <- K[i]
    
    # create argument list
    parallel_args[[i]] <- list(args_data = args_data,
                               args_model = args_model,
                               args_functions = args_functions,
                               args_progress = args_progress)
  }
  
  # ---------- run MCMC ----------
  
  # split into parallel and serial implementations
  if (!is.null(cluster)) { # run in parallel
    clusterEvalQ(cluster, library(malecot))
    if (data_format == "biallelic") {
      output_raw <- clusterApplyLB(cl = cluster, parallel_args, run_mcmc_biallelic_cpp)
    }
    if (data_format == "multiallelic") {
      output_raw <- clusterApplyLB(cl = cluster, parallel_args, run_mcmc_multiallelic_cpp)
    }
  } else { # run in serial
    if (data_format == "biallelic") {
      output_raw <- lapply(parallel_args, run_mcmc_biallelic_cpp)
    }
    if (data_format == "multiallelic") {
      output_raw <- lapply(parallel_args, run_mcmc_multiallelic_cpp)
    }
  }
  
  # ---------- process results ----------
  
  # begin processing results
  if (!silent) {
    cat("Processing results\n")
  }
  
  # loop through K
  ret <- list()
  all_converged <- TRUE
  for (i in 1:length(K)) {
    
    # create name lists
    sample_names <- paste0("sample", 1:n)
    locus_names <- paste0("locus", 1:L)
    deme_names <- paste0("deme", 1:K[i])
    rung_names <- paste0("rung", 1:rungs)
    
    # ---------- raw mcmc results ----------
    
    # get loglikelihood in coda::mcmc format
    loglike_burnin <- mapply(function(x){mcmc(x)}, output_raw[[i]]$loglike_burnin)
    loglike_sampling <- mcmc(t(rcpp_to_mat(output_raw[[i]]$loglike_sampling)))
    
    # get full COI trace in coda::mcmc format
    full_COI <- mcmc(rcpp_to_mat(output_raw[[i]]$m_store))
    colnames(full_COI) <- sample_names
    
    # get full p trace in coda::mcmc format
    full_p <- list()
    for (k in 1:K[i]) {
      full_p[[k]] <- list()
      for (j in 1:L) {
        p_j <- rcpp_to_mat(output_raw[[i]]$p_store[[k]][[j]])
        colnames(p_j) <- paste0("allele", 1:ncol(p_j))
        full_p[[k]][[j]] <- mcmc(p_j)
      }
      names(full_p[[k]]) <- locus_names
    }
    names(full_p) <- deme_names
    
    # get full e1 and e2 trace in coda::mcmc format
    full_e1 <- full_e2 <- NULL
    if (data_format == "biallelic" && args_model$estimate_error) {
      full_e1 <- mcmc(output_raw[[i]]$e1_store)
      full_e2 <- mcmc(output_raw[[i]]$e2_store)
    }
    
    # get full COI_mean trace in coda::mcmc format
    full_COI_mean <- NULL
    if (args_model$COI_model %in% c("poisson", "nb")) {
      full_COI_mean <- rcpp_to_mat(output_raw[[i]]$COI_mean_store)
      colnames(full_COI_mean) <- deme_names
      full_COI_mean <- mcmc(full_COI_mean)
    }
    
    # get whether rungs have converged
    converged <- output_raw[[i]]$rung_converged
    if (all_converged && any(!converged)) {
      all_converged <- FALSE
    }
    
    # ---------- summary results ----------
    
    # get 95% credible intervals over sampling loglikelihoods
    loglike_intervals <- t(apply(loglike_sampling, 2, quantile_95))
    rownames(loglike_intervals) <- rung_names
    class(loglike_intervals) <- "malecot_loglike_intervals"
    
    # get 95% credible intervals over COI
    COI_intervals <- t(apply(full_COI, 2, quantile_95))
    class(COI_intervals) <- "malecot_COI_intervals"
    
    # get COI matrix
    COI_matrix <- apply(full_COI, 2, function(x) tabulate(x, nbins = args_model$COI_max))/samples
    
    # get 95% credible intervals over p
    p_intervals <- list()
    for (k in 1:K[i]) {
      p_intervals[[k]] <- list()
      for (j in 1:L) {
        p_intervals[[k]][[j]] <- t(apply(full_p[[k]][[j]], 2, quantile_95))
      }
      names(p_intervals[[k]]) <- locus_names
      class(p_intervals[[k]]) <- "malecot_p_intervals"
    }
    names(p_intervals) <- deme_names
    
    # get 95% credible intervals over COI_mean
    if (is.null(full_COI_mean)) {
      fake_COI_mean <- matrix((args_model$COI_max + 1)/2, ncol = K[i])
      colnames(fake_COI_mean) <- deme_names
      COI_mean_intervals <- t(apply(fake_COI_mean, 2, quantile_95))
    } else {
      COI_mean_intervals <- t(apply(full_COI_mean, 2, quantile_95))
    }
    class(COI_mean_intervals) <- "malecot_COI_mean_intervals"
    
    # get 95% credible intervals over e1 and e2
    e_intervals <- NULL
    if (data_format == "biallelic") {
      if (args_model$estimate_error) {
        e_intervals <- rbind(quantile_95(full_e1), quantile_95(full_e2))
      } else {
        e_intervals <- rbind(quantile_95(args_model$e1), quantile_95(args_model$e2))
      }
      rownames(e_intervals) <- c("e1", "e2")
      class(e_intervals) <- "malecot_e_intervals"
    }
    
    # process Q-matrix
    qmatrix <- rcpp_to_mat(output_raw[[i]]$qmatrix)
    colnames(qmatrix) <- deme_names
    rownames(qmatrix) <- sample_names
    class(qmatrix) <- "malecot_qmatrix"
    
    # ---------- GTI path and model evidence ----------
    
    # get ESS
    ESS <- effectiveSize(loglike_sampling)
    ESS[ESS == 0] <- samples # if no variation then assume zero autocorrelation
    ESS[ESS > samples] <- samples # ESS cannot exceed actual number of samples taken
    names(ESS) <- rung_names
    
    # weight likelihood according to GTI_pow
    loglike_weighted <- loglike_sampling
    for (j in 1:rungs) {
      beta_j <- j/rungs
      loglike_weighted[,j] <- GTI_pow*beta_j^(GTI_pow-1) * loglike_sampling[,j]
    }
    
    # calculate GTI path mean and SE
    GTI_path_mean <- colMeans(loglike_weighted)
    GTI_path_var <- apply(loglike_weighted, 2, var)
    GTI_path_SE <- sqrt(GTI_path_var/ESS)
    GTI_path <- data.frame(mean = GTI_path_mean, SE = GTI_path_SE)
    rownames(GTI_path) <- rung_names
    class(GTI_path) <- "malecot_GTI_path"
    
    # calculate GTI estimate of log-evidence
    GTI_vec <- 0.5*loglike_weighted[,1]/rungs
    if (rungs>1) {
      for (j in 2:rungs) {
        GTI_vec <- GTI_vec + 0.5*(loglike_weighted[,j]+loglike_weighted[,j-1])/rungs
      }
    }
    GTI_logevidence_mean <- mean(GTI_vec)
    
    # calculate standard error of GTI estimate
    GTI_ESS <- as.numeric(effectiveSize(GTI_vec))
    if (GTI_ESS==0) {
      GTI_ESS <- samples # if no variation then assume perfect mixing
    }
    GTI_logevidence_SE <- sqrt(var(GTI_vec)/GTI_ESS)
    
    # produce final GTI_logevidence object
    GTI_logevidence <- data.frame(estimate = GTI_logevidence_mean,
                                  SE = GTI_logevidence_SE)
    
    # ---------- acceptance rates ----------
    
    # process acceptance rates
    p_accept <- NULL
    m_accept <- NULL
    e_accept <- NULL
    coupling_accept <- NULL
    if (store_acceptance) {
      p_accept <- rcpp_to_mat(output_raw[[i]]$p_accept)/samples
      m_accept <- output_raw[[i]]$m_accept/samples
      e_accept <- output_raw[[i]]$e_accept/samples
      coupling_accept <- output_raw[[i]]$coupling_accept/samples
    }
    
    # ---------- save arguments ----------
    
    output_args <- list(precision = precision,
                        burnin = burnin,
                        samples = samples,
                        rungs = rungs,
                        GTI_pow = GTI_pow,
                        auto_converge = auto_converge,
                        converge_test = converge_test,
                        solve_label_switching_on = solve_label_switching_on,
                        coupling_on = coupling_on,
                        pb_markdown = pb_markdown,
                        store_acceptance = store_acceptance,
                        store_raw = store_raw,
                        silent = silent)
    
    # ---------- save results ----------
    
    # add to project
    project$output$single_set[[s]]$single_K[[K[i]]] <- list()
    
    project$output$single_set[[s]]$single_K[[K[i]]]$summary <- list(qmatrix = qmatrix,
                                                                    loglike_intervals = loglike_intervals,
                                                                    COI_matrix = COI_matrix,
                                                                    COI_intervals = COI_intervals,
                                                                    p_intervals = p_intervals,
                                                                    e_intervals = e_intervals,
                                                                    COI_mean_intervals = COI_mean_intervals,
                                                                    ESS = ESS,
                                                                    GTI_path = GTI_path,
                                                                    GTI_logevidence = GTI_logevidence,
                                                                    converged = converged)
    if (store_acceptance) {
      project$output$single_set[[s]]$single_K[[K[i]]]$summary$p_accept <- p_accept
      project$output$single_set[[s]]$single_K[[K[i]]]$summary$m_accept <- m_accept
      project$output$single_set[[s]]$single_K[[K[i]]]$summary$e_accept <- e_accept
      project$output$single_set[[s]]$single_K[[K[i]]]$summary$coupling_accept <- coupling_accept
    }
    
    if (store_raw) {
      project$output$single_set[[s]]$single_K[[K[i]]]$raw <- list(loglike_burnin = loglike_burnin,
                                                                  loglike_sampling = loglike_sampling,
                                                                  COI = full_COI,
                                                                  p = full_p,
                                                                  e1 = full_e1,
                                                                  e2 = full_e2,
                                                                  COI_mean = full_COI_mean)
    }
    
    project$output$single_set[[s]]$single_K[[K[i]]]$function_call <- list(args = output_args,
                                                                          call = match.call())
    
  } # end loop over K
  
  # name output over K
  K_all <- length(project$output$single_set[[s]]$single_K)
  names(project$output$single_set[[s]]$single_K) <- paste0("K", 1:K_all)
  
  # ---------- tidy up and end ----------
  
  # reorder qmatrices
  project <- align_qmatrix(project)
  
  # recalculate evidence over K
  project <- recalculate_evidence(project)
  
  # end timer
  if (!silent) {
    tdiff <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
    if (tdiff < 60) {
      message(sprintf("Total run-time: %s seconds", round(tdiff, 2)))
    } else {
      message(sprintf("Total run-time: %s minutes", round(tdiff/60, 2)))
    }
  }
  
  # warning if any rungs in any MCMCs did not converge
  if (!all_converged && !silent) {
    message("\n**WARNING** at least one MCMC run did not converge within specified burn-in\n")
  }
  
  # return invisibly
  invisible(project)
}

#------------------------------------------------
# extract GTI_logevidence from all K within a given parameter set
#' @noRd
get_GTI_logevidence_K <- function(proj, s) {
  
  # extract objects of interest
  x <- proj$output$single_set[[s]]$single_K
  if (length(x)==0) {
    return(proj)
  }
  
  # get log-evidence over all K
  GTI_logevidence_raw <- mapply(function(y) {
    GTI_logevidence <- y$summary$GTI_logevidence
    if (is.null(GTI_logevidence)) {
      return(rep(NA,2))
    } else {
      return(unlist(GTI_logevidence))
    }
  }, x)
  GTI_logevidence <- as.data.frame(t(GTI_logevidence_raw))
  names(GTI_logevidence) <- c("mean", "SE")
  rownames(GTI_logevidence) <- NULL
  GTI_logevidence <- cbind(K = 1:nrow(GTI_logevidence), GTI_logevidence)
  class(GTI_logevidence) <- "malecot_GTI_logevidence"
  
  # save result in project
  proj$output$single_set[[s]]$all_K$GTI_logevidence <- GTI_logevidence
  
  # return modified project
  return(proj)
}

#------------------------------------------------
# compute posterior over several log-evidence estimates
#' @noRd
get_GTI_posterior <- function(x) {
  
  # return NULL if all NA
  if (length(x$mean)==0 || all(is.na(x$mean))) {
    return(NULL)
  }
  
  # produce posterior estimates by simulation
  w <- which(!is.na(x$mean))
  GTI_posterior_raw <- GTI_posterior_K_sim_cpp(list(mean = x$mean[w],
                                                    SE = x$SE[w],
                                                    reps = 1e6))$ret
  # get posterior quantiles in dataframe
  GTI_posterior_quantiles <- t(mapply(quantile_95, GTI_posterior_raw))
  GTI_posterior <- data.frame(Q2.5 = rep(NA, nrow(GTI_posterior_quantiles)), Q50 = NA, Q97.5 = NA)
  GTI_posterior[w,] <- GTI_posterior_quantiles
  
  return(GTI_posterior)
}

#------------------------------------------------
# call get_GTI_posterior over values of K
#' @noRd
get_GTI_posterior_K <- function(proj, s) {
  
  # calculate posterior K
  GTI_posterior <- get_GTI_posterior(proj$output$single_set[[s]]$all_K$GTI_logevidence)
  if (is.null(GTI_posterior)) {
    return(proj)
  }
  GTI_posterior <- cbind(K = 1:nrow(GTI_posterior), GTI_posterior)
  class(GTI_posterior) <- "malecot_GTI_posterior"
  proj$output$single_set[[s]]$all_K$GTI_posterior <- GTI_posterior
  
  # return modified project
  return(proj)
}

#------------------------------------------------
# call get_GTI_posterior over models
#' @noRd
get_GTI_posterior_model <- function(proj) {
  
  # calculate posterior model
  GTI_posterior_model_raw <- get_GTI_posterior(proj$output$all_sets$GTI_logevidence_model)
  if (is.null(GTI_posterior_model_raw)) {
    return(proj)
  }
  proj$output$all_sets$GTI_posterior_model$Q2.5 <- GTI_posterior_model_raw$Q2.5
  proj$output$all_sets$GTI_posterior_model$Q50 <- GTI_posterior_model_raw$Q50
  proj$output$all_sets$GTI_posterior_model$Q97.5 <- GTI_posterior_model_raw$Q97.5
  
  # return modified project
  return(proj)
}

#------------------------------------------------
# integrate multiple log-evidence estimates by simulation
#' @noRd
integrate_GTI_logevidence <- function(x) {
  
  # return NULL if all NA
  if (length(x$mean)==0 || all(is.na(x$mean))) {
    return(NULL)
  }
  
  # produce integrated estimates by simulation
  w <- which(!is.na(x$mean))
  if (length(w)==1) {
    ret <- list(mean = x$mean[w], SE = x$SE[w])
  } else {
    ret <- GTI_integrated_K_sim_cpp(list(mean = x$mean[w], SE = x$SE[w], reps = 1e6))
  }
  
  return(ret)
}

#------------------------------------------------
# log-evidence estimates over K
#' @noRd
integrate_GTI_logevidence_K <- function(proj, s) {
  
  # integrate over K
  integrated_raw <- integrate_GTI_logevidence(proj$output$single_set[[s]]$all_K$GTI_logevidence)
  if (is.null(integrated_raw)) {
    return(proj)
  }
  proj$output$all_sets$GTI_logevidence_model$mean[s] <- integrated_raw$mean
  proj$output$all_sets$GTI_logevidence_model$SE[s] <- integrated_raw$SE
  
  # return modified project
  return(proj)
}

#------------------------------------------------
#' @title Recalculate evidence and posterior estimates
#'
#' @description When a new value of K is added in to the analysis it affects all downstream evidence estimates that depend on this K - for example the overall model evidence integrated over K. This function therefore looks through all values of K in the active set and recalculates all downstream elements as needed.
#'
#' @param project a MALCOT project, as produced by the function 
#'   \code{malecot_project()}
#' 
#' @export

recalculate_evidence <- function(project) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  
  # get active set
  s <- project$active_set
  if (s==0) {
    stop("no active parameter set")
  }
  
  # get log-evidence over all K and load into project
  project <- get_GTI_logevidence_K(project, s)
  
  # produce posterior estimates of K by simulation and load into project
  project <- get_GTI_posterior_K(project, s)
  
  # get log-evidence over all parameter sets
  project <- integrate_GTI_logevidence_K(project, s)
  
  # get posterior over all parameter sets
  project <- get_GTI_posterior_model(project)
  
  # return modified project
  return(project)
}


#------------------------------------------------
# align qmatrices over all K
#' @noRd
align_qmatrix <- function(proj) {
  
  # get active set
  s <- proj$active_set
  
  # extract objects of interest
  x <- proj$output$single_set[[s]]$single_K
  
  # find values with output
  null_output <- mapply(function(y) {is.null(y$summary$qmatrix)}, x)
  w <- which(!null_output)
  
  # set template to first qmatrix
  template_qmatrix <- x[[w[1]]]$summary$qmatrix
  n <- nrow(template_qmatrix)
  c <- ncol(template_qmatrix)
  
  # loop through output
  best_perm <- NULL
  for (i in w) {
    
    # expand template
    qmatrix <- unclass(x[[i]]$summary$qmatrix)
    template_qmatrix <- cbind(template_qmatrix, matrix(0, n, i-c))
    
    # calculate cost matrix
    cost_mat <- matrix(0,i,i)
    for (k1 in 1:i) {
      for (k2 in 1:i) {
        cost_mat[k1,k2] <- sum(qmatrix[,k1] * (log(qmatrix[,k1]+1e-100) - log(template_qmatrix[,k2]+1e-100)))
      }
    }
    
    # get lowest cost permutation
    best_perm <- call_hungarian(cost_mat)$best_matching + 1
    best_perm_order <- order(best_perm)
    
    # reorder elements
    deme_names <- paste0("deme", 1:ncol(qmatrix))
    qmatrix <- qmatrix[, best_perm_order, drop = FALSE]
    colnames(qmatrix) <- deme_names
    
    if (!is.null(x[[i]]$raw)) {
      
      p_raw <- x[[i]]$raw$p[best_perm_order]
      names(p_raw) <- deme_names
      proj$output$single_set[[s]]$single_K[[i]]$raw$p <- p_raw
      
      COI_mean_raw <- x[[i]]$raw$COI_mean
      if (!is.null(COI_mean_raw)) {
        COI_mean_raw <- COI_mean_raw[, best_perm_order, drop = FALSE]
        colnames(COI_mean_raw) <- deme_names
        proj$output$single_set[[s]]$single_K[[i]]$raw$COI_mean <- COI_mean_raw
      }
    }
    
    p_intervals <- x[[i]]$summary$p_intervals[best_perm_order]
    names(p_intervals) <- deme_names
    proj$output$single_set[[s]]$single_K[[i]]$summary$p_intervals <- p_intervals
    
    COI_mean_quantiles <- x[[i]]$summary$COI_mean_quantiles
    if (!is.null(COI_mean_quantiles)) {
      COI_mean_quantiles <- COI_mean_quantiles[best_perm_order, , drop = FALSE]
      rownames(COI_mean_quantiles) <- deme_names
      class(COI_mean_quantiles) <- "malecot_COI_mean_quantiles"
      proj$output$single_set[[s]]$single_K[[i]]$summary$COI_mean_quantiles <- COI_mean_quantiles
    }
    
    # qmatrix becomes template for next level up
    template_qmatrix <- qmatrix
    
    # store result
    class(qmatrix) <- "malecot_qmatrix"
    proj$output$single_set[[s]]$single_K[[i]]$summary$qmatrix <- qmatrix
  }
  
  # return modified project
  return(proj)
}

#------------------------------------------------
#' @title Match grouping against q-matrix
#'
#' @description Compares qmatrix output for a chosen value of K against a 
#'   \code{target_group} vector. Returns the order of \code{target_group} 
#'   groups, such that there is the best possible alignment against the qmatrix.
#'   For example, if the vector returned is \code{c(2,3,1)} then the second
#'   group in the target vector should be matched against the first group in the
#'   qmatrix, followed by the third group in the target vector against the
#'   second group in the qmatrix, followed by the first group in the target
#'   vector against the third group in the qmatrix.
#'
#' @param project a MALCOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param K compare against qmatrix output for this value of K
#' @param target_group the target group to be aligned against the qmatrix
#'
#' @export
#' @examples
#' # TODO

get_group_order <- function(project, K, target_group) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  assert_single_pos_int(K, zero_allowed = FALSE)
  assert_vector(target_group)
  assert_pos_int(target_group)
  
  # create target_qmatrix from target_group
  n <- length(target_group)
  target_qmatrix <- matrix(0, n, max(target_group))
  target_qmatrix[cbind(1:n, target_group)] <- 1
  
  # get active set and check non-zero
  s <- project$active_set
  if (s == 0) {
    stop("no active parameter set")
  }
  
  # check output exists for chosen K
  qmatrix <- project$output$single_set[[s]]$single_K[[K]]$summary$qmatrix
  if (is.null(qmatrix)) {
    stop(sprintf("no qmatrix output for K = %s of active set", K))
  }
  
  # check same number of rows
  if (nrow(qmatrix) != nrow(target_qmatrix)) {
    stop("target_group must have same number of elements as qmatrix output")
  }
  n <- nrow(qmatrix)
  
  # expand qmatrix and/or target_qmatrix to get same number of cols
  if (ncol(target_qmatrix) > ncol(qmatrix)) {
    qmatrix <- cbind( qmatrix, matrix(0, n, ncol(target_qmatrix) - ncol(qmatrix)) )
  }
  if (ncol(qmatrix) > ncol(target_qmatrix)) {
    target_qmatrix <- cbind( target_qmatrix, matrix(0, n, ncol(qmatrix) - ncol(target_qmatrix)) )
  }
  
  # calculate cost matrix
  cost_mat <- matrix(0,K,K)
  for (k1 in 1:K) {
    for (k2 in 1:K) {
      cost_mat[k1,k2] <- sum(qmatrix[,k1]*(1-target_qmatrix[,k2]))
    }
  }
  
  # get lowest cost permutation
  best_perm <- call_hungarian(cost_mat)$best_matching + 1
  best_perm_order <- order(best_perm)
  
  return(best_perm)
}

#------------------------------------------------
#' @title Get ESS
#'
#' @description Returns effective sample size (ESS) of chosen model run.
#'
#' @param project a MALCOT project, as produced by the function 
#'   \code{malecot_project()}
#' @param K get ESS for this value of K
#'
#' @export
#' @examples
#' # TODO

get_ESS <- function(project, K = NULL) {
  
  # check inputs
  assert_custom_class(project, "malecot_project")
  if (!is.null(K)) {
    assert_single_pos_int(K, zero_allowed = FALSE)
  }
  
  # get active set and check non-zero
  s <- project$active_set
  if (s == 0) {
    stop("no active parameter set")
  }
  
  # set default K to first value with output
  null_output <- mapply(function(x) {is.null(x$summary$ESS)}, project$output$single_set[[s]]$single_K)
  if (all(null_output)) {
    stop("no ESS output for active parameter set")
  }
  if (is.null(K)) {
    K <- which(!null_output)[1]
    message(sprintf("using K = %s by default", K))
  }
  
  # check output exists for chosen K
  ESS <- project$output$single_set[[s]]$single_K[[K]]$summary$ESS
  if (is.null(ESS)) {
    stop(sprintf("no ESS output for K = %s of active set", K))
  }
  
  return(ESS)
}
bobverity/MALECOT documentation built on May 13, 2019, 4:01 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
bobverity/MALECOT
Joint estimation of COI and population structure for malaria genetic data

R/main.R
In bobverity/MALECOT: Joint estimation of COI and population structure for malaria genetic data

R Package Documentation

Browse R Packages

We want your feedback!

bobverity/MALECOT Joint estimation of COI and population structure for malaria genetic data

R/main.R In bobverity/MALECOT: Joint estimation of COI and population structure for malaria genetic data

R Package Documentation

Browse R Packages

We want your feedback!

bobverity/MALECOT
Joint estimation of COI and population structure for malaria genetic data

R/main.R
In bobverity/MALECOT: Joint estimation of COI and population structure for malaria genetic data