R/sample-trees-many-index-cases.R

Defines functions sample_unique_perms_o sample_uniform_trees_nxo

Documented in sample_uniform_trees_nxo sample_unique_perms_o

## SKG
## March 9, 2020
## faster/less memory tree sampling







#' Sample uniform trees many root nodes
#'
#' @param n total size
#' @param x total number of smear pos
#' @param B number of trees to sample
#' @return data frame with the following columns
#' \describe{
#' \item{n}{total size}
#' \item{n_pos}{number of smear pos}
#' \item{i_pos}{number of i-positive transmissions}
#' \item{i_neg}{number of i-negative transmissions}
#' \item{freq}{frequency of (n,x,i)}
#' }
#' @export
#' @details can have many root nodes
sample_uniform_trees_nxo <- function(n, x, B){

  ## If size 1, have trivial samples
  if(n == 1){
    df <- data.frame(n = n,
                     n_pos = x,
                     i_pos = 0,
                     i_neg = 0,
                     freq = B,
                     stringsAsFactors = FALSE)
    return(df)
  }

  ## Sample generation sizes
  g_vec <- 1 + rbinom(n = B, size = n-1, prob = .5) # number of generations where first generation is fixed

  g_tab <- table(g_vec)
  unique_g <- sort(unique(g_vec))
  i_list <- vector(mode = "list", length = length(unique_g))

  for(ii in 1:length(unique_g)){
    ## Sample unique permutations of generation sizes given g
    g <- unique_g[ii]
    perm_mat <- sample_unique_perms_o(g = g, n = n,
                                    B = as.numeric(g_tab[ii]))
    i_list[[ii]] <- sample_i(perm_mat, x)  # generate whole tree but only summarize i+
  }
  i_mat <- do.call('rbind', i_list)

  df <- data.frame(table(i_mat[,1], i_mat[,2]))
  colnames(df) <- c("i_pos", "i_neg", "freq")
  df$n <- n
  df$x <- x
  df$i_pos <- as.numeric(as.character(df$i_pos))
  df$i_neg <- as.numeric(as.character(df$i_neg))
  df <- df %>% dplyr::rename(n_pos = "x") %>%
      dplyr::select(.data$n, .data$n_pos, .data$i_pos, .data$i_neg, .data$freq) 

  return(df)


}



#' Sample unique permutations from a partition space (now with outsiders info infecting many)
#'
#' @param g number of generations
#' @param n total size of cluster
#' @param B number of permutations to draw
#' @return matrix of size g x B and the first row is all 1.  Each column is a unique permutation that sums to n and all are positive entries
#' @details multiple index case version
sample_unique_perms_o <- function(g, n, B){

  if(n == 1) {
    return(matrix(1, ncol = B, nrow = 1))
  } else if(g == n){
    return(matrix(1, ncol = B, nrow = g))
  }

  parts <- partitions::restrictedparts(n = n, m = g,
                                       include.zero = FALSE)
  wts <- apply(parts, 2, function(x){
    if(length(unique(x)) == 1) return(1)
    RcppAlgos::permuteCount(sort(unique(x)), freqs = table(x))
  })
  length(wts) == ncol(parts)
  part_inds <- sample(1:ncol(parts), size = B,
                      replace = TRUE, prob = wts / sum(wts))
  drawn_parts <- parts[, part_inds, drop = FALSE]
  drawn_perms <- plyr::alply(.data = drawn_parts,
                             .margins = 2,
                             .fun = function(x){
                               if(length(sort(unique(x))) == 1){
                                 out <- matrix(rep(x[1], g), ncol = 1, nrow = g)
                               } else{
                                  out <- matrix(RcppAlgos::permuteSample(v = sort(unique(x)),
                                                                  freqs = table(x), n = 1),
                                                ncol = 1)

                               }
                               return(out)
                             })
  drawn_perms <- do.call('cbind', drawn_perms)
  return(drawn_perms)

}
skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.