R/likelihood-sampler.R

Defines functions sample_unique_perms1 sample_unique_perms0 sample_RUP0 sample_outside_gen_vars

## SKG
## April 9, 2020 - Holy Thursday
## wahh
## sampling some outside stuff. UGH


#' Sample the outside helpers
#'
#' @param n size of cluster
#' @param n_pos number of positive smears
#' @param B number of draws to make
#' @return list of lists.
#' Second list has columns x0, n0, n_prime, n_pos_prime
#' @details x0 is 0/1, n0 is between 1 and n, n_prime is a vector of positive entries that adds to n and is of length n0.  n_pos_prime is a vector of non-negative entries that add to n_pos and is of length n0.
sample_outside_gen_vars <- function(n, n_pos, B){


    sampled_var_list <- vector(mode = "list", length = B)
    x0 <- sample(0:1, size = B, replace = TRUE)
    
    if(n == 1){
        for(ii in 1:B){
            new_list <- list(x0 = x0[ii],
                             n0 = 1,
                             n_prime = 1,
                             n_pos_prime = n_pos)
            sampled_var_list[[ii]] <- new_list
        }
        return(sampled_var_list)
    }

    n0 <- sample(1:(n), size = B, replace = TRUE)
    
    

    for(ii in 1:B){
        n_prime <- as.numeric(sample_unique_perms1(g = n0[ii],
                                       n = n,
                                       B = 1))

        n_pos_prime <- as.numeric(sample_RUP0(g = n0[ii],
                                              n_prime = n_prime,
                                              n_pos = n_pos,
                                              B = B)
                                  )
        if(any(n_pos_prime > n_prime)) browser()

        new_list <- list(x0 = x0[ii],
                         n0 = n0[ii],
                         n_prime = n_prime,
                         n_pos_prime = n_pos_prime)
        sampled_var_list[[ii]] <- new_list
    }

    return(sampled_var_list)
}


#' Sample a random uniform partition of smears according to n_prime
#'
#' @param g number of generations
#' @param n_prime number in each generation
#' @param total number of n_positives to distribute among generations
#' @param B total samples
#' @return vector of same length of n_prime that sams to n_pos
sample_RUP0 <- function(g, n_prime, n_pos, B){
    if(n_pos == 0){
        return(rep(0, length(n_prime)))
    } else if(n_pos == sum(n_prime)){
        return(n_prime)
    }
    perm <- sample(c(rep(1, n_pos), rep(0, sum(n_prime) - n_pos)))
    n_pos_prime <- numeric(length(n_prime))
    n_pos_prime[1] <- sum(perm[1:n_prime[1]])
    if(length(n_prime) == 1){
        return(n_pos_prime)
    }
    for(ii in 2:length(n_prime)){
        start_ind <- sum(n_prime[1:(ii-1)]) + 1
        stop_ind <- sum(n_prime[1:ii])
        n_pos_prime[ii] <- sum(perm[start_ind:stop_ind])
    }
    if(sum(n_pos_prime) != n_pos) browser()
    return(n_pos_prime)
    

}



#' Sample unique permutations from a partition space
#'
#' @param g number of generations
#' @param n total size of cluster
#' @param B number of permutations to draw
#' @return matrix of size g x B. Each column is a unique permutation that sums to n and all are non-negative entries
sample_unique_perms0 <- function(g, n, B){

    if(n == 0){
        return(matrix(0, ncol = B, nrow = g))
    }
    if(g == 1){
        return(matrix(n, ncol = B, nrow = 1))
    }
    if(g == n){
        return(matrix(1, ncol = B, nrow = g))
    }

    parts <- partitions::restrictedparts(n = n, m = g,
                                         include.zero = TRUE)
    wts <- apply(parts, 2, function(x){
        if(length(unique(x)) == 1) return(1)
        RcppAlgos::permuteCount(sort(unique(x)), freqs = table(x))
    })
    length(wts) == ncol(parts)
    part_inds <- sample(1:ncol(parts), size = B,
                        replace = TRUE, prob = wts / sum(wts))
    drawn_parts <- parts[, part_inds, drop = FALSE]
    drawn_perms <- plyr::alply(.data = drawn_parts,
                               .margins = 2,
                               .fun = function(x){
                                   if(length(sort(unique(x))) == 1){
                                       out <- matrix(rep(x[1], g), ncol = 1, nrow = g)
                                   } else{
                                       out <- matrix(RcppAlgos::permuteSample(v = sort(unique(x)),
                                                                              freqs = table(x), n = 1),
                                                     ncol = 1)

                                   }
                                   return(out)
                               })
    drawn_perms <- do.call('cbind', drawn_perms)
    return(drawn_perms)

}


#' Sample unique permutations from a partition space
#'
#' @param g number of generations
#' @param n total size of cluster
#' @param B number of permutations to draw
#' @return matrix of size g x B and  Each column is a unique permutation that sums to n and all are positive entries.  First column doesn't have to be a 1.
sample_unique_perms1 <- function(g, n, B){

  if(n == 1) {
    return(matrix(1, ncol = B, nrow = 1))
  } else if(g == 1){
      return(matrix(n, ncol = B, nrow = 1))
  } else if(g == n){
    return(matrix(1, ncol = B, nrow = g))
  }

  parts <- partitions::restrictedparts(n = n, m = g,
                                       include.zero = FALSE)
  wts <- apply(parts, 2, function(x){
    if(length(unique(x)) == 1) return(1)
    RcppAlgos::permuteCount(sort(unique(x)), freqs = table(x))
  })
  length(wts) == ncol(parts)
  part_inds <- sample(1:ncol(parts), size = B,
                      replace = TRUE, prob = wts / sum(wts))
  drawn_parts <- parts[, part_inds, drop = FALSE]
  drawn_perms <- plyr::alply(.data = drawn_parts,
                             .margins = 2,
                             .fun = function(x){
                               if(length(sort(unique(x))) == 1){
                                 out <- matrix(rep(x[1], g), ncol = 1, nrow = g)
                               } else{
                                  out <- matrix(RcppAlgos::permuteSample(v = sort(unique(x)),
                                                                  freqs = table(x), n = 1),
                                                ncol = 1)

                               }
                               return(out)
                             })

    drawn_perms <- do.call('cbind', drawn_perms)
 #   if(nrow(drawn_perms) != g) browser()
  return(drawn_perms)

}



## Tomorrow:
## Change in0x0 sampler to have option to just sample i
## 
## Run simulation to populate tables of n,x, i, freq
##
## Test poor neglected functions in likelihood-outside and likelihood-sampler

## TODO QUESTION:  What is difference between sampling smears from RUP>=0 and randomly permuting a string of 0s and 1s.  is there any
skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.