TBornotTB: Analyze Tuberculosis Patients in Maryland

Documented in loglike_clust_cond loglike_obs_data

## SKG
## Likelihood calcs
## But like faster
## 2/4/20



#' Calculate the log likelihood of a cluster conditioned on size and smear pos. num
#'
#' @param par vector where the first parameter is "beta_0" and the second is "beta_1"
#' @param data data frame with columns
#' \describe{
#' \item{n}{number in cluster}
#' \item{n_pos}{number of positive smears in cluster}
#' \item{freq}{number of times this frequency occurs}
#' }
#' @param sampled_trees_summary data frame with the following columns
#' \describe{
#' \item{i_pos}{number of transmission from a smear positive person}
#' \item{n_pos}{number of positive smears}
#' \item{n}{cluster size}
#' }
#' @return log likelihood summed over the clusters
loglike_obs_data <- function(par,
                             data,
                             sampled_trees_summary,
                             return_neg = FALSE){

  p_plus <- as.numeric(1 / (1 + exp(-par[1] - par[2])))
  p_neg <- as.numeric(1 / (1 + exp(-par[1])))


  df <- sampled_trees_summary %>%
    dplyr::mutate(n1 = .data$n,
                  n1_pos = .data$n_pos) %>%
    dplyr::group_by(.data$n1,
                    .data$n1_pos) %>%
    tidyr::nest() %>%
    dplyr::mutate(loglike = purrr::map( .data$data,
                                        loglike_clust_cond,
                                        p_plus = p_plus,
                                        p_neg = p_neg) ) %>%
    dplyr::select(-.data$data) %>%
    tidyr::unnest(cols = loglike) %>%
    dplyr::rename(n = n1, n_pos = n1_pos) # this is dumb



  df_join <- dplyr::left_join(df, data, by = c("n", "n_pos"))


  loglike <- sum(df_join$freq * df_join$loglike)
  if(return_neg){
    loglike <- -loglike
  }
  return(loglike)


}


#' Calculate the log likelihood of a cluster conditioned on size and smear pos. num
#'
#' @param p_plus probability of smear pos
#' @param p_neg probability of smear neg
#' @param clust_df data frame with the following columns
#' \describe{
#' \item{i_pos}{number of transmission from a smear positive person}
#' \item{n_pos}{number of positive smears}
#' \item{n}{cluster size}
#' }
#' @return log likelihood summed over the clusters
loglike_clust_cond <- function(p_plus, p_neg,
                               clust_df){
#  browser()
  n <- clust_df$n[1]
  n_pos <- clust_df$n_pos[1]
  out <- with(clust_df, (p_plus)^i_pos *
     (p_neg)^(n - 1 - i_pos) *
    (1 - p_plus)^n_pos *
    (1 - p_neg)^(n - n_pos))
  if(any(is.na(out))) browser()
  return(log(sum(out)))
}