## SKG
## APRIL 9, 2020
## Holy Thursday
## Trying the outside log like AGAIN
## maybe this time it will work
#' Get the log likelihood estimate for outside generated data
#'
#' @param par vector of beta0, beta1
#' @param data data frame with the following columns
#' \describe{
#' \item{obs_n}{number in cluster}
#' \item{obs_n_pos}{number of positive smears in cluster}
#' \item{freq}{number of times this frequency occurs}
#' }
#' @param sampled_trees data frame with the following columns
#' \describe{
#' \item{i_pos}{number of transmission from a smear positive person}
#' \item{i_neg}{number of transmission from a smear negative person}
#' \item{n_pos}{number of positive smears}
#' \item{n}{cluster size}
#' \item{freq}{time it occurs}
#' }
#' @param B number of samples to draw
#' @param return_neg logical indicating whether we should return negative log likelihood. Default TRUE
#' @param w_pos probability of a positive smear. Default is .5
#' @param sampled_vars_list pre computed list of sampling helpers. Default is NULL meaning we compute within the function.
#' @return log likelihood summed over the clusters
#' @export
#' @details TODO
loglike_outside_gen <- function(par, data,
sampled_trees,
B,
return_neg = TRUE,
w_pos = .5,
sampled_vars_list = NULL){
p_plus <- 1 / (1 + exp(-(sum(par) )))
p_neg <- 1 / (1 + exp(-par[1]))
sampled_trees_like <- like_sampled_trees(p_plus, p_neg,
sampled_trees) # df with (n, n_pos, avg_like)
df <- data %>%
dplyr::mutate(obs_n = .data$n,
obs_n_pos = .data$n_pos) %>%
dplyr::group_by(.data$obs_n,
.data$obs_n_pos,
.data$freq) %>%
tidyr::nest() %>%
dplyr::mutate(like = purrr::map(.data$data,
like_outside_nx,
sampled_trees = sampled_trees_like,
p_plus = p_plus,
p_neg = p_neg,
w_pos = w_pos,
B = B,
sampled_vars_list = sampled_vars_list) ) %>%
dplyr::select(-.data$data) %>%
tidyr::unnest(cols = like) %>%
dplyr::mutate(loglike = log(.data$like))
<<<<<<< HEAD
loglike <- sum(df$freq * df$loglike)
=======
loglike <- sum(df$freq * df$loglike)
>>>>>>> 7e1a77c693066842666d17ce2eedcb06dc5d9643
if(return_neg){
loglike <- -loglike
}
## print(loglike)
return(loglike)
}
#' Calculate the conditional likelihood for outside generator clusters
#'
#' @param data data frame with the following columns
#' \describe{
#' \item{n}{cluster size}
#' \item{n_pos}{number of positive smears}
#' }
#' @param sampled_trees data frame with the following columns
#' \describe{
#' \item{n}{cluster size}
#' \item{n_pos}{number of positive smears}
#' \item{avg_like}{like for this set}
#' }
#' @param p_plus probability of tranmsmitting disease from a positive smear
#' @param p_neg probability of transmistting disease from negative smear
#' @param w_pos probability of a positive smear
#' @param B number of samples
#' @param sampled_vars_list pre computed list of sampling helpers.
#' Default is NULL meaning we compute within the function.
#' @return average likelihood contribution from cluster of size n with n_pos positive smears
#' @details TODO
like_outside_nx <- function(data,
sampled_trees,
p_plus, p_neg,
w_pos, B,
sampled_vars_list = NULL){
n <- data$n
n_pos <- data$n_pos
stopifnot(length(n) == 1)
if(is.null(sampled_vars_list)){
## Sample the new things we need: (x0, n0, nprime, xprime)
sampled_vars_list <- sample_outside_gen_vars(n, n_pos, B)
} else {
sampled_vars_list <- sampled_vars_list[[paste0(n, "-", n_pos)]]
}
## Calculate the average *likelihood* through recursion and the previous sampled trees
like <- sapply(sampled_vars_list, like_inside_cond,
sampled_trees_calc = sampled_trees,
p_plus = p_plus, p_neg = p_neg,
w_pos = w_pos)
return(mean(like))
}
#' Calculate the likelihood, summing over different sub-trees
#'
#' @param sampled_vars_list list with the following entries
#' \describe{
#' \item{n_prime}{vector of non-empty partitions}
#' \item{n_pos_prime}{vector of possibly empty partitions}
#' \item{n0}{number outside generator infects}
#' \item{x0}{smear of outside generator}
#' }
#' @param sampled_trees_calc data frame with the following columns
#' \describe{
#' \item{n}{number of nodes in sampled tree}
#' \item{n_pos}{number of positive smears}
#' \item{avg_like}{pre-calculated likelihood for given tree}
#' }
#' @param p_plus probability of positive transmission
#' @param p_neg probability of a negative transmission
#' @param w_pos probablity of smear positive
#' @return a single number, the likelihood for the possibly multi-root tree
like_inside_cond <- function(sampled_vars_list,
sampled_trees_calc,
p_plus, p_neg,
w_pos){
n_vec <- sampled_vars_list$n_prime
n_pos_vec <- sampled_vars_list$n_pos_prime
x0 <- sampled_vars_list$x0
n0 <- sampled_vars_list$n0
if(length(n_vec) != length(n_pos_vec)) browser()
## inner_df <- data.frame(n = n_vec,
## n_pos = n_pos_vec)
## joined_df <- dplyr::left_join(inner_df, sampled_trees_calc,
## by = c("n", "n_pos"))
avg_like <- sapply(1:length(n_vec), function(ii){
sampled_trees_calc$avg_like[sampled_trees_calc$n == n_vec[ii] &
sampled_trees_calc$n_pos == n_pos_vec[ii]]
})
n <- sum(n_vec)
## probabilities
p_x0 <- prob_x0(x0, w_pos)
p_n0 <- prob_n0(p_plus, p_neg, x0, n0, n)
# #p_like1 <- joined_df$avg_like
p_like1 <- avg_like
like <- sum(p_x0 * p_n0 * p_like1)
if(is.na(like)) browser()
return(like)
}
#' Get the probability of infecting n0 people
#'
#' @param p_plus probability of infection if positive smear
#' @param p_neg probability of infection if negative smear
#' @param x0 0/1 -/+ smear for generator
#' @param n0 number of observed infections by generator
#' @param n maximum possible number of infections by generator
prob_n0 <- function(p_plus, p_neg, x0, n0, n){
p0 <- ifelse(x0 == 1, p_plus, p_neg)
denom <- sum(p0^(1:n))
num <- p0^n0
return(num/denom)
}
#' Return probability of given smear
#'
#' @param x0 observed smear (0/1 = -/+)
#' @param w_pos probability of a smear
prob_x0 <- function(x0, w_pos){
return(ifelse(x0 == 0, 1 - w_pos,
w_pos))
}
#' Calculate the likelihood for a single conditional tree
#'
#' @param p_plus probability of positive transmission
#' @param p_neg probability of negative transmission
#' @param sampled_trees data frame with the following columns
#' \describe{
#' \item{n}{number in cluster}
#' \item{n_pos}{number of postive smears in cluster}
#' \item{i_pos}{number of positive transmission}
#' \item{i_neg}{number of negative transmission}
#' \item{freq}{times this combination occurred}
#' }
#' @return data frame with average likelihood for clusters of size n, n_pos smears
like_sampled_trees <- function(p_plus, p_neg, sampled_trees){
sampled_trees$p_plus <- p_plus
sampled_trees$p_neg <- p_neg
sampled_trees <- sampled_trees %>%
dplyr::group_by(.data$n, .data$n_pos) %>% # will this work who knows
dplyr::summarize(avg_like = (
sum((1- .data$p_plus)^.data$n_pos *
(1- .data$p_neg)^(.data$n-.data$n_pos) *
.data$p_plus^(.data$i_pos) *
.data$p_neg^(.data$i_neg) *
.data$freq)) /
sum(.data$freq)
)
return(sampled_trees)
}
## NEXT
## SIMULATE TABLES UP to (n,x) n = 1, .., 5, x = 0, ..., 5 and save
## Then try this out PLEASE WORKPatri
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.