## Passes tests but doesn't agree with faster, loglike
#' Calculate the log likelihood for data
#'
#' @param par vector of parameters "beta_0" and "beta_1"
#' @param cluster_summary output from \code{summarize_clusters()}
#' @param sampled_trees draws from \code{simulate_many_cond_bp}
#' @param return_neg If TRUE, return the negative log like.
#' @param n_trials number of trials for each "average"
#' @return likelihood summing over the 'average' of cluster with given n and n_pos (single number), weighted by the data
loglike_cluster_summary <- function(par,
cluster_summary,
sampled_trees,
return_neg = FALSE,
n_trials = 10){
loglike <- rep(0, nrow(cluster_summary))
for(ii in 1:nrow(cluster_summary)){
n <- cluster_summary$n[ii]
n_pos <- cluster_summary$n_pos[ii]
n_vec <- rep(n, each = n_trials)
n_pos_vec <- rep(n_pos, each = n_trials)
sub_sampled_trees <- sampled_trees %>%
dplyr::filter(.data$cluster_size == n, .data$cluster_pos == n_pos)
loglike[ii] <- loglike_all_clusts(par = par,
n_vec = n_vec,
n_pos_vec = n_pos_vec,
sampled_trees = sub_sampled_trees,
return_neg = return_neg)
}
return(sum(cluster_summary$freq * loglike))
}
#' Calculate the log likelihood for data
#'
#' @param par vector of parameters "beta_0" and "beta_1"
#' @param n number in cluster
#' @param n_pos number of positive smears
#' @param sampled_trees output from \code{simulate_many_cond_bp} where all n and n_pos are the same for each cluster
#' @param return_neg If TRUE, return the negative log like.
#' @return likelihood of 'average' of cluster with given n and n_pos (single number)
loglike_all_clusts <- function(par,
n_vec,
n_pos_vec,
sampled_trees,
return_neg = FALSE){
tab <- as.data.frame(table(n_vec, n_pos_vec))
tab <- tab[tab$Freq != 0,]
tab$id <- 1:nrow(tab)
tab$like <- NA
## TODO: dplyrify or something
for(ii in 1:nrow(tab)){
n <- n_vec[ii]
n_pos <- n_pos_vec[ii]
trees_sub <- sampled_trees %>% dplyr::filter(.data$cluster_size == n,
.data$cluster_pos == n_pos)
tab$like[ii] <- like_sampled_cond(par = par,
sampled_trees = trees_sub,
n = n,
n_pos = n_pos)
}
if(any(is.na(tab$like) | is.nan(tab$like) | is.infinite(tab$like))) warning("NA/NAN/Inf likelihood")
total_loglike <- sum(tab$Freq * log(tab$like), na.rm = TRUE)
if(return_neg) total_loglike <- -total_loglike
return(total_loglike)
}
#' Calculate the likelihood for a cluster of size n and n_pos
#'
#' @param par vector of parameters "beta_0" and "beta_1"
#' @param sampled_trees output from \code{simulate_many_cond_bp} where all n and n_pos are the same for each cluster (cluster_size and cluster_pos)
#' @param n number in cluster
#' @param n_pos number of positive smears
#' @return likelihood of 'average' of cluster with given n and n_pos (single number)
like_sampled_cond <- function(par,
sampled_trees,
n, n_pos){
## Add up the like
likes <- sampled_trees %>% dplyr::group_by(cluster_id) %>%
tidyr::nest() %>%
dplyr::mutate(like = purrr::map(.data$data,
cluster_like_smear,
par = par,
n = n,
n_pos = n_pos) ) %>%
dplyr::select(-.data$data) %>%
tidyr::unnest(cols = c(.data$like))
tot_like <- sum(likes$like)
return(tot_like)
## plyr version is more readable but alas
##
# test <- plyr::ddply(.data = trees_df, .fun = cluster_like_smear,
# .variables = "cluster_id",
# n = n,
# par = par,
# n_pos = n_pos)
}
#' Calculate the likelihood for a cluster of this size
#'
#' @param par vector of parameters "beta_0" and "beta_1"
#' @param cluster_df data frame with the following columns
#' \describe{
#' \item{n_inf}{number this person infected}
#' \item{smear}{-1 or +1}
#' }
#' @param n number in cluster
#' @param n_pos number of positive smears
#' @return likelihood of cluster (single number)
cluster_like_smear <- function(par, cluster_df,
n,
n_pos){
p_plus <- 1 / (1 + exp(- (par["beta_0"] + par["beta_1"]) ))
p_minus <- 1 / (1 + exp(- (par["beta_0"]) ))
n_inf_plus <- sum(cluster_df$n_inf[cluster_df$smear > 0])
n_inf_minus <- sum(cluster_df$n_inf[cluster_df$smear < 0])
like <- (1- p_plus)^n_pos *
(1-p_minus)^(n - n_pos) *
p_plus^(n_inf_plus) *
p_minus^(n_inf_minus)
names(like) <- NULL
return(like)
}
#' Get the probability of transferring infection based on smear
#'
#' @param par vector of parameters "beta_0" and "beta_1"
#' @param smear (-1/+1)
#' return(probability between 0 and 1)
prob_trans <- function(par, smear){
smear <- (smear + 1) / 2
1 / (1 + exp(- (par["beta_0"] + par["beta_1"] * smear) ))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.