## SKG
## April 4?, 2020
## Likelihood for generator (truncated)
#' Calculate the log likelihood of a cluster conditioned on size and smear pos. num
#'
#' @param p_plus probability of smear pos
#' @param p_neg probability of smear neg
#' @param clust_df data frame with the following columns
#' \describe{
#' \item{i_pos}{number of transmission from a smear positive person}
#' \item{i_neg}{number of transmission from a smear negative person (optional)}
#' \item{n0}{number infected by generator}
#' \item{x0}{smear status of generator}
#' \item{n_pos}{number of positive smears}
#' \item{n}{cluster size}
#' \item{freq}{frequency of time this particular cluster occurs}
#' }
#' @param is_truncated logical default is FALSE
#' @param w_pos empirical weights of number of positive smears
#' @details if i_neg is not a column in clust_df we will use n-i_pos - 1 as i_neg. If 'is_truncated' is TRUE, we will calculate the the zero-truncated likelihood
#' @return log likelihood summed over the clusters
loglike_doubly_trunc <- function(p_plus, p_neg,
clust_df,
is_truncated = FALSE,
w_pos = .5){
## TODO
## Implement the doubly truncated likelihood
p0 <- get_p0(clust_df$x0, w_pos,
clust_df$n, clust_df$n_pos,
p_plus, p_neg)
pn0 <- get_pn0(clust_df$x0, clust_df$n0, clust_df$n,
p_plus, p_neg)
cond_p <- get_cond_p(clust_df$x0, clust_df$n0,
clust_df$n, clust_df$n_pos,
clust_df$i_pos, clust_df$i_neg,
p_plus, p_neg)
like <- p0 * pn0 * cond_p * clust_df$freq
if(any(is.na(like))) stop("Error in calculating likelihood. NAs present")
B <- sum(clust_df$freq)
## TODO
## top truncated version?
# print(log(sum(like)/B))
if((sum(like)/B) > 1) browser()
return(log(sum(like) / B))
}
#' Get the probability of a positive
#'
#' @return vector of probs
get_p0 <- function(x0, w_pos,
n, n_pos,
p_plus, p_neg){
1/2
# ifelse(x0 == 1, w_pos, 1-w_pos)
# (n_pos/n *(p_plus))/ (p_plus + p_neg)
}
#' Get the probability of init infections given generator smear
#'
#' @param x0 smear status of generator
#' @param n0 number infected by generator
#' @param n total cluster size (including generator)
#' @param p_plus probability of transfer given pos.
#' @param p_neg probability of transfer given neg.
#' @return vector of probabilities
get_pn0 <- function(x0, n0, n,
p_plus, p_neg){
p0 <- ifelse(x0 == 1, p_plus, p_neg)
prob <- numeric(length(p0))
for(ii in 1:length(p0)){
if(n[ii] == 1){
prob[ii] <- 1
} else {
denom <- sum(p0[ii]^(1:(n[ii]-1)))
num <- p0[ii]^n0[ii]
prob[ii] <- num / denom
}
}
return(prob)
}
#' Get the probability of init infections given generator smear
#'
#' @param x0 smear status of generator
#' @param n0 number infected by generator
#' @param n total cluster size (including generator)
#' @param n_pos number of positive
#' @param i_pos number of positive generations
#' @param i_neg number of negative generations
#' @param p_plus probability of transfer given pos.
#' @param p_neg probability of transfer given neg.
#' @return vector of probabilities
get_cond_p <- function(x0, n0,
n, n_pos,
i_pos, i_neg,
p_plus, p_neg){
prob <- numeric(length(x0))
n0_pos <- ifelse(x0 == 1, n0, 0)
n0_neg <- ifelse(x0 == 1, 0, n0)
x_cond <- n_pos - x0 # number of the rest of cluster positive
n_cond <- n - 1 # number in rest of cluster
i_pos_cond <- i_pos - n0_pos
i_neg_cond <- i_neg - n0_neg
for(ii in 1:length(x0)){
if(n_cond[ii] == 0){ # generator infected no one at all
prob[ii] <- 1
} else if(n_cond[ii] == n0[ii]){ # generator infected everyone in cluster
prob[ii] <- 1
} else if (n_cond[ii] == x_cond[ii]){ ## remaining people are all positive
prob[ii] <- 1
} else if(x_cond[ii] == 0){ # remaining people are all negative
prob[ii] <- 1
} else if(n0[ii] > 0){
sum_inds <- 0:(n_cond[ii] - n0[ii])
denom <- sum(p_plus^sum_inds *
p_neg^(n_cond[ii] - n0[ii] - sum_inds ))
num <- p_plus^(i_pos_cond[ii]) * p_neg^(i_neg_cond[ii])
prob[ii] <- num / denom
}
}
return(prob)
}
## NEXT TODO 4/4/20
## TODO tests and simulate
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.