TBornotTB: Analyze Tuberculosis Patients in Maryland

## SKG
## April 4?, 2020
## Likelihood for generator (truncated)


#' Calculate the log likelihood of a cluster conditioned on size and smear pos. num
#'
#' @param p_plus probability of smear pos
#' @param p_neg probability of smear neg
#' @param clust_df data frame with the following columns
#' \describe{
#' \item{i_pos}{number of transmission from a smear positive person}
#' \item{i_neg}{number of transmission from a smear negative person (optional)}
#' \item{n0}{number infected by generator}
#' \item{x0}{smear status of generator}
#' \item{n_pos}{number of positive smears}
#' \item{n}{cluster size}
#' \item{freq}{frequency of time this particular cluster occurs}
#' }
#' @param is_truncated logical default is FALSE
#' @param w_pos empirical weights of number of positive smears
#' @details if i_neg is not a column in clust_df we will use n-i_pos - 1 as i_neg.  If 'is_truncated' is TRUE, we will calculate the the zero-truncated likelihood
#' @return log likelihood summed over the clusters
loglike_doubly_trunc  <- function(p_plus, p_neg,
                               clust_df,
                               is_truncated = FALSE,
                               w_pos = .5){
 
  ## TODO

  ## Implement the doubly truncated likelihood
  p0 <- get_p0(clust_df$x0, w_pos,
               clust_df$n, clust_df$n_pos,
               p_plus, p_neg)
  pn0 <- get_pn0(clust_df$x0, clust_df$n0, clust_df$n,
                 p_plus, p_neg)
  cond_p <- get_cond_p(clust_df$x0, clust_df$n0,
                       clust_df$n, clust_df$n_pos,
                       clust_df$i_pos, clust_df$i_neg,
                       p_plus, p_neg)
 
  like <- p0 * pn0 * cond_p * clust_df$freq
  
  if(any(is.na(like))) stop("Error in calculating likelihood.  NAs present")
  B <- sum(clust_df$freq)
  ## TODO
  ## top truncated version?
  
 # print(log(sum(like)/B))
  if((sum(like)/B) > 1) browser()
  return(log(sum(like) / B))
}


#' Get the probability of a positive
#' 
#' @return vector of probs
get_p0 <- function(x0, w_pos,
                   n, n_pos,
                   p_plus, p_neg){
  1/2
  #  ifelse(x0 == 1, w_pos, 1-w_pos)
# (n_pos/n  *(p_plus))/ (p_plus + p_neg)
}


#' Get the probability of init infections given generator smear
#' 
#' @param x0 smear status of generator
#' @param n0 number infected by generator
#' @param n total cluster size (including generator)
#' @param p_plus probability of transfer given pos.
#' @param p_neg probability of transfer given neg.
#' @return vector of probabilities
get_pn0 <- function(x0, n0, n,
                    p_plus, p_neg){
  p0 <- ifelse(x0 == 1, p_plus, p_neg)
  prob <- numeric(length(p0))
  for(ii in 1:length(p0)){
    if(n[ii] == 1){
      prob[ii] <- 1
    } else {
      denom <- sum(p0[ii]^(1:(n[ii]-1)))
      num <- p0[ii]^n0[ii]
      prob[ii] <- num / denom
    }
  }
  return(prob)
}



#' Get the probability of init infections given generator smear
#' 
#' @param x0 smear status of generator
#' @param n0 number infected by generator
#' @param n total cluster size (including generator)
#' @param n_pos number of positive
#' @param i_pos number of positive generations
#' @param i_neg number of negative generations
#' @param p_plus probability of transfer given pos.
#' @param p_neg probability of transfer given neg.
#' @return vector of probabilities
get_cond_p <- function(x0, n0, 
                       n, n_pos,
                       i_pos, i_neg,
                       p_plus, p_neg){
  
  prob <- numeric(length(x0))
  n0_pos <- ifelse(x0 == 1, n0, 0)
  n0_neg <- ifelse(x0 == 1, 0, n0)
  x_cond <- n_pos - x0 # number of the rest of cluster positive
  n_cond <- n - 1  # number in rest of cluster
  i_pos_cond <- i_pos - n0_pos
  i_neg_cond <- i_neg - n0_neg
  for(ii in 1:length(x0)){
    
    if(n_cond[ii] == 0){ # generator infected no one at all
      prob[ii] <- 1
    } else if(n_cond[ii] == n0[ii]){ # generator infected everyone in cluster
      prob[ii] <- 1
    } else if (n_cond[ii] == x_cond[ii]){ ## remaining people are all positive
      prob[ii] <- 1
    } else if(x_cond[ii] == 0){ # remaining people are all negative
      prob[ii] <- 1
    } else if(n0[ii] > 0){

        sum_inds <- 0:(n_cond[ii] - n0[ii])
        denom <- sum(p_plus^sum_inds *
                       p_neg^(n_cond[ii] - n0[ii] - sum_inds ))
        
        
        num <- p_plus^(i_pos_cond[ii]) * p_neg^(i_neg_cond[ii])
        prob[ii] <- num / denom
    }
  }
  
  return(prob)
  
}

## NEXT TODO 4/4/20
## TODO tests and simulate

skgallagher/TBornotTB documentation built on April 21, 2020, 1:19 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

skgallagher/TBornotTB
Analyze Tuberculosis Patients in Maryland

R/loglike-doubly-trunc.R
In skgallagher/TBornotTB: Analyze Tuberculosis Patients in Maryland

Defines functions get_cond_p get_pn0 get_p0 loglike_doubly_trunc

R Package Documentation

Browse R Packages

We want your feedback!

skgallagher/TBornotTB Analyze Tuberculosis Patients in Maryland

R/loglike-doubly-trunc.R In skgallagher/TBornotTB: Analyze Tuberculosis Patients in Maryland

Defines functions get_cond_p get_pn0 get_p0 loglike_doubly_trunc

R Package Documentation

Browse R Packages

We want your feedback!

skgallagher/TBornotTB
Analyze Tuberculosis Patients in Maryland

R/loglike-doubly-trunc.R
In skgallagher/TBornotTB: Analyze Tuberculosis Patients in Maryland