## Flip 'til failure
## 2/3/2020
## simulations
#' Summarize the clusters
#'
#' @param clusters_df a data frame with
#' @return data frame with the following columns
#' \describe{
#' \item{cluster_id}{unique cluster ID}
#' \item{person_id}{order of infection in the cluster}
#' \item{smear}{- 1 (negative) / + 1 (positive)}
#' \item{gen}{generation number (>=0)}
#' \item{inf_id}{ID of the infector}
#' \item{n_inf}{number of people infected by person}
#' \item{cluster_pos}{number of positive smears in the cluster}
#' \item{cluster_size}{number in cluster}
#' \item{censored}{whether the cluster end was censored or not}
#' }
#' @return data frame with
#' \describe{
#' \item{n}{number in cluster}
#' \item{n_pos}{number of positive smears in cluster}
#' \item{freq}{frequency this occurs}
#' }
summarize_clusters <- function(clusters_df){
df1 <- clusters_df %>% dplyr::mutate(id = paste(.data$cluster_size,
.data$cluster_pos, sep = "-")) %>%
dplyr::group_by(.data$cluster_id) %>% dplyr::summarize(id = .data$id[1],
n = .data$cluster_size[1],
n_pos = .data$cluster_pos[1])
df2 <- df1 %>% dplyr::group_by(.data$id) %>%
dplyr::summarize(freq = dplyr::n(),
n = .data$n[1],
n_pos = .data$n_pos[1]) %>%
dplyr::select(-.data$id)
return(df2)
}
#' Simulate the process of flipping until failure for K clusters
#'
#' @param K number of total clusters to simulate
#' @param inf_params vector with beta_0 and beta_1
#' @param smear_pos_prob probability of a smear positsive
#' @param max_size maximum size a cluster can be
#' @return data frame with the following columns
#' \describe{
#' \item{cluster_id}{unique cluster ID}
#' \item{person_id}{order of infection in the cluster}
#' \item{smear}{- 1 (negative) / + 1 (positive)}
#' \item{gen}{generation number (>=0)}
#' \item{inf_id}{ID of the infector}
#' \item{n_inf}{number of people infected by person}
#' \item{cluster_pos}{number of positive smears in the cluster}
#' \item{cluster_size}{number in cluster}
#' \item{censored}{whether the cluster end was censored or not}
#' }
#' @details breadth not depth. Generate generation by generation as opposed to going up the branch til termination.
#' @export
simulate_flip_til_failure <- function(K,
inf_params,
smear_pos_prob,
max_size = 30){
cluster_list <- vector(mode = "list", length = K)
for(ii in 1:K){
cluster_list[[ii]] <- flip_til_failure(cluster_id = ii,
inf_params = inf_params,
smear_pos_prob = smear_pos_prob,
max_size = max_size)
}
df <- dplyr::bind_rows(cluster_list)
return(df)
}
#' Simulate the process of flipping until failure for a single cluster
#'
#' @param cluster_id unique ID of the cluster
#' @param inf_params vector with beta_0 and beta_1
#' @param smear_pos_prob probability of a smear positsive
#' @param max_size maximum size a cluster can be
#' @return data frame with the following columns
#' \describe{
#' \item{cluster_id}{unique cluster ID}
#' \item{person_id}{order of infection in the cluster}
#' \item{smear}{- 1 (negative) / + 1 (positive)}
#' \item{gen}{generation number (>=0)}
#' \item{inf_id}{ID of the infector}
#' \item{n_inf}{number of people infected by person}
#' \item{cluster_pos}{number of positive smears in the cluster}
#' \item{cluster_size}{number in cluster}
#' \item{censored}{whether the cluster end was censored or not}
#' }
#' @details breadth not depth. Generate generation by generation as opposed to going up the branch til termination.
flip_til_failure <- function(cluster_id,
inf_params,
smear_pos_prob,
max_size = 30){
df <- data.frame(cluster_id = rep(cluster_id, max_size),
person_id = NA,
smear = NA,
gen = NA,
inf_id = NA,
n_inf = NA,
cluster_size = 1,
cluster_pos = NA,
censored = FALSE)
censored <- FALSE
p_pos <- 1 / (1 + exp(- inf_params["beta_0"] - inf_params["beta_1"]))
p_neg <- 1 / (1 + exp(- inf_params["beta_0"]))
cluster_size <- 1
smear_pos_prob <- ifelse(smear_pos_prob == 1, .999999999, # v hacky
ifelse(smear_pos_prob == 0, .00000000001,
smear_pos_prob))
gen_list <- vector(mode = "list", length = max_size)
gen_list[[1]] <- data.frame(cluster_id = cluster_id,
person_id = paste0("C", cluster_id, "-G1-N1"),
smear = sample(0:1, size = 1,
prob = c(1- smear_pos_prob,
smear_pos_prob)),
gen = 1,
inf_id = NA,
n_inf = NA,
cluster_size = 1,
cluster_pos = NA,
stringsAsFactors = FALSE)
for(gg in 2:(max_size - 1)){
next_gen_output <- generation_infection(cluster_id = cluster_id,
p_pos = p_pos,
p_neg = p_neg,
smear_pos_prob = smear_pos_prob,
gen = gg,
prev_gen = gen_list[[gg-1]],
cluster_size = cluster_size,
max_size = max_size)
if(cluster_size == next_gen_output$new_cluster_size){ # no new generations
break
}
cluster_size <- next_gen_output$new_cluster_size
gen_list[[gg]] <- next_gen_output$cur_gen
gen_list[[gg-1]]$n_inf <- next_gen_output$n_inf
if(max_size <= cluster_size){ # Hit the max size
censored <- TRUE
break
}
}
sim_df <- dplyr::bind_rows(gen_list)
sim_df$cluster_pos <- sum(sim_df$smear > 0)
sim_df$cluster_size <- nrow(sim_df)
sim_df$censored <- censored
sim_df$cluster_size <- cluster_size
sim_df$smear <- ifelse(sim_df$smear == 0, -1, sim_df$smear) # easy translation
return(sim_df)
}
#' Infect the current generation given the previous generation
#'
#' @param cluster_id unique cluster ID
#' @param p_pos probability of being infected given a smear positive
#' @param p_neg probability of being infected given a smear negative
#' @param smear_pos_prob probability of being smear positive
#' @param gen new generation number
#' @param prev_gen data frame with at least smear status (0/1) and person_id
#' @param cluster_size size of previous cluster
#' @param max_size maximum size of cluster
#' @return list with the following entries
#' \describe{
#' \item{new_cluster_size}{new cluster size with the current generation}
#' \item{cur_gen}{data frame of current generation (new infections)}
#' \item{n_inf}{number of infections for each person in the previous generation}
#' }
generation_infection <- function(cluster_id,
p_pos,
p_neg,
smear_pos_prob,
gen,
prev_gen,
cluster_size,
max_size){
n_new_inf <- rep(0, nrow(prev_gen))
smear <- prev_gen$smear
p <- ifelse(smear == 1, p_pos, p_neg)
p <- ifelse(p == 1, .99999999, p) # pretty hacky
for(ii in 1:nrow(prev_gen)){
n_new_inf[ii] <- rgeom(n = 1, prob = 1 - p[ii])
new_cluster_size <- cluster_size +
sum(n_new_inf[1:ii])
if(new_cluster_size >= max_size){
# browser()
if(ii > 1){
n_new_inf[ii] <- (max_size - cluster_size - sum(n_new_inf[1:(ii-1)]))
} else {
n_new_inf[ii] <- max_size - cluster_size
}
break # Have found enough
}
}
# browser()
n <- sum(n_new_inf)
if(n == 0){ # No new infections in next generations
return(list(new_cluster_size = cluster_size,
cur_gen = NULL,
n_inf = n_new_inf))
}
new_cluster_size <- n + cluster_size
cur_gen <- data.frame(cluster_id = rep(cluster_id, n),
person_id = paste0("C", cluster_id, "-G", gen, "-N", 1:n),
smear = sample(0:1, size = n, replace = TRUE,
prob = c(1- smear_pos_prob,
smear_pos_prob)),
gen = gen,
inf_id = rep(prev_gen$person_id, times = n_new_inf),
n_inf = NA,
cluster_size = NA,
cluster_pos = NA,
stringsAsFactors = FALSE)
return(list(new_cluster_size = n + cluster_size,
cur_gen = cur_gen,
n_inf = n_new_inf))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.