## SKG
## March 12, 2020
##
## Generate clusters based on an outsider
## Basically we will relabel the no-outsider simulations
## All generations go down by one number
## Will take out the index person
## update i accordingly
#' Simulate the process of flipping until failure for K clusters
#'
#' @param K number of total clusters to simulate
#' @param inf_params vector with beta_0 and beta_1
#' @param smear_pos_prob probability of a smear positsive
#' @param max_size maximum size a cluster can be
#' @param start_at_outsider if TRUE, we start at an unobserved outsider instead of a single index case
#' @param keep_zero_clusters Do I keep track of the the cluster consisting only of the unobserved root node? Default is FALSE.
#' @param sample_covariates used for multi-variate people
#' @return data frame with the following columns
#' \describe{
#' \item{cluster_id}{unique cluster ID}
#' \item{person_id}{order of infection in the cluster}
#' \item{smear}{- 1 (negative) / + 1 (positive)}
#' \item{gen}{generation number (>=0)}
#' \item{inf_id}{ID of the infector}
#' \item{n_inf}{number of people infected by person}
#' \item{cluster_pos}{number of positive smears in the cluster}
#' \item{cluster_size}{number in cluster}
#' \item{censored}{whether the cluster end was censored or not}
#' }
#' @details breadth not depth. Generate generation by generation as opposed to going up the branch til termination.
#' @export
#' @examples
#' inf_params <- c("beta_0" = -2, "beta_1" = 1)
#' smear_pos_prob <- .8
#' max_size <- 30
#' K <- 5
#' out <- simulate_outbreak(K = K,
#' inf_params = inf_params,
#' smear_pos_prob = smear_pos_prob,
#' max_size = max_size)
simulate_outbreak <- function(K,
inf_params,
smear_pos_prob,
max_size = 50,
start_at_outsider = FALSE,
keep_zero_clusters = FALSE,
sample_covariates = NULL){
clusters <- simulate_flip_til_failure(K = K,
inf_params = inf_params,
smear_pos_prob = smear_pos_prob,
max_size = max_size)
if(start_at_outsider){
clusters <- format_to_outsider(clusters,
keep_zero_clusters)
}
return(clusters)
}
#' Make the root node an outsider and adjust accordingly
#'
#' @param clusters data frame with the following columns
#' @param keep_zero_clusters Do I keep track of the the cluster consisting only of the unobserved root node? Default is FALSE.
#' \describe{
#' \item{cluster_id}{unique cluster ID}
#' \item{person_id}{order of infection in the cluster}
#' \item{smear}{- 1 (negative) / + 1 (positive)}
#' \item{gen}{generation number (>=0)}
#' \item{inf_id}{ID of the infector}
#' \item{n_inf}{number of people infected by person}
#' \item{cluster_pos}{number of positive smears in the cluster}
#' \item{cluster_size}{number in cluster}
#' \item{censored}{whether the cluster end was censored or not}
#' }
format_to_outsider <- function(clusters,
keep_zero_clusters = FALSE){
## change generations
clusters <- clusters %>%
dplyr::mutate(gen = .data$gen - 1,
cluster_size = .data$cluster_size - 1) %>%
dplyr::mutate(person_id = move_up_gen(.data$person_id)) %>%
dplyr::mutate(inf_id = move_up_gen(.data$inf_id)) %>%
dplyr::mutate(inf_id = ifelse(.data$gen == 1, NA,
.data$inf_id))
if(keep_zero_clusters){
clusters <- clusters %>%
dplyr::filter(.data$cluster_size == 0 | .data$gen !=0)
} else {
clusters <- clusters %>%
dplyr::filter(.data$gen !=0)
}
clusters <- clusters %>%
dplyr::group_by(.data$cluster_id) %>%
dplyr::mutate(cluster_pos = sum(.data$smear > 0, na.rm = TRUE))
return(clusters)
}
#' Change ID by subtracting one to the generation
#'
#' @param id format like "CX-GY-NZ"
#' @return id now like "CX-G(Y-1)-NZ"
move_up_gen <- function(id){
gen <- as.numeric(gsub(".*-G(.+)-N.*", "\\1", id))
new_id <- sapply(1:length(id), function(x) {
y <- id[x]
g <- gen[x] - 1
gsub("-G(.+)-N", paste0("-G", g,
"-N"), y)})
return(new_id)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.