R/create_data.R

Defines functions create_simulated_data create_family_structure_data

Documented in create_family_structure_data create_simulated_data

#' Create a random dataset of families
#' @param seed seed for the simulation
#' @param n number of families
#' @param adult_index_case_prob probability that the index case is adult
#' @param T_max number of days to run epidemic (scalar or vector of size n)
#' @return a the result as a tibble including the number of adults, the number of children, the index case, and the epidemic duration (in days) of each family
#' @export
#' @import magrittr
#' @importFrom tibble tibble
#' @importFrom dplyr select
#' @importFrom purrr map_dbl
#' @seealso  `create_simulated_data` `run_simulation`

create_family_structure_data <- function(seed = -1,n = 1000,
                                         adult_index_case_prob = 0.8, T_max=21){

  stopifnot(length(T_max)==1 | length(T_max)==n)

  if(seed>1){
    set.seed(seed = seed)
  }

  dat <- tibble(size = sample(2:7,size = n,replace = T),
                        adults= purrr::map_dbl(.x = size,function(sizei){sample(x = 1:sizei,size = 1)}),
                        childs = size -adults,
                        first.is.adult = ifelse(adults == 0,FALSE,
                                                ifelse(childs == 0, TRUE,
                                                sample(x = c(TRUE,FALSE),size = n,prob= c(adult_index_case_prob,1-adult_index_case_prob),replace = T))),
                        day = T_max) %>%
    dplyr::select(-size)
  return(dat)

}

#' Simulate the epidemic with in a household
#' @param seed seed for the simulation
#' @param family_structure_data dataset of families including the number of adults, number of children, and the index case (see `create_family_structure_data`)
#' @param beta_cc proabilty that a child will infect a child
#' @param beta_ac proabilty that a child will infect an adult
#' @param beta_ca proabilty that an adult will infect a child
#' @param beta_aa proabilty that an adult will infect an adult
#' @param P a vector of generation time distribution (default is generated by `load_P`)
#' @param T_max the length of the epidemic within the family
#' @param ssn the number of households to simulate
#'
#' @return a tibble with the epidemic development over time by day
#' @export
#' @import magrittr
#' @importFrom tibble as_tibble
#' @importFrom dplyr mutate select group_by summarise ungroup arrange inner_join
#' @importFrom tidyr  spread gather
#' @seealso `create_family_structure_data` `load_P`


create_simulated_data <- function(seed = -1, family_structure_data,beta_cc,beta_ca,beta_ac,beta_aa,P=NULL,
                                  T_max=45,ssn = 1){


  family_structure_data <- dplyr::mutate(family_structure_data,seed = 1000*seed+1:dplyr::n()) %>%
    dplyr::rename(true_day = day)
  dat <- dplyr::mutate(family_structure_data,
                       sims = purrr::pmap(list(adults,childs,first.is.adult,seed),
                                          function(adultsi,childsi,first.is.adulti,seedi){
                                            simulate_household(seed = seedi, adultsi,childsi,first.is.adulti,beta_cc,beta_ca,beta_ac,beta_aa,P,T_max,ssn)}))
  dat <- dplyr::mutate(dat,findex = 1:dplyr::n())
  dat <- tidyr::unnest(dat,cols = "sims")

  dat <- dplyr::group_by(dat,findex) %>%
    dplyr::filter(day == true_day) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(pos.adults = ifelse(first.is.adult, pos.adults+1,pos.adults),
                  pos.childs = ifelse(first.is.adult, pos.childs,pos.childs +1)) %>%
    dplyr::select(-true_day,-seed)

  return(dat)

}
yairgoldy/sl4hm documentation built on Feb. 3, 2021, 5:45 p.m.