R/simulation_base_functions.R

Defines functions Q_update I_O_update C_unify C_update simulate_transmission initiate_nw aggregate_simulation network_generate R0_adjust init_para assign_para network_covid_simulate g_transmissibility incub

Documented in init_para network_covid_simulate network_generate simulate_transmission

###############################################
# Preparation functions
###############################################

# incubation time: shape = 7.92, scale = 0.69
incub <- function(x) floor(pmax(pmin(rgamma(x, shape = 7.92, scale = 0.69), 14),1)) # maximum incubation time is 14 days

# here we use the shape of weibull from the science paper.
# The onset should be the mode of weibull, so we compute the scale from the onset .
# weibull has the property of keeping the shape, and mode change with scale
# physical property of weibull also make sense: single event happen with an rate of t^k

g_transmissibility <- function(t_onset, para) para$R0_adj/para$num_cc * dweibull(1:(para$len_sim+1), shape = 2.826, scale = t_onset/0.8568)
# plot(g_transmissibility(20))
# weibull distribution: shape: 2.8 scale 5.7
# mean 5.665*(1-1/2.826)^(1/2.826)
# normalized transmissibility per contact para$R0_adj/para$num_cc

###############################################
# external functions (all function that could actually be used by other users)
###############################################
#' @importFrom magrittr %>%
#' @export
magrittr::`%>%`

#' @import stats
NULL

#' @import ergm
NULL

#' wrapper function to simulate under four containment scenario, for multiple repeats.
#'
#' @param rep_num numeric number of epidemic to simulate, >100 is recommended
#' @param network_num numeric number of synthetic population to construct, could be less than rep_num to save time an space, the simulation
#' will randomly sample one of the synthetic populations for each epidemic simulation
#' @param output character prefix for saving the network data, you are encouraged to specify one otherwise default "example" will be used,
#' potentially erasing previous analysis.
#' @param para list of parameters. If you want to set up your own parameter, using init_para and modify the output.
#' @param len_sim numeric number of days to simulate for the outbreak, default 100.
#'
#' @return list containing simulation results. Six fields are included: new_daily_case,  quarantine_daily, Re_daily, RDT_used, PCR_used,
#' death_daily. each output is a list of four matrix, one row represent one simulation of epidemic: use [[1]] to access the results of
#' baseline scenario without any containment; use [[2]] to access results of RDT containment; use [[3]] to access results of PCR containment;
#' use [[4]] to access results of RDT + PCR containment.
#' @export
#'
#' @examples
#' para <- list()
#' para$community.pop_sz <- 200
#' para$pop_sz <- 400
#' para <- init_para(para = para)
#' results <- network_covid_simulate(rep_num = 1, network_num = 1, output = "example", para = para)
network_covid_simulate <- function(rep_num = 1, network_num = 20, output = "example", para = NA, len_sim = 100) {
  # this is a wrapper function for simulating
  if(output == "example"){
    print("Using 'example' as the output prefix, you are strongly recommended to specifiy a prefix for the ease of output analysis")
  }else{
    print(paste0("Using ", output, " as the output prefix."))
  }
  if(is.na(para)[1]){
    print(paste0("No population specified, using the default parameter setting"))
    para <- init_para()
  }
  print("---------------------------------------------")
  print("-------- simulation settings ----------------")
  print("---------------------------------------------")
  print(paste0("Number of simulations: ",rep_num))
  print(paste0("Population size: ",para$pop_sz))
  print(paste0("Population age distribution: 0-14: ", para$age_dist[1]*100, "%;  15-24: ",
               para$age_dist[2]*100,"%;  25+: ", para$age_dist[3]*100, "%"))
  print(paste0("Household size distribution: 1: ", para$HH_dist[1]*100, "%;  2-3: ",
               para$HH_dist[2]*100,"%;  4-5: ", para$HH_dist[3]*100, "%;  6+: ", para$HH_dist[4]*100, "%"))
  if(is.null(para$cc_cyl)){
    print(paste0("Number of contact: ", para$num_cc))
  }else{
    print(paste0("Number of contact: ", para$num_cc * para$cc_cyl))
  }
  print(paste0("Proportion of non-household contact reduced by physical distancing: ", (1-para$Non_HH_CC_rate)*100, "%"))
  print(paste0("Initial cases: ", para$E_0))
  if(is.null(para$cc_cyl)){
    print(paste0("R0: ", para$R0))
  }else{
    print(paste0("R0: ", para$R0 * para$cc_cyl))
  }
  # generate networks
  print("-----------------------------------------------------------")
  print("-------- synthetic population simulation ------------------")
  print("-----------------------------------------------------------")
  nw <- list()
  nw[[3]] <- 4
  for(nw_id in 1:network_num){
    print(paste0("Generate network: ", nw_id))
    nw <- network_generate(para, output = paste0(output, ".", nw_id), searched_clustering_number = nw[[3]])
  }
  # simulate all four scenarios
  print("-----------------------------------------------------------")
  print("-------- transmission simulation --------------------------")
  print("-----------------------------------------------------------")
  new_daily_case <- list()
  quarantine_daily <- list()
  Re_daily <- list()
  RDT_used <- list()
  PCR_used <- list()
  death_daily <- list()
  for(sc in 1:4){ # save matrix for each scenario
    new_daily_case[[sc]] <- matrix(nrow = rep_num, ncol = len_sim)
    quarantine_daily[[sc]] <- matrix(nrow = rep_num, ncol = len_sim)
    Re_daily[[sc]] <- matrix(nrow = rep_num, ncol = len_sim)
    RDT_used[[sc]] <- matrix(nrow = rep_num, ncol = len_sim)
    PCR_used[[sc]] <- matrix(nrow = rep_num, ncol = len_sim)
    death_daily[[sc]] <- matrix(nrow = rep_num, ncol = len_sim)
  }
  for(rep_id in 1:rep_num){
    NW_SIM <- NA
    sim_rslt <- list()
    while(is.na(NW_SIM)[1]){
      try({ # there might be error when loading network
        idx <- sample(1:network_num, 1)
        load(paste0("Networks/", output, ".",idx, ".network.Rdata"))
      })
    }
    sim_rslt[[1]] <- simulate_transmission(NW_SIM = NW_SIM, PCR = F, RDT = F, len_sim = len_sim)
    print(paste0("No containment simulation: ", rep_id))
    sim_rslt[[2]] <- simulate_transmission(NW_SIM = NW_SIM, PCR = F, RDT = T, len_sim = len_sim)
    print(paste0("RDT containment simulation: ", rep_id))
    sim_rslt[[3]] <- simulate_transmission(NW_SIM = NW_SIM, PCR = T, RDT = F, len_sim = len_sim)
    print(paste0("PCR containment simulation: ", rep_id))
    sim_rslt[[4]] <- simulate_transmission(NW_SIM = NW_SIM, PCR = T, RDT = T, len_sim = len_sim)
    print(paste0("PCR & RDT containment simulation: ", rep_id))

    for(sc in 1:4){ # save matrix for each scenario
      new_daily_case[[sc]][rep_id, ] <- sim_rslt[[sc]]$new_daily_case
      quarantine_daily[[sc]][rep_id, ] <- sim_rslt[[sc]]$quarantine_daily
      Re_daily[[sc]][rep_id, ] <- sim_rslt[[sc]]$Re_daily
      RDT_used[[sc]][rep_id, ] <- sim_rslt[[sc]]$RDT_used
      PCR_used[[sc]][rep_id, ] <- sim_rslt[[sc]]$PCR_used
      death_daily[[sc]][rep_id, ] <- sim_rslt[[sc]]$death_daily
    }
  }
  results_four_strategy <- list()
  results_four_strategy$new_daily_case <- new_daily_case
  results_four_strategy$quarantine_daily <- quarantine_daily
  results_four_strategy$Re_daily <- Re_daily
  results_four_strategy$RDT_used <- RDT_used
  results_four_strategy$PCR_used <- PCR_used
  results_four_strategy$death_daily <- death_daily
  return(results_four_strategy)
}

# using assign_para to ensure variable changes would not be masked by init_para
assign_para <- function(para, name, value){
  if(is.null(para[[name]])){
    para[[name]] <- value
  }
  return(para)
}
#' initiate default parameters for simulation
#'
#' @param setting integer. rural simulation; 2 for urban, 3 for slum
#' @param country_id 1 stands for Uganda demographic
#' @param social_distancing_flg 1 stands for no social distancing
#'
#' @return a list contains all parameters at default setting for each scenatrio
#' @export
#'
#' @examples # initialise a default parameter set
#' para <- init_para()
init_para <- function(setting = 2, country_id = 1, social_distancing_flg = 1, para = list()){

  ###############################
  # parameters relevant to network
  ###############################
  para <- assign_para(para, "setting", setting)  # 1=rural 2=affluent 3=slum
  # Environment setup
  para <- assign_para(para, "pop_sz", 1000)  # 5000
  para <- assign_para(para, "community.pop_sz", 100)
  para <- assign_para(para, "Non_HH_CC_rate", c(1,.8,.6,.4,.2)[social_distancing_flg])
  community_setting <- c("Rural", "Non-slum urban", "Slum")[setting]
  # print(paste0("Simulate ",para$pop_sz," individuals in ",community_setting," setting"))
  ##########################################
  # loading country specific variables:
  # age distribution & household
  ##########################################
  if(country_id == 1){
    para <- assign_para(para, "age_dist", c(0.481, 0.203, 0.316) )# Uganda
    para <- assign_para(para, "HH_dist", c(.11, .22, .27, .4)) # Uganda
  }else if(country_id == 2){
    para <- assign_para(para, "age_dist", c(0.292, 0.193, 0.515)) # South africa
    para <- assign_para(para, "HH_dist", c(.27, .35, .23, .15))  # South Africa
  }else if(country_id == 3){
    para <- assign_para(para, "age_dist", c(0.419, 0.195, 0.386))  # kenya
    para <- assign_para(para, "HH_dist", c(.19, .28, .3, .23)) # kenya
  }else if(country_id == 4){
    para <- assign_para(para, "age_dist", c(0.440, 0.190, 0.370))  # nigeria
    para <- assign_para(para, "HH_dist", c(.16, .26, .26, .32) ) # nigeria
  }

  ##########################################
  # processing demographic information;
  # for details: refer to the supplementary methods
  ##########################################
  # para$HH_affluent_dist <- c(.31, .5, .18, .02) # UK

  if (para$setting==1) {
    para <- assign_para(para, "num_cc", 7) # set daily close contact to be 7
    para <- assign_para(para, "family_sz", 5) # average household size 5
    # set the percentage of HH_cc
    para <- assign_para(para, "percent_HH_cc", .5)
  }else if (para$setting==2) {
    para <- assign_para(para, "num_cc", 13)
    para <- assign_para(para, "family_sz", 5)
    para <- assign_para(para, "percent_HH_cc", .23)
  }else if (para$setting==3) {
    para <- assign_para(para, "num_cc", 14)
    para <- assign_para(para, "family_sz", 15)
    para <- assign_para(para, "percent_HH_cc", .5)
    para <- assign_para(para, "HH_dist",  c(.00, .06, .17, .77) ) # afganistan
  }else print ("Parameter setting error")

  ########################################################################################
  # scale the contact number: if >15, simulate two rounds of infection each day
  ########################################################################################
  if(para$num_cc > 15 & para$num_cc <= 30){
    para$Tnum_cc <- para$num_cc # create a separate variable for total contact number
    para$cc_cyl <- 2 # use 2 simulated networks, and two rounds of infection within one day
    para$num_cc <- para$Tnum_cc/para$cc_cyl
  }else if(para$num_cc > 30){
    print("we don't support contact number above 30, please use smaller number.")
    break
  }

  # load the contact structure
  para <- assign_para(para, "age_mix", contact_all[[country_id]])
  para <- assign_para(para, "Home_age_mix", contact_home[[country_id]])
  # adjust the age matrix to represent the specified household contact rate
  para$age_mix <- para$Home_age_mix + (para$age_mix - para$Home_age_mix) *
    sum(para$Home_age_mix)/sum(para$age_mix - para$Home_age_mix) *  (1-para$percent_HH_cc)/para$percent_HH_cc

  # adjust R0 for 1) young people susceptibility 2) subclinical cases
  # adjust for the social distancing
  para$num_cc_scdst <- para$num_cc * ((1-para$percent_HH_cc)*para$Non_HH_CC_rate + para$percent_HH_cc) # reduce the number of cc
  para$age_mix_scdst <- para$Home_age_mix + (para$age_mix - para$Home_age_mix) * para$Non_HH_CC_rate
  para$percent_HH_cc_scdst <- para$percent_HH_cc/((1-para$percent_HH_cc)*para$Non_HH_CC_rate + para$percent_HH_cc)

  ###############################
  # parameters relevant to transmission
  ###############################

  para <- assign_para(para, "E_0",  2) # number of initial importation
  para <- assign_para(para, "infect_sz", (-1)*para$pop_sz/1000)  # intervention only starts after X per 1000 individuals are already infected

  R_UK <- 2.7
  num_cc_UK <- 13
  R0_baseline <- R_UK/num_cc_UK # the R0 is measured in affluent region
  para <- assign_para(para, "R0", R0_baseline * para$num_cc)  # R naught
  # Transmission parameter & tool availability
  if (para$setting==1) {
    para <- assign_para(para, "symptom_report", 0.7)  # precentage infected report symptom
    para <- assign_para(para, "theta", 0.1 )  # quarantine effect: precentage of remaining transmission quarantined individual have
    para <- assign_para(para, "pcr_available", 1000*para$pop_sz/1000) # -1 # daily maximum PCR tests per 1000 population.
    para <- assign_para(para, "ppe_coef", 1) # if people wear ppe, then their transmissibility will be different outside their family
  }else if (para$setting==2) {
    para <- assign_para(para, "symptom_report", 0.7)
    para <- assign_para(para, "theta", 0.1)
    para <- assign_para(para, "pcr_available", 1000*para$pop_sz/1000)
    para <- assign_para(para, "ppe_coef", 1)
  }else if (para$setting==3) {
    para <- assign_para(para, "symptom_report", 0.6)
    para <- assign_para(para, "theta", 0.2)
    para <- assign_para(para, "pcr_available", 1000*para$pop_sz/1000)
    para <- assign_para(para, "ppe_coef", 1)
  }else print ("Parameter setting error")

  # subclinical rate
  para <- assign_para(para, "sub_clini_rate",  0.3)
  para <- assign_para(para, "asym_rate",  0.2) # transmission rate of asymptomatic patient
  para <- assign_para(para, "death_rate", c(0.002,0.002,0.01)) # death rate for three age groups
  para <- assign_para(para, "death_delay", 10) # delay time for a death to be recorded

  # Parameters about Infected pts
  para <- assign_para(para, "ab_test_rate", 0.7) # % accepted ab testing among detected (symptomatic) patients
  para <- assign_para(para, "pcr_test_rate", 0.8)  # % accepted pcr testing among detected (symptomatic) patients

  para <- assign_para(para, "onsetiso", 0.2)  # Isolation compliance rate at onset based on symptom
  para <- assign_para(para, "abiso", 0.9) # Isolation compliance rate among ab testing positive
  para <- assign_para(para, "pcriso", 0.9) # Isolation compliance rate among pcr testing positive

  para <- assign_para(para, "delay_symptom", 1 ) # days of delay after onset to detect symptomatic patients
  para <- assign_para(para, "delay_ab", 8) # days of delay after onset to receive ab test and obtain result
  para <- assign_para(para, "delay_pcr", 4)  # days of delay after onset to report pcr test result

  # Parameters about tracing contects
  para <- assign_para(para, "tracing_cc_onset", 3)  # set how many days we trace close contact back after symptom-based patient detection
  para <- assign_para(para, "tracing_cc_ab", 3)  # set how many days we trace close contact back after a positive ab_test
  para <- assign_para(para, "tracing_cc_pcr", para$delay_pcr) # set how many days we trace close contact back after a positive PCR test

  para <- assign_para(para, "cc_success_symptom", 0.85) # precentage close contact successfully traced after symptom-based patient detection
  para <- assign_para(para, "cc_success_ab", 0.75)  # precentage close contact successfully traced after positive ab test
  para <- assign_para(para, "cc_success_pcr", 0.80) # precentage close contact successfully traced after positive pcr test

  para <- assign_para(para, "qrate_symptom", 0.5) # CC quarantine compliance rate based on symptom
  para <- assign_para(para, "qrate_ab", 0.7) # CC quarantine compliance rate based on positive ab test
  para <- assign_para(para, "qrate_pcr", 0.7)  # CC quarantine compliance rate based on positive pcr test

  # Parameters about testing tools
  para <- assign_para(para, "ab_rate", function(x, t_onset) 1/(1 + exp(7+t_onset-x))) # seroconversion rate from infection day, based on the clinical paper from Yumei Wen
  para <- assign_para(para, "sensitivity_ab", 0.9) # ab test sensitivity
  para <- assign_para(para, "sensitivity_pcr", 0.999)  # pcr test sensitivity
  para <- assign_para(para, "samplefailure_pcr", 0.3)  # pcr sampling failure
  para <- assign_para(para, "sensitivity_antig", 0.8)  # antigen sensitivity is 0.8

  # adjust R0 for next generation matrix
  para <- R0_adjust(para)

  return(para)
}

R0_adjust <- function(para){
  #########################################################################
  # adjust R0 using next generation matrix for 1) young people susceptibility 2) subclinical cases
  #########################################################################
  # ajust for R0
  # norm1 <- (sum(para$age_mix) - para$age_mix[c(1)]/2- sum(para$age_mix[c(2,4)])/4)/sum(para$age_mix)
  # using next generation matrix to compute norm1; only kept terms connected with young people susceptibility
  Cyy <- para$age_mix[c(1)] # number of comtact for young <--> young
  Coy <- sum(para$age_mix[c(2,4)]) # number of comtact for young <--> old
  Coo <- sum(para$age_mix[c(3,5,6)]) # number of comtact for old <--> old
  Ny <- para$age_dist[1] # number of young people, Cyy/Ny is the average number of young contact for a yong person
  No <- sum(para$age_dist[c(2,3)]) # number of young people, Cyy/Ny is the average number of young contact for a yong person
  y_sus <- 0.5 # susceptability of young person
  NGM <- matrix(c(y_sus * Cyy/Ny,  Coy/Ny, y_sus * Coy/No, Coo/No) , nrow = 2)
  trNGM <- sum(diag(NGM))
  detNGM <- det(NGM)
  Spectral_radius_half <- trNGM + (trNGM^2 - 4*detNGM )^0.5

  y_sus <- 1 # susceptability of young person
  NGM <- matrix(c(y_sus * Cyy/Ny,  Coy/Ny, y_sus * Coy/No, Coo/No) , nrow = 2)
  trNGM <- sum(diag(NGM))
  detNGM <- det(NGM)
  Spectral_radius_1 <- trNGM + (trNGM^2 - 4*detNGM )^0.5

  norm1 <- Spectral_radius_half/Spectral_radius_1
  # norm2 account for the redution of the low transmission rate of asymptomatic cases
  norm2 <- 1 - para$sub_clini_rate * (1 - para$asym_rate)
  para$R0_adj <- para$R0/(norm1 * norm2)
  ##################################
  # compute Re
  norm_age_mix_scdst <- para$age_mix_scdst/sum(para$age_mix_scdst)
  para$Re <- para$R0_adj/para$num_cc *
    ((1- norm_age_mix_scdst[c(1)]/2- sum(norm_age_mix_scdst[c(2,4)])/4) * para$num_cc_scdst) *
    norm2 # approximate Re (haven't taken age-susceptibility into account)
  return(para)
}



###############################################
# generate the contact network
###############################################
#' Simulate synthetic population estimate ERGM contact networks.
#'
#' @param para list contains all parameter, which should be from the function init_para; the paramter settings could be modified at this stage
#' @param output character string for the function to save the output into. The function will create and save results into "Networks" directory.
#' @param searched_clustering_number a parameter determin the strength of clustering within the population. Should use default setting unless
#' for very specific testing session (e.g. simulating lockdown scenario where there is no contact except designated shopping time)
#'
#' @return a list of two object: first is an ERGM object which could be used to simulate contact networks; second is a para object which was updated
#' with the synthetic population age and family information.
#' @export
#'
#' @examples
#' para <- list()
#' para$community.pop_sz <- 200
#' para$pop_sz <- 400
#' para <- init_para(para = para)
#' nw <- network_generate(para)
#' plot(simulate(nw[[1]][[1]]))
network_generate <- function(para, output = "example", searched_clustering_number = 4 ){

  if(para$community.pop_sz < para$pop_sz){
    print("Simulating large network -- fitting small networks and aggregate them")
    print("Note: para$pop_sz must be integer times of para$community.pop_sz, otherwise it will be stopped")
    stopifnot(para$pop_sz %% para$community.pop_sz == 0)
    para.community <- para
    para.community$pop_sz <- para$community.pop_sz

    est.community <- NA
    grid_id <- 1
    #################################################################################
    # step 1: fit a small ERGM network to get the coefficients
    #################################################################################
    # perform grid search to get the local clustering coefficient
    while(class(est.community) != "ergm"){
      nw_para <- initiate_nw(para.community)
      nw.ego <- nw_para[[1]]
      para.community <- nw_para[[2]]

      clustering_effect <- (para.community$pop_sz/2*para.community$num_cc_scdst)*(1 - para.community$percent_HH_cc_scdst) * searched_clustering_number
      target.stats <- c(para$community.pop_sz/2 * para$num_cc_scdst * 0.9, para$community.pop_sz/2 * para$num_cc_scdst * para$percent_HH_cc_scdst,
                        (para$age_mix_scdst/sum(para$age_mix_scdst) * para$community.pop_sz/2 * para$num_cc_scdst)[1:5], clustering_effect, clustering_effect)
      try({
        suppressMessages(  est.community <- ergm(nw.ego ~ edges  + nodematch("family") + mm("age", levels2 = -6) + absdiff("clustering_x", pow=2) + absdiff("clustering_y", pow=2),
                                        target.stats = target.stats,control = control.ergm(MCMLE.maxit = 400, SAN.maxit = 1000)) )
        ego.sim100 <- simulate(est.community,nsim = 100)
        sim.stats <- attr(ego.sim100,"stats")
        trgt <- rbind(colMeans(sim.stats), est.community$target.stats)
        deviation_target_statistics <- mean(abs(trgt[1,] - trgt[2,])/trgt[2,])
        if(deviation_target_statistics > 0.05){
          est.community <- NA
          searched_clustering_number <- searched_clustering_number + 1
        }
      })

      if(class(est.community) != "ergm"){
        print(paste0("There might be problem with parameter setting, sample population again. current repeat: ", grid_id))
        grid_id <- grid_id + 1
        }
    }

    nw.full <- initiate_nw(para)[[1]]
    suppressMessages( est.full <- ergm(nw.full ~ edges,
                 target.stats = para$pop_sz/2 * para$num_cc_scdst * 0.1 * para$Non_HH_CC_rate,control = control.ergm(MCMLE.maxit = 400, SAN.maxit = 1000))
    )

    ########################################################
    # Step 2: construct a full size population with many realization of small communitys
    ########################################################
    num_community <- para$pop_sz/para$community.pop_sz
    communities <- network::as.edgelist(simulate(est.community))
    # assigning all the covariates
    nw_attr <- simulate(est.community)
    cov_family <- network::get.vertex.attribute(nw_attr, "family")
    cov_clustering_x <- network::get.vertex.attribute(nw_attr, "clustering_x")
    cov_clustering_y <- network::get.vertex.attribute(nw_attr, "clustering_y")
    cov_AGE <- network::get.vertex.attribute(nw_attr, "age")
    nw_family <- cov_family
    nw_clustering_x <- cov_clustering_x
    nw_clustering_y <- cov_clustering_y
    nw_AGE <- cov_AGE
    for(i in 2:num_community){
      communities <- rbind(communities, network::as.edgelist(simulate(est.community)) + para$community.pop_sz * (i-1))
      nw_family <- c(nw_family, cov_family + max(cov_family) * (i-1))
      nw_clustering_x <- c(nw_clustering_x, cov_clustering_x + (max(cov_clustering_x) - min(cov_clustering_x)) * (i - 1)/sqrt(2) )
      nw_clustering_y <- c(nw_clustering_y, cov_clustering_y + (max(cov_clustering_y) - min(cov_clustering_y)) * (i - 1)/sqrt(2) )
      nw_AGE <- c(nw_AGE, cov_AGE)
    }
    # communities <- rbind(communities, network::as.edgelist(simulate(est.full)))
    # nw_joint <- as.network(communities, directed = F)
    # network::set.vertex.attribute(nw_joint, attrname = "family", nw_family)
    # network::set.vertex.attribute(nw_joint, attrname = "clustering_x", nw_clustering_x)
    # network::set.vertex.attribute(nw_joint, attrname = "clustering_y", nw_clustering_y)
    # network::set.vertex.attribute(nw_joint, attrname = "age", nw_AGE)

    # resetting the population size, extracting family labels
    para.community$pop_sz <- para$pop_sz
    para.community$family_lable <- nw_family
    para.community$clustering_x <- nw_clustering_x
    para.community$clustering_y <- nw_clustering_y
    para.community$AGE <- nw_AGE

    NW_SIM <- list(list(est.community, est.full),para.community,searched_clustering_number, deviation_target_statistics)
  }else{
    print("Directly fitting the ERGM -- not using aggregating small community trick")
    # save the network properties, including AGE, family_lable, geographical location
    nw_para <- initiate_nw(para)
    nw.full <- nw_para[[1]]
    para <- nw_para[[2]]

    est1 <- NA
    grid_id <- 1
    #################################################################################
    # step 1: fit a small ERGM network to get the coefficients
    #################################################################################
    # perform grid search to get the local clustering coefficient
    while(is.na(est1)[1]){
      clustering_effect <- (para$pop_sz/2*para$num_cc_scdst)*(1 - para$percent_HH_cc_scdst) * searched_clustering_number #  14 (25) 23 (20)
      target.stats <- c(para$pop_sz/2 * para$num_cc_scdst, para$pop_sz/2 * para$num_cc_scdst * para$percent_HH_cc_scdst,
                        (para$age_mix_scdst/sum(para$age_mix_scdst) * para$pop_sz/2 * para$num_cc_scdst)[1:5], clustering_effect, clustering_effect)

      try({
        suppressMessages(  est1 <- ergm(nw.full ~ edges  + nodematch("family") + mm("age", levels2 = -6) + absdiff("clustering_x", pow=2) + absdiff("clustering_y", pow=2),
                                        target.stats = target.stats,control = control.ergm(MCMLE.maxit = 400, SAN.maxit = 200)) )
        ego.sim100 <- simulate(est1,nsim = 100)
        sim.stats <- attr(ego.sim100,"stats")
        trgt <- rbind(colMeans(sim.stats), est1$target.stats)
        deviation_target_statistics <- mean(abs(trgt[1,] - trgt[2,])/trgt[2,])
        if(deviation_target_statistics > 0.05){
          est1 <- NA
        }
      })

      searched_clustering_number <- searched_clustering_number + 1 # reduce the searched number if it did not converge; 6 is when there is no geographical clustering effect
      print(paste0("Reduce clustering coefficient, search again; current search: ", grid_id))
      grid_id <- grid_id + 1
    }
    para$clustering_effect <- clustering_effect

    NW_SIM <- list(est1,para,searched_clustering_number, deviation_target_statistics)
  }

  # save the data
  dir.create("Networks")

  save(NW_SIM, file = paste0("Networks/",output,".network.Rdata"))

  return(NW_SIM)
}

# use the function below to simulate contact network for large population -- using many small network to combine
aggregate_simulation <- function(est_nw, para){
  est.community <- est_nw[[1]]
  est.full <- est_nw[[2]]
  num_community <- para$pop_sz/para$community.pop_sz
  communities <- network::as.edgelist(simulate(est.community))
  for(i in 2:num_community){
    communities <- rbind(communities, network::as.edgelist(simulate(est.community)) + para$community.pop_sz * (i-1))
  }
  communities <- rbind(communities, network::as.edgelist(simulate(est.full)))
  nw_joint <- network::as.network(communities, directed = F)
  return(nw_joint)
}


initiate_nw <- function(para){
  para$AGE <- unlist(sapply(1:length(para$age_dist), function(x) rep(x,round(para$age_dist[x] * para$pop_sz))))
  if(length(para$AGE) < para$pop_sz){ # there could be rounding issue
    new_sp <- sample(c(1,2,3), size = (para$pop_sz - length(para$AGE)),  prob = para$age_dist)
    para$AGE <- c(para$AGE, new_sp)
  }
  stopifnot(length(para$AGE) == para$pop_sz)
  nw <- network::network.initialize(n = para$pop_sz , directed = FALSE)
  # specify house hold
  adult_idx <- which(para$AGE == 3)
  family_core <- sample(adult_idx, para$pop_sz/para$family_sz, replace =F)# a family should have at least one adult
  # assign other adults randomly
  non_core_adult_idx <- adult_idx[! adult_idx %in% family_core]
  # family_adults <- split(non_core_adult_idx, sample(para$pop_sz/para$family_sz, length(non_core_adult_idx), replace = TRUE) )

  non_core_idx <- sample(c(which(para$AGE <= 2), non_core_adult_idx))
  # comupte the split point for family size
  family_sz_label <- cumsum(para$HH_dist)

  # assigning family labels
  family_lable <- rep(NA, para$pop_sz)
  family_lable[family_core] <- 1:(para$pop_sz/para$family_sz)

  # using a rolling rotation to create family based on the household size distribution
  for(sz_gp in 1:2){
    # for each group 2-3/4-5, I assign half of family group 2-3 to be 2 and the other half to be 3
    idx1 <- floor(family_sz_label[sz_gp] * para$pop_sz/para$family_sz):(para$pop_sz/para$family_sz)
    family_lable[non_core_idx[1:(idx1[length(idx1)] - idx1[1] + 1)]] <-  idx1
    non_core_idx <- non_core_idx[(1+length(idx1)):length(non_core_idx)]
    # pick half of thsese families to assiagn another member: the result will be in the family of size (2-3, that's the data we have unfortunately),
    # half of then are havign 2 members and the other half have 3 members
    idx2 <- floor((family_sz_label[sz_gp] + family_sz_label[sz_gp+1])/2 * para$pop_sz/para$family_sz):(para$pop_sz/para$family_sz)
    family_lable[non_core_idx[1:(idx2[length(idx2)] - idx2[1] + 1)]] <-  idx2
    non_core_idx <- non_core_idx[(1+length(idx2)):length(non_core_idx)]
  }
  # assign the remaining people randomly to those family with more than 5 people
  family_lable[non_core_idx] <- sample(floor(family_sz_label[3] * para$pop_sz/para$family_sz + 1):(para$pop_sz/para$family_sz),length(non_core_idx), replace = T)

  # use a spiral function to create the location grid: each household will be roughly equal distance
  phi <- c(1)
  for(i in 2:(para$pop_sz/para$family_sz)){
    phi[i] <- phi[i-1] +1/phi[i-1]
  }
  clustering_x <- phi * cos(phi *pi) + rnorm(length(phi))
  clustering_x <- clustering_x[family_lable]
  clustering_y <- phi * sin(phi *pi) + rnorm(length(phi))
  clustering_y <- clustering_y[family_lable]

  para$family_lable <- family_lable
  para$clustering_x <- clustering_x
  para$clustering_y <- clustering_y


  # this family and age label will be add to the population
  nw <- network::set.vertex.attribute(nw, "family", family_lable)
  nw <- network::set.vertex.attribute(nw, "age", para$AGE)
  # nw <- network::set.vertex.attribute(nw, "clustering", c(clustering_x,clustering_y))
  nw <- network::set.vertex.attribute(nw, "clustering_x", clustering_x)
  nw <- network::set.vertex.attribute(nw, "clustering_y", clustering_y)
  return(list(nw, para))
}


###############################################
# wrapper for transmission simulation
###############################################
#' Simulate transmission using contact network.
#'
#' @param NW_SIM A list of two object, which is the output of network_generate function.
#' @param input_location A file location to read the NW_SIM object.
#' @param PCR logical True means PCR test is deployed for the containment
#' @param RDT logical True means rapid diagnostic tests are deployed for the containment
#' @param len_sim numeric number of days to simulate for the outbreak, default 100.
#'
#' @return a dataframe contains the simulated results.
#' @export
#'
#' @examples
#' para <- list()
#' para$community.pop_sz <- 200
#' para <- init_para(para = para)
#' nw <- network_generate(para)
#' rslt <- simulate_transmission(PCR = FALSE, RDT = FALSE) # simulate a transmission without any containment
simulate_transmission <- function(NW_SIM = NA, input_location = "Networks/example.network.Rdata", PCR = F, RDT = F, len_sim = 100){
  if(is.na(NW_SIM)[1]){
    if(stringr::str_detect(input_location, ".network.Rdata")){
      load(input_location)
    }else{
      load(paste0(input_location, ".network.Rdata"))
    }
  }
  ergm.fitting <- NW_SIM[[1]]
  para <- NW_SIM[[2]]
  para$len_sim <- len_sim

  if(RDT){
    para$ab_test_rate  <- para$pcr_test_rate # consent for antigen
    para$ab_rate <- function(x, t_onset) (1-para$samplefailure_pcr) # failure rate of sample for antigen
    # para$abiso # same as ab
    para$sensitivity_ab <- para$sensitivity_antig # sensitivity is 0.8
    para$tracing_cc_ab <- para$tracing_cc_onset
    para$delay_ab <- para$delay_symptom # we do the test as soon as the symptom is discovered
  }else if(!RDT && !PCR){
    para$onsetiso <- 0
  }

  # C is the contact matrix, which is traced for 7 days. If i,j element is 7, it means the latest contact of i,j is today,
  # if it is 0, then there is no contact between i,j in the past 7 days,
  C <- matrix(0, para$pop_sz,para$pop_sz)

  # I is a vector indicating when an individual has been infected
  # NA means susceptible, 0 means infected date
  I <- matrix(NA, para$pop_sz,1)
  trace_inf_n <- matrix(0, para$pop_sz,1)
  # Z is a vector indicating if the infected is detectable
  Z <- matrix(F, para$pop_sz,1)

  # onset just save the incubation time
  O <- matrix(NA, para$pop_sz,1)

  # Q is the vector indicating when an individual has been quarantine, NA means not quarantine, 1 means first day
  Q <- matrix(NA, para$pop_sz,1)
  RDT_n <- 0 # total number of RDT used
  PCR_n <- 0 # total number of PCR used
  rdt <- matrix(0, len_sim)
  pcr <- matrix(0, len_sim)

  # stop saving list of C matrix as it consume lots of disk
  # C_lst <- list()
  O_lst <- list()
  I_lst <- list()
  Q_lst <- list()

  # initial case
  # para$E_0 <- floor(runif(1,min = 1, max = 3))
  init_idx <- sample(1:para$pop_sz, para$E_0)
  I[init_idx] <- 0
  O[init_idx] <- incub(para$E_0)
  Z[init_idx] <- F
  for(t in 1:len_sim){
    # C_lst[[t]] <- C

    # add another round of infection to get the correct contact scaling
    if(!is.null(para$cc_cyl) && para$cc_cyl > 1){
      C_WD <- C_update(C, Q, ergm.fitting, para, WD_flg = T)
      lst <- I_O_update(I, Q, C_WD, O, Z, trace_inf_n, para, WD_flg = T)
      I <- lst[[1]]
      O <- lst[[2]]
      Z <- lst[[3]]
      trace_inf_n <- lst[[4]]
    }
    C <- C_update(C, Q, ergm.fitting, para)
    lst <- I_O_update(I, Q, C, O, Z, trace_inf_n, para)
    I <- lst[[1]]
    O <- lst[[2]]
    Z <- lst[[3]]
    trace_inf_n <- lst[[4]]
    # combine the contact network from two rounds of simulation
    if(!is.null(para$cc_cyl) && para$cc_cyl > 1){
      C <- C_unify(C, C_WD)
    }

    I_lst[[t]] <- I
    O_lst[[t]] <- O
    lst1 <- Q_update(I, Q, C, O, Z, RDT_n, PCR_n, para, flg_ab = RDT, flg_multi_ab = FALSE, flg_PCR=PCR, PCR_need_yesterday)
    Q <- lst1[[1]]
    RDT_n <-lst1[[2]]
    PCR_n <-lst1[[3]]
    PCR_need_yesterday <- lst1[[4]]
    Q_lst[[t]] <- Q
    rdt[t] <- RDT_n
    pcr[t] <- PCR_n
  }
  Rt <- rep(NA, len_sim)
  death <- rep(0, len_sim)
  for(t in 1:len_sim){
    i <- I_lst[[t]]
    idx <- which(i == 2) # from the second day they are infectious
    if(length(idx)) Rt[t] <- mean(trace_inf_n[idx])
    # compute death
    death_case <- which(i == para$death_delay)
    if(length(death_case)) death[t] <- sum(runif(length(death_case)) < para$death_rate[para$AGE[death_case]])
  }
  # plot the daily new case
  ab_new_daily <- rep(NA, len_sim)
  cap <- rep(NA, len_sim) # store the Q numbers
  ab_new_daily[1] <- para$pop_sz - sum(is.na(I_lst[[1]]))
  cap[1] <- sum(!is.na(Q_lst[[1]]) & Q_lst[[t]] < 14)
  for(t in 2:len_sim){
    ab_new_daily[t] <- sum(is.na(I_lst[[t-1]]))  - sum(is.na(I_lst[[t]]))
    cap[t] <- sum(!is.na(Q_lst[[t]]) & Q_lst[[t]] < 14)
  }
  return(data.frame(new_daily_case = ab_new_daily, quarantine_daily = cap, Re_daily = Rt, RDT_used = rdt, PCR_used = pcr, death_daily = death))

}

###############################################
# Transmission update functions
###############################################
# update C
# the function below provide a way to update daily contact
C_update <- function(C, Q, est_nw, para, WD_flg = F){
  # one day pass; C is 12 when just infected, then C decrease by 1 everyday pass
  C <- pmax(C-1, 0) # if within day flag WD_flg is true, don't increase the day
  # then update close contact of this day. we assume the close contact to happen between the people around
  ##########################
  # simulated an actual net from the egocentric estimation
  # population.ego.sim <- data.frame(family = para$family_lable, age = para$AGE, clustering_x = para$clustering_x, clustering_y = para$clustering_y)
  if(class(est_nw) == "list"){
    edge_lst <- network::as.edgelist(aggregate_simulation(est_nw, para))
  }else{
    edge_lst <- network::as.edgelist(simulate(est_nw))
  }
  # cc store the distance from the close contact to index case for each close contact of a particular day

  prob_cc <- ifelse(!is.na(Q[edge_lst[,1]]), para$theta, 1) * ifelse(!is.na(Q[edge_lst[,2]]),para$theta, 1)
  true_cc <- (runif(dim(edge_lst)[1]) < prob_cc)
  C[cbind(edge_lst[true_cc,1], edge_lst[true_cc,2])] <- 12
  return(C)
}

# for multiple infection round per day -- use below function to unify the contact structure
C_unify <- function(C_ref, C_WD){
  # insert all the C_WD == 12 into C_ref
  C_ref[which(C_WD == 12)] <- 12
  return(C_ref)
}


# update the Infected individual & keep an record of the incubation time
I_O_update <- function(I, Q, C, O, Z, trace_inf_n, para, WD_flg = F){
  if(!WD_flg){
    I <- I + 1 # if within day flag WD_flg is true, don't increase the day
  }
  for(i in 1:para$pop_sz){
    # i is the infection source, idx is the infected
    # cc_idx find the infected individual and he has contact someone that day
    # (our model is almost certain to have contact, unless he is quarantined)
    # C[-i,i] "-i" is to avoid count the index case himself
    if( !is.na(I[i]) & (length(which(C[-i,i] == 12)) | length(which(C[i,-i] == 12)) ) ){
      cc_idx <- union(which(C[-i,i] == 12), which(C[i,-i] == 12))
      u <- runif(length(cc_idx))
      # get the individual transmissibility, it need to be normalized by the number of individuals
      transmissibility <- g_transmissibility(O[i], para) * ifelse(Z[i] == 1, para$asym_rate, 1)
      # pick the index of individual 1) who is not yet infected & 2) transmited
      # the expected transimisstion rate need to be normalized by daily contact 2*num_cc
      # reduce the sesceptability for young people (AGE == 1) by half
      susceptibility <- ifelse(para$AGE[cc_idx] == 1, .5, 1)
      u1 <- runif(length(cc_idx))
      idx <- cc_idx[(u < transmissibility[I[i]]) & is.na(I[cc_idx]) & u1 < susceptibility]
      I[idx] <- 0 # consider the incubation time
      u1 <- runif(length(idx))
      # if Z=2, the infected case is detectable, if Z=1, subclinical, if Z=F, not reported and infected
      Z[idx] <- sapply(u1, function(x) if(x<para$symptom_report*(1-para$sub_clini_rate)) 2 else if(x<(1-para$sub_clini_rate)) F else 1)
      O[idx] <- incub(length(idx)) # incubation duration
      # trace the Rt
      trace_inf_n[i] <- trace_inf_n[i] + length(idx)
    }
  }
  return(list(I, O, Z,trace_inf_n))
}
########################################################
# Quarantine functions
########################################################
# this function will quarantine after ab tests
# flg_ab <- FALSE
# flg_multi_ab <- TRUE
# flg_PCR <- TRUE
########################################
Q_update <- function(I, Q, C, O, Z, RDT_n, PCR_n, para, flg_ab, flg_multi_ab, flg_PCR, PCR_need_yesterday){
  Q <- Q + 1
  PCR_need_today <-0
  PCR_n_today <-0
  if (sum(!is.na(I))>para$infect_sz) { # take containment intervention only when the infected number passes the threshold
    # quarantine close contact
    for(i in 1:para$pop_sz){
      if(!is.na(I[i]) & (Z[i] == 2)){ # only when Z == 2 the case is reported at all
        u <- runif(4)

        # case detection strategy
        ok_onset_iso <- (I[i]-O[i]) == para$delay_symptom &
          u[1] < para$onsetiso
        ok_pcr_test <- flg_PCR &
          (PCR_n_today <= para$pcr_available) &
          ((I[i]-O[i]) == para$delay_pcr) &
          u[2] < para$pcr_test_rate
        ok_ab_test <-  flg_ab &
          ((I[i]-O[i]) == para$delay_ab) &
          u[3] < para$ab_test_rate
        ok_multiab_test <- flg_multi_ab &
          ((I[i]-O[i]) == 6 | (I[i]-O[i]) == 8 | (I[i]-O[i]) == 10) &
          u[4] < para$ab_test_rate

        # quarantine strategy
        # 1. test with RDT
        if(ok_ab_test & is.na(Q[i])){
          RDT_n <- RDT_n + 1
          u <- runif(2)
          seroconvert <- para$ab_rate(I[i],O[i]) # if the test shows positive
          if(u[1] < para$abiso & u[2] < para$sensitivity_ab*seroconvert){ # if the infected is isolated, quarantine their close contact
            Q[i] <- 1
            cc <- union(which(C[-i,i] >= 12 - para$tracing_cc_ab), which(C[i,-i] >= 12 - para$tracing_cc_ab))
            u <- runif(length(cc))
            # make sure the cc_success_rate for family is always 1
            tracing_rate <- ifelse(para$family_lable[i] == para$family_lable[cc], 1, para$cc_success_ab)
            Q[cc[is.na(Q[cc]) & u < tracing_rate * para$qrate_ab]] <- 1
          }
        } else if( ok_multiab_test & is.na(Q[i])){
          RDT_n <- RDT_n + 1
          u <- runif(2)
          seroconvert <- para$ab_rate(I[i],O[i]) # if the test shows positive
          if(u[1] < para$abiso & u[2] < para$sensitivity_ab*seroconvert){ # if the infected is isolated, quarantine their close contact
            Q[i] <- 1
            cc <- union(which(C[-i,i] >= 12 - (I[i]-O[i])), which(C[i,-i] >= 12 - (I[i]-O[i])))
            u <- runif(length(cc))
            tracing_rate <- ifelse(para$family_lable[i] == para$family_lable[cc], 1, para$cc_success_ab)
            Q[cc[is.na(Q[cc]) & u < tracing_rate * para$qrate_ab]] <- 1
          }
        }

        # 2. encourage non-positive to quarantine
        if(ok_onset_iso & is.na(Q[i])){
          Q[i] <- 1
          cc <- union(which(C[-i,i] >= 12 - para$tracing_cc_onset), which(C[i,-i] >= 12- para$tracing_cc_onset))
          u <- runif(length(cc))
          # make sure the cc_success_rate for family is always 1
          stopifnot(length(para$family_lable[cc]) == length(cc))
          tracing_rate <- ifelse(para$family_lable[i] == para$family_lable[cc], 1, para$cc_success_symptom)
          #####################################
          # for debug
          # print(length(cc[is.na(Q[cc]) & u < tracing_rate * para$qrate_symptom]))
          #####################################
          Q[cc[is.na(Q[cc]) & u < tracing_rate * para$qrate_symptom]] <- 1
        }

        # perform PCR on the rest
        if(ok_pcr_test & is.na(Q[i])){
          PCR_need_today <- PCR_need_today + 1
          pcravailable <- ifelse(para$setting==3,
                                 (runif(1)<(para$pcr_available/(PCR_need_yesterday+1)*2)),
                                 T) # in a slum, when PCR is not sufficient, smaller id people have a higher chance of geting the PCR test
          if (pcravailable) {
            PCR_n <- PCR_n + 1
            PCR_n_today <- PCR_n_today +1
            u <- runif(2)
            if(u[1] < para$pcriso & u[2] < para$sensitivity_pcr*(1-para$samplefailure_pcr)){ # if the infected is isolated, quarantine their close contact
              Q[i] <- 1
              cc <- union(which(C[-i,i] >= 12 - para$tracing_cc_pcr), which(C[i,-i] >= 12 - para$tracing_cc_pcr))
              u <- runif(length(cc))
              # make sure the cc_success_rate for family is always 1
              tracing_rate <- ifelse(para$family_lable[i] == para$family_lable[cc], 1, para$cc_success_pcr)
              Q[cc[is.na(Q[cc]) & u < tracing_rate * para$qrate_pcr]] <- 1

            }
          }
        }

      }
    }
    # release those after 14 days of quarantine
    for(i in 1:para$pop_sz){
      # we only release those who are do not have onset: if they do have onset,
      # they will be keep quarantined (this is our strategy to calculate medical needs)
      if(!is.na(Q[i]) & Q[i] >= 14){
        if(is.na(I[i])){
          Q[i] <- NA
        }else if(I[i] < O[i]){ # so this guys haven't develop any symptom, and will be free
          Q[i] <- NA
        }
      }
    }
  }
  return(list(Q,RDT_n,PCR_n,PCR_need_today))
}
Xilin-Jiang/NetworkCOVID19 documentation built on April 2, 2022, 5:33 p.m.