What are the key features of real life that the model should capture?

Age-mixing

  1. WSD of age gaps
  2. BSD of age gaps
  3. slope
  4. intercept

Important question: should we only have “general population” targets, or also hiv-status and gender specific targets? For now, only general male population.

Sexual activity:

Degree distribution of new sex partners in the last year: 5. shape parameter (r) of the negative binomial distribution = 1.29 6. scale parameter (theta) of the negative binomial distribution = 0.66 (theta = p/(1-p)) (For now we are not taking age effect into account, but if we were, e.g. via GAMM or other multi-parameter model): Extract age effect: Fitting NB distribution to data on number of new relationships in last year, with age covariate.

library(dplyr)
library(MASS)
library(splines)
library(boot)
library(haven)
library(ggplot2)
library(fitdistrplus)
library(lmtest)
library(mclust)
library(depth)

root.path <- dirname(getwd())

relations.data.path <- paste0(root.path, "/MiceABC/Sex Network Master (participant level) 09.08.2012.dta")

data.relations <- read_dta(file = relations.data.path)
#names(data.relations)
data.needed <- dplyr::select(data.relations, c(id,
                                     age,
                                     race,
                                     gender,
                                     cpcount,
                                     ompcount,
                                     hivtestresult,
                                     totalpartners))
data.needed$cpcount[is.na(data.needed$cpcount)] <- 0
data.needed$ompcount[is.na(data.needed$ompcount)] <- 0

#########################################################################################
# data.filtered is the dataset for the partner turnover analysis

data.filtered <- dplyr::filter(data.needed,
                               race == 2,
                               gender != "2",
                               age >= 15,
                               age < 50)

data.filtered$totalpartnerslstyr <- data.filtered$cpcount +
  data.filtered$ompcount # Total NEW relationships formed in the last year

data.filtered$age0 <- data.filtered$age - min(data.filtered$age)

# We create 4 subsets, one for each race-gender stratum
md.wideBM <- dplyr::filter(data.filtered,
                           race == 2,
                           gender == "0")    # Let's ONLY WORK WITH THIS STRATUM

md.wideBW <- dplyr::filter(data.filtered,
                           race == 2,
                           gender == "1")

###################################
#Negative binomial model
###################################
# Model for black men in the last year
intercept.Bm <- glm.nb(totalpartnerslstyr~1,
                       data = md.wideBM,
                       control = glm.control(maxit = 100000, trace = 3),
                       init.theta = 1)
summary(intercept.Bm) # intercept exp(0.25), theta = 0.65
shape.nb <- exp(as.numeric(intercept.Bm$coefficients)) #(r)
scale.nb <- as.numeric(intercept.Bm$theta) #(theta = p/(1-p))

# Or alternatively we can fit the negative binomial like this:
fit.negbin <- fitdist(as.numeric(md.wideBM$totalpartnerslstyr), "nbinom")

fit.pois <- fitdist(as.numeric(md.wideBM$totalpartnerslstyr), "pois")
r.pois <- rpois(n = 1000000, lambda = as.numeric(fit.pois$estimate))


# A negative binomial distribution is the result of gamma-distributed Poisson processes.
r.lambdas <- rgamma(n = 1000000, shape = shape.nb, scale = scale.nb)
r.negbinom <- rpois(n = length(r.lambdas), lambda = r.lambdas)
random.nb <- rnbinom(n = 10000, size = shape.nb, mu = shape.nb * scale.nb)

hist(md.wideBM$totalpartnerslstyr)
hist(r.negbinom)
hist(random.nb)
mean(md.wideBM$totalpartnerslstyr)
mean(r.negbinom)
mean(random.nb)

round(prop.table(table(as.numeric(md.wideBM$totalpartnerslstyr))), 2)
round(prop.table(table(random.nb)), 2)
round(prop.table(table(r.negbinom)), 2)
round(prop.table(table(r.pois)), 2)


finalBm <- glm.nb(totalpartnerslstyr~ns(age0, 3),
                data=md.wideBM,
                control=glm.control(maxit=100000, trace = 3),
                init.theta=0.5)

summary(finalBm)

lrtest(intercept.Bm, finalBm)

###############################################################################
# Made up data set to predict the expected number of sexual partners per age
##############################################################################
madeforBM <- data.frame(age0 = 0:34, gender = "man", race = "African") # ~ 15 to 49 year old men

madeforBM$totalpartnerslstyr <- NA
########################################################################################
tryBM <- predict(finalBm, newdata=madeforBM, type="link", se.fit = TRUE)
madeforBM$try <- exp(tryBM$fit);
madeforBM$try.low <- exp(tryBM$fit - 1.96 * tryBM$se.fit);
madeforBM$try.high <- exp(tryBM$fit + 1.96 * tryBM$se.fit);


### Final step: plotting the results
plot(seq(15, 49), madeforBM$try, type="l", col="navyblue",main="Black men",xlab="Age",ylab="Partnerslastyear", ylim = c(0, 4))
#points(seq(15, 49), madeforBM$try.low, col="navyblue")
#points(seq(15, 49), madeforBM$try.high, col="navyblue")
  1. Concurrency: Beauclair JIAS 2015:Point-prevalence of concurrent partnerships 6 months before survey was 13.4% among black men

Epidemic trajectory

HIV prevalence by age and gender, by age-groups in 2011 (30 years after introducing HIV) as per Beauclair 2017 (journal unknown yet) 8. HIV prevalence in women < 25 yo: 0.2 9. HIV prevalence in men < 25 yo: 0.1 10. HIV prevalence in women 25 <= x < 35: 0.42 11. HIV prevalence in men 25 <= x < 35: 0.17 12. HIV prevalence in women 35 <= x < 45: 0.37 13. HIV prevalence in men 35 <= x < 45: 0.24

wsd.general <- 3
bsd.general <- 1.5
slope.general <- 0.7
intercept.general <- -1 # average age of partners of 15 year-old boys is 16
shape.nb <- 1.29
scale.nb <- 0.66
pp.cp.6months.men <- 0.134
hiv.prev.lt25.women <- 0.2
hiv.prev.lt25.men <- 0.1
hiv.prev.25.34.women <- 0.42
hiv.prev.25.34.men <- 0.17
hiv.prev.35.44.women <- 0.37
hiv.prev.35.44.men <- 0.24
library(devtools)
#install_github("wdelva/RSimpactHelp")
library(RSimpactCyan)
library(RSimpactHelper)
library(ggplot2)
library(GGally)
library(lmtest)
library(mice)
library(miceadds)
library(parallel)
library(randtoolbox)
library(EasyABC)
library(dplyr)
library(tidyr)
library(nlme)
library(lme4)
library(boot)
library(data.table)


# Let's create a colour palette that is colourblind-friendly
# The palette with black:
cb.Palette <- c("#F0E442", "#D55E00", "#0072B2", "#E69F00", "#999999")
#                yellow      red         blue     orange      grey



# First we define a simpact wrapper function that takes as argument a vector of model parameter values and returns a vector of summary statistics



# A wrapper function to run Simpact once
# input.vector <- c(0, 3, -0.5, 0)
simpact.wrapper.master <- function(inputvector = input.vector){
  library(RSimpactHelper)
  library(RSimpactCyan)
  library(magrittr)
  library(dplyr)
  library(nlme)

  agemixing.lme.errFunction <- function(e)
  {
    return(list())
  }

  agemixing.lme.fitter <- function(data = dplyr::filter(agemix.model[[1]], Gender =="male")){
    men.lme <- lme(pagerelform ~ agerelform0,
                   data = data,
                   control=lmeControl(maxIter=10000, returnObject=TRUE, opt = "optim"),
                   random = ~1 | ID,
                   method = "REML",
                   weight = varPower(value = 0.5, form = ~agerelform0 + 1))
  }


  scenario <- function(interventionlist, tasp.indicator) {
    switch(tasp.indicator+1, interventionlist[1:4], interventionlist[1:5])
  } 



  cd4.atARTinit <- function(datalist = datalist,
                            agegroup = c(15, 30),
                            timewindow = c(15, 30),
                            cd4count=350, site="All"){

    cd4.atARTinit <- age.group.time.window(datalist = datalist,
                                           agegroup = agegroup,
                                           timewindow = timewindow, site="All")

    cd4.atARTinit <- subset(cd4.atARTinit, TreatTime !=Inf) #HIV positive individuals

    raw.df <- data.frame(cd4.atARTinit)
    art.df <- subset(datalist$ttable, ID %in% cd4.atARTinit$ID &
                       TStart > timewindow[1] & TStart < timewindow[2])

    ##What if the person dropped out and come back again?
    art.df <- data.frame(dplyr::summarise(dplyr::group_by(art.df, ID, Gender),
                                          CD4atARTstart = min(CD4atARTstart)))

    #indicate those who started their treatment when their CD4 count was below a given threshold
    art.df <- art.df %>% dplyr::mutate(ART.start.CD4 = CD4atARTstart < cd4count)

    # Now we apply the left_join dplyr function to add the ART status to raw.df.
    raw.df <- dplyr::left_join(x = raw.df, y = art.df, by = c("ID", "Gender"))

    #provide a summary of those that are on treatment and those that started below a threshold
    cd4count.atARTInit <- data.frame(dplyr::summarise(dplyr::group_by(raw.df, Gender),
                                                      TotalCases = n(),
                                                      LessCD4initThreshold =sum(ART.start.CD4)))

    return(cd4count.atARTInit)
  }
  root.path <- dirname(getwd())
  destDir <- paste0(root.path, "/temp")
  age.distr <- agedistr.creator(shape = 5, scale = 65)
  cfg.list <- input.params.creator(population.eyecap.fraction = 0.21,#1,
                                   population.simtime = 40,
                                   population.nummen = 2500,
                                   population.numwomen = 2500,
                                   hivseed.time = 10,
                                   hivseed.type = "amount",
                                   hivseed.amount = 30,
                                   hivseed.age.min = 20,
                                   hivseed.age.max = 50,
                                   hivtransmission.param.a = -1,
                                   hivtransmission.param.b = -90,
                                   hivtransmission.param.c = 0.5,
                                   hivtransmission.param.f1 = log(2),
                                   hivtransmission.param.f2 = log(log(1.4) / log(2)) / 5,
                                   formation.hazard.agegapry.gap_factor_man_age = -0.01, #-0.01472653928518528523251061,
                                   formation.hazard.agegapry.gap_factor_woman_age = -0.01, #-0.0726539285185285232510561,
                                   formation.hazard.agegapry.meanage = -0.025,
                                   formation.hazard.agegapry.gap_factor_man_const = 0,
                                   formation.hazard.agegapry.gap_factor_woman_const = 0,
                                   formation.hazard.agegapry.gap_factor_man_exp = -1, #-6,#-1.5,
                                   formation.hazard.agegapry.gap_factor_woman_exp = -1, #-6,#-1.5,
                                   formation.hazard.agegapry.gap_agescale_man = 0.25,
                                   formation.hazard.agegapry.gap_agescale_woman = 0.25,#-0.30000007,#-0.03,
                                   debut.debutage = 15,
                                   conception.alpha_base = -2.5#,
                                   #person.art.accept.threshold.dist.fixed.value = 0
  )
  # cfg.list["diagnosis.baseline"] <- 100
  # cfg.list["diagnosis.HSV2factor"] <- 0
  # cfg.list["diagnosis.agefactor"] <- 0
  # cfg.list["diagnosis.beta"] <- 0
  # cfg.list["diagnosis.diagpartnersfactor"] <- 0
  # cfg.list["diagnosis.genderfactor"] <- 0
  # cfg.list["diagnosis.isdiagnosedfactor"] <- 0
  # cfg.list["diagnosis.t_max"] <- 200
  cfg.list["formation.hazard.agegapry.baseline"] <- 2
  cfg.list["mortality.aids.survtime.C"] <- 65
  cfg.list["mortality.aids.survtime.k"] <- -0.2
  cfg.list["monitoring.fraction.log_viralload"] <- 0.3
  cfg.list["dropout.interval.dist.uniform.min"] <- 100
  cfg.list["dropout.interval.dist.uniform.max"] <- 200

  cfg.list["person.agegap.man.dist.type"] <- "normal" #fixed
  #cfg.list["person.agegap.man.dist.fixed.value"] <- -6
  cfg.list["person.agegap.woman.dist.type"] <- "normal" #"fixed"
  #cfg.list["person.agegap.woman.dist.fixed.value"] <- -6

  cfg.list["mortality.aids.survtime.C"] <- 65
  cfg.list["mortality.aids.survtime.k"] <- -0.2
  #cfg.list["monitoring.cd4.threshold"] <- 0
  cfg.list["person.agegap.man.dist.normal.mu"] <- 0
  cfg.list["person.agegap.woman.dist.normal.mu"] <- 0
  cfg.list["person.agegap.man.dist.normal.sigma"] <- inputvector[2] ######### 3
  cfg.list["person.agegap.woman.dist.normal.sigma"] <- inputvector[2] ######### 3

  cfg.list["person.survtime.logoffset.dist.type"] <- "normal"
  cfg.list["person.survtime.logoffset.dist.normal.mu"] <- 0
  cfg.list["person.survtime.logoffset.dist.normal.sigma"] <- 0.1

  cfg <- cfg.list
  seedid <- inputvector[1]
  #cfg["person.agegap.man.dist.fixed.value"] <- -2 # inputvector[2]
  #cfg["person.agegap.woman.dist.fixed.value"] <- -2 # inputvector[2]
  cfg["formation.hazard.agegapry.gap_factor_man_exp"] <- inputvector[3] ######### -0.5
  cfg["formation.hazard.agegapry.gap_factor_woman_exp"] <- inputvector[3] ######### -0.5

  # Let's introduce ART, and evaluate whether the HIV prevalence drops less  rapidly
  art.intro <- list()
  art.intro["time"] <- 25
  art.intro["person.art.accept.threshold.dist.fixed.value"] <- inputvector[4] ######### 0.5
  art.intro["diagnosis.baseline"] <- 0#100
  art.intro["monitoring.cd4.threshold"] <- 100 # 1200
  #art.intro["monitoring.interval.piecewise.cd4s"] <- "0,1300"


  # Gradual increase in CD4 threshold. in 2007:200. in 2010:350. in 2013:500

  art.intro2 <- list()
  art.intro2["time"] <- 25 + inputvector[5] ######### 30
  art.intro2["monitoring.cd4.threshold"] <- 200

  art.intro3 <- list()
  art.intro3["time"] <- inputvector[4] + inputvector[5] + inputvector[6] ########### 33
  art.intro3["monitoring.cd4.threshold"] <- 350

  art.intro4 <- list()
  art.intro4["time"] <- inputvector[4] + inputvector[5] + inputvector[6] + inputvector[7] ########### 36
  art.intro4["monitoring.cd4.threshold"] <- 500

  art.intro5 <- list()
  art.intro5["time"] <- 38
  art.intro5["monitoring.cd4.threshold"] <- 5000 # This is equivalent to immediate access
  art.intro5["person.art.accept.threshold.dist.fixed.value"] <- inputvector[8] ########### 0.75

  tasp.indicator <- inputvector[9] # 1 if the scenario is TasP, 0 if the scenario is current status

  interventionlist <- list(art.intro, art.intro2, art.intro3, art.intro4, art.intro5)

  intervention <- scenario(interventionlist, tasp.indicator)

  results <- simpact.run(configParams = cfg,
                         destDir = destDir,
                         agedist = age.distr,
                         seed = seedid, #, Introducing ART has helped to keep the prevalence high
                         intervention = intervention)

  datalist.agemix <- readthedata(results)
  agemix.df <- agemix.df.maker(datalist.agemix)

  agemix.model <- pattern.modeller(dataframe = agemix.df,
                                   agegroup = c(15, 50),
                                   timepoint = datalist.agemix$itable$population.simtime[1],
                                   timewindow = 3)

  men.lme <- tryCatch(agemixing.lme.fitter(data = dplyr::filter(agemix.model[[1]], Gender =="male")),
                      error = agemixing.lme.errFunction) # Returns an empty list if the lme model can't be fitted

  bignumber <- NA # let's try if NA works (instead of 9999 for example)

  AAD <- ifelse(length(men.lme) > 0, mean(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber)
  SDAD <- ifelse(length(men.lme) > 0, sd(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber)
  powerm <- ifelse(length(men.lme) > 0, as.numeric(attributes(men.lme$apVar)$Pars["varStruct.power"]), bignumber)
  slopem <- ifelse(length(men.lme) > 0, summary(men.lme)$tTable[2, 1], bignumber)
  WVAD.base <- ifelse(length(men.lme) > 0, men.lme$sigma^2, bignumber)
  BVAD <- ifelse(length(men.lme) > 0, getVarCov(men.lme)[1,1], bignumber)

  #agegap.mean <- mean(datalist.agemix$rtable$AgeGap)
  #agegap.sd <- sd(datalist.agemix$rtable$AgeGap)
  hivprev.15.50 <- prevalence.calculator(datalist = datalist.agemix,
                                         agegroup = c(15, 50),
                                         timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[3]
  # ART coverage vector
  ART.cov.eval.timepoints <- seq(from = intervention[[1]]$time,
                                 to = datalist.agemix$itable$population.simtime[1])
  ART.cov.vector <- rep(NA, length(ART.cov.eval.timepoints))
  for (art.cov.index in 1:length(ART.cov.vector)){
    ART.cov.vector[art.cov.index] <- sum(ART.coverage.calculator(datalist = datalist.agemix,
                                                                 agegroup = c(15, 50),
                                                                 timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.onART) /
      sum(ART.coverage.calculator(datalist = datalist.agemix,
                                  agegroup = c(15, 50),
                                  timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.cases)
  }
  # Vector of the fraction of people who started their treatment when their CD4count was between specified values
  cd4.atARTinit.fractions.timepoints <- seq(from = intervention[[1]]$time + 5,
                                            to = datalist.agemix$itable$population.simtime[1],
                                            by = 5) #30, 35, 40
  cd4.any.vector <- cd4.lt100.vector <- cd4.100.200.vector <- cd4.200.350.vector <- cd4.350.500.vector <- cd4.500plus.vector <- rep(NA, length(cd4.atARTinit.fractions.timepoints))
  for (cd4.index in 1:length(cd4.lt100.vector)){
    cd4.lt100.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
                                                 TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
                                                 TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
                                                 CD4atARTstart < 100) %>% select(ID) %>% unique() %>% nrow()
    cd4.100.200.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
                                                   TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
                                                   TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
                                                   CD4atARTstart >= 100,
                                                   CD4atARTstart < 200) %>% select(ID) %>% unique() %>% nrow()
    cd4.200.350.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
                                                   TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
                                                   TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
                                                   CD4atARTstart >= 200,
                                                   CD4atARTstart < 350) %>% select(ID) %>% unique() %>% nrow()
    cd4.350.500.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
                                                   TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
                                                   TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
                                                   CD4atARTstart >= 350,
                                                   CD4atARTstart < 500) %>% select(ID) %>% unique() %>% nrow()
    cd4.500plus.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
                                                   TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
                                                   TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
                                                   CD4atARTstart >= 500) %>% select(ID) %>% unique() %>% nrow()
    cd4.any.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
                                               TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
                                               TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5)) %>% select(ID) %>% unique() %>% nrow()
  }

  incid.final <- incidence.calculator(datalist = datalist.agemix,
                                      agegroup = c(15, 50),
                                      timewindow = c((datalist.agemix$itable$population.simtime[1] - 2), datalist.agemix$itable$population.simtime[1]),
                                      only.active = "No")$incidence[3]

  growthrate <- pop.growth.calculator(datalist = datalist.agemix,
                                      timewindow = c(0, datalist.agemix$itable$population.simtime[1]))

  outputvector <- c(AAD, SDAD, powerm, slopem, WVAD.base, BVAD, hivprev.15.50, growthrate,
                    ART.cov.vector,
                    cd4.lt100.vector/cd4.any.vector,
                    cd4.100.200.vector/cd4.any.vector,
                    cd4.200.350.vector/cd4.any.vector,
                    cd4.350.500.vector/cd4.any.vector,
                    cd4.500plus.vector/cd4.any.vector,
                    incid.final)
  return(outputvector)
}






simpact.wrapper <- function(inputvector = input.vector){ # This function does not produce the incidence and does not have an input for the model scenario
  library(RSimpactHelper)
  library(RSimpactCyan)
  library(magrittr)
  library(dplyr)
  library(nlme)
  library(lme4)
  library(fitdistrplus)
  library(lmtest)
  library(tidyr)
  library(data.table)

  agemixing.lme.errFunction <- function(e)
  {
    return(list())
  }

  simpact.errFunction <-function(e){
    if (length(grep("NaN",e$message)) != 0){
      return(list())
    }
  }

  # agemixing.lme.fitter <- function(data = dplyr::filter(agemix.model[[1]], Gender =="male")){
  #   men.lme <- lme(pagerelform ~ agerelform0,
  #                  data = data,
  #                  control=lmeControl(maxIter=10000, returnObject=TRUE, opt = "optim"),
  #                  random = ~1 | ID,
  #                  method = "REML",
  #                  weight = varPower(value = 0.5, form = ~agerelform0 + 1))
  # }

  ampmodel <- function(data = dplyr::filter(agemix.model[[1]], Gender =="male")) {
    lmer(pagerelform ~ agerelform0 + (1 | ID),
         data = data,
         REML = TRUE)  
  }



  bvar <- function(model) {

    # Outputs a df with between-subject variance, upr & lwr limits

    # Must take an merMod  object

    bsd <- as.numeric(as.data.frame(VarCorr(model))[1,5])


  }



  degree.df.maker <- function (dataframe.df, agegroup = c(15, 30), hivstatus = 0, 
                               survey.time = 15, window.width = 1, gender.degree = "female", 
                               only.new = TRUE) 
  {
    dataframe.rels.df <- dataframe.df
    dataframe.rels.df <- dplyr::select(dataframe.rels.df, -episodeorder, -FormTime, -DisTime)
    dataframe.rels.df <- unique.data.frame(dataframe.rels.df)
    rels.form.dis.df <- dplyr::summarise(dplyr::group_by(dataframe.df, 
                                                         relid), FormTime = min(FormTime), DisTime = max(DisTime))
    dfnew <- dplyr::left_join(x = dataframe.rels.df, y = rels.form.dis.df, 
                              by = "relid")
    dfnew <- dplyr::filter(dfnew, TOD > survey.time)
    {
      if (hivstatus == 0) {
        dfnew <- dplyr::filter(dfnew, InfectTime > survey.time)
      }
      else if (hivstatus == 1) {
        dfnew <- dplyr::filter(dfnew, InfectTime <= survey.time)
      }
    }
    {
      if (only.new) {
        dfnew <- dplyr::filter(dfnew, FormTime >= survey.time - 
                                 window.width, FormTime < survey.time, DisTime > 
                                 survey.time - window.width, Gender == gender.degree, 
                               survey.time - TOB >= agegroup[1], survey.time - 
                                 TOB < agegroup[2])
      }
      else {
        dfnew <- dplyr::filter(dfnew, FormTime < survey.time, 
                               DisTime > survey.time - window.width, Gender == 
                                 gender.degree, survey.time - TOB >= agegroup[1], 
                               survey.time - TOB < agegroup[2])
      }
    }
    uniqueOut <- dfnew %>% dplyr::select(ID, relid) %>% distinct %>% 
      rename(Degree = relid)
    if (dim(uniqueOut)[1] != 0) {
      degreedata.df <- aggregate(Degree ~ ID, data = uniqueOut, 
                                 length)
    } else {
      degreedata.df <- data.frame(ID = NA, Degree = NA)
    }
    return(degreedata.df)
  }

  # scenario <- function(interventionlist, tasp.indicator) {
  #   switch(tasp.indicator+1, interventionlist[1:4], interventionlist[1:5])
  # } 





  concurr.pointprev.calculator <- function(datalist,
                                           timepoint = datalist$itable$population.simtime[1] - 0.5){ #DTP = datalist$ptable, DTE = datalist$etable, cfg = configfile){
    #DTP <- datalist$ptable
    #DTL <- datalist$ltable
    output <- data.table()
    DTalive.infected <- alive.infected(datalist = datalist,
                                       timepoint = timepoint, site = "All") # First we only take the data of people who were alive at time_i
    agemix.df <- agemix.df.maker(datalist)
    degrees.df <- degree.df.maker(dataframe.df = agemix.df,
                                  agegroup = c(15, 50),
                                  hivstatus = 2,
                                  survey.time = timepoint,
                                  window.width = 0,
                                  gender.degree = "male",
                                  only.new = FALSE)

    number.people.with.cps <- sum(degrees.df$Degree > 1)
    popsize <- nrow(DTalive.infected)
    concurr.pointprevalence <- number.people.with.cps / popsize
    #output <- rbind(output, concurrdata)
    #outputlist <- list(output=output)
    #return(outputlist)
    return(concurr.pointprevalence)
  }




  cd4.atARTinit <- function(datalist = datalist,
                            agegroup = c(15, 30),
                            timewindow = c(15, 30),
                            cd4count=350, site="All"){

    cd4.atARTinit <- age.group.time.window(datalist = datalist,
                                           agegroup = agegroup,
                                           timewindow = timewindow, site="All")

    cd4.atARTinit <- subset(cd4.atARTinit, TreatTime !=Inf) #HIV positive individuals

    raw.df <- data.frame(cd4.atARTinit)
    art.df <- subset(datalist$ttable, ID %in% cd4.atARTinit$ID &
                       TStart > timewindow[1] & TStart < timewindow[2])

    ##What if the person dropped out and come back again?
    art.df <- data.frame(dplyr::summarise(dplyr::group_by(art.df, ID, Gender),
                                          CD4atARTstart = min(CD4atARTstart)))

    #indicate those who started their treatment when their CD4 count was below a given threshold
    art.df <- art.df %>% dplyr::mutate(ART.start.CD4 = CD4atARTstart < cd4count)

    # Now we apply the left_join dplyr function to add the ART status to raw.df.
    raw.df <- dplyr::left_join(x = raw.df, y = art.df, by = c("ID", "Gender"))

    #provide a summary of those that are on treatment and those that started below a threshold
    cd4count.atARTInit <- data.frame(dplyr::summarise(dplyr::group_by(raw.df, Gender),
                                                      TotalCases = n(),
                                                      LessCD4initThreshold =sum(ART.start.CD4)))

    return(cd4count.atARTInit)
  }
  root.path <- dirname(getwd())
  destDir <- paste0(root.path, "/temp")
  age.distr <- agedistr.creator(shape = 5, scale = 65)
  cfg.list <- input.params.creator(population.eyecap.fraction = 0.2, #0.21,#1,
                                   population.simtime = 15, #40,
                                   population.nummen = 2500,
                                   population.numwomen = 2500,
                                   hivseed.time = 10,
                                   hivseed.type = "amount",
                                   hivseed.amount = 25, #30,
                                   hivseed.age.min = 20,
                                   hivseed.age.max = 50,
                                   hivtransmission.param.a = -1,
                                   hivtransmission.param.b = -90,
                                   hivtransmission.param.c = 0.5,
                                   hivtransmission.param.f1 = log(inputvector[2]) , #log(2),
                                   hivtransmission.param.f2 = log(log(sqrt(inputvector[2])) / log(inputvector[2])) / 5, #log(log(1.4) / log(2)) / 5,
                                   formation.hazard.agegapry.gap_factor_man_age = -0.01, #-0.01472653928518528523251061,
                                   formation.hazard.agegapry.gap_factor_woman_age = -0.01, #-0.0726539285185285232510561,
                                   formation.hazard.agegapry.meanage = -0.025,
                                   formation.hazard.agegapry.gap_factor_man_const = 0,
                                   formation.hazard.agegapry.gap_factor_woman_const = 0,
                                   formation.hazard.agegapry.gap_factor_man_exp = -1, #-6,#-1.5,
                                   formation.hazard.agegapry.gap_factor_woman_exp = -1, #-6,#-1.5,
                                   formation.hazard.agegapry.gap_agescale_man = inputvector[3], # 0.25,
                                   formation.hazard.agegapry.gap_agescale_woman = inputvector[3], # 0.25,#-0.30000007,#-0.03,
                                   debut.debutage = 15,
                                   conception.alpha_base = inputvector[14]#-2.5#,
                                   #person.art.accept.threshold.dist.fixed.value = 0
  )

  # cfg.list["diagnosis.baseline"] <- 100
  # cfg.list["diagnosis.HSV2factor"] <- 0
  # cfg.list["diagnosis.agefactor"] <- 0
  # cfg.list["diagnosis.beta"] <- 0
  # cfg.list["diagnosis.diagpartnersfactor"] <- 0
  # cfg.list["diagnosis.genderfactor"] <- 0
  # cfg.list["diagnosis.isdiagnosedfactor"] <- 0
  # cfg.list["diagnosis.t_max"] <- 200
  cfg.list["formation.hazard.agegapry.baseline"] <- 2
  cfg.list["mortality.aids.survtime.C"] <- 65
  cfg.list["mortality.aids.survtime.k"] <- -0.2
  cfg.list["monitoring.fraction.log_viralload"] <- 0.3
  cfg.list["dropout.interval.dist.uniform.min"] <- 100
  cfg.list["dropout.interval.dist.uniform.max"] <- 200


  cfg.list["person.agegap.man.dist.type"] <- "normal" #fixed
  #cfg.list["person.agegap.man.dist.fixed.value"] <- -6
  cfg.list["person.agegap.woman.dist.type"] <- "normal" #"fixed"
  #cfg.list["person.agegap.woman.dist.fixed.value"] <- -6

  cfg.list["mortality.aids.survtime.C"] <- 65
  cfg.list["mortality.aids.survtime.k"] <- -0.2
  cfg.list["monitoring.cd4.threshold"] <- 0
  cfg.list["person.agegap.man.dist.normal.mu"] <- inputvector[4] # 0
  cfg.list["person.agegap.woman.dist.normal.mu"] <- inputvector[4] # 0
  cfg.list["person.agegap.man.dist.normal.sigma"] <- inputvector[5] ######### 3
  cfg.list["person.agegap.woman.dist.normal.sigma"] <- inputvector[5] ######### 3
  cfg.list["person.eagerness.man.dist.gamma.a"] <- inputvector[6] ######### 0.23
  cfg.list["person.eagerness.woman.dist.gamma.a"] <- inputvector[7] ######### 0.23
  cfg.list["person.eagerness.man.dist.gamma.b"] <- inputvector[8] ######### 45
  cfg.list["person.eagerness.woman.dist.gamma.b"] <- inputvector[9] ######### 45


  cfg.list["person.survtime.logoffset.dist.type"] <- "normal"
  cfg.list["person.survtime.logoffset.dist.normal.mu"] <- 0
  cfg.list["person.survtime.logoffset.dist.normal.sigma"] <- 0.1

  cfg <- cfg.list

  cfg["population.maxevents"] <- as.numeric(cfg.list["population.simtime"][1]) * as.numeric(cfg.list["population.nummen"][1]) * 3
  cfg["monitoring.fraction.log_viralload"] <- 0.3
  cfg["person.vsp.toacute.x"] <- 5 # See Bellan PLoS Medicine

  seedid <- inputvector[1]
  #cfg["person.agegap.man.dist.fixed.value"] <- -2 # inputvector[2]
  #cfg["person.agegap.woman.dist.fixed.value"] <- -2 # inputvector[2]
  cfg["formation.hazard.agegapry.gap_factor_man_exp"] <- inputvector[10] ######### -0.5
  cfg["formation.hazard.agegapry.gap_factor_woman_exp"] <- inputvector[10] ######### -0.5
  cfg["formation.hazard.agegapry.baseline"] <- inputvector[11]

  cfg["formation.hazard.agegapry.numrel_man"] <- inputvector[12]
  cfg["formation.hazard.agegapry.numrel_woman"] <- inputvector[13]
  # inputvector[14] is conception.alpha.base (higher up)
  cfg["dissolution.alpha_0"] <- inputvector[15]
  cfg["dissolution.alpha_4"] <- inputvector[16]

  # Let's introduce ART, and evaluate whether the HIV prevalence drops less  rapidly
  art.intro <- list()
  art.intro["time"] <- 25
  art.intro["person.art.accept.threshold.dist.fixed.value"] <- 0.5 # inputvector[4] ######### 0.5
  art.intro["diagnosis.baseline"] <- 0#100
  art.intro["monitoring.cd4.threshold"] <- 100 # 1200
  #art.intro["monitoring.interval.piecewise.cd4s"] <- "0,1300"


  # Gradual increase in CD4 threshold. in 2007:200. in 2010:350. in 2013:500

  art.intro2 <- list()
  art.intro2["time"] <- 25 + 5 # inputvector[5] ######### 30
  art.intro2["monitoring.cd4.threshold"] <- 200

  art.intro3 <- list()
  art.intro3["time"] <- 33 # inputvector[4] + inputvector[5] + inputvector[6] ########### 33
  art.intro3["monitoring.cd4.threshold"] <- 350

  art.intro4 <- list()
  art.intro4["time"] <- 3 # inputvector[4] + inputvector[5] + inputvector[6] + inputvector[7] ########### 36
  art.intro4["monitoring.cd4.threshold"] <- 500

  art.intro5 <- list()
  art.intro5["time"] <- 38
  art.intro5["monitoring.cd4.threshold"] <- 5000 # This is equivalent to immediate access
  art.intro5["person.art.accept.threshold.dist.fixed.value"] <- 0.5 # inputvector[8] ########### 0.75

  # tasp.indicator <- inputvector[9] # 1 if the scenario is TasP, 0 if the scenario is current status

  interventionlist <- list(art.intro, art.intro2, art.intro3, art.intro4, art.intro5)

  intervention <- interventionlist # scenario(interventionlist, tasp.indicator)

  results <- tryCatch(simpact.run(configParams = cfg,
                         destDir = destDir,
                         agedist = age.distr,
                         seed = seedid, #, Introducing ART has helped to keep the prevalence high
                         intervention = intervention),
                      error = simpact.errFunction)
  # results <- simpact.run(configParams = cfg,
  #                        destDir = destDir,
  #                        agedist = age.distr,
  #                        seed = seedid, #, Introducing ART has helped to keep the prevalence high
  #                        intervention = intervention)
  if (length(results) == 0){
    outputvector <- rep(NA, 16)
  } else {
    if (as.numeric(results["eventsexecuted"]) >= (as.numeric(cfg["population.maxevents"]) - 1)) {
      outputvector <- rep(NA, 16)
    } else {
    datalist.agemix <- readthedata(results)
    agemix.df <- agemix.df.maker(datalist.agemix)

    agemix.model <- pattern.modeller(dataframe = agemix.df,
                                     agegroup = c(15, 50),
                                     timepoint = datalist.agemix$itable$population.simtime[1],
                                     timewindow = 1)#3)

    # men.lme <- tryCatch(agemixing.lme.fitter(data = dplyr::filter(agemix.model[[1]], Gender =="male")),
    #                     error = agemixing.lme.errFunction) # Returns an empty list if the lme model can't be fitted

    men.lmer <- tryCatch(ampmodel(data = dplyr::filter(agemix.model[[1]], Gender =="male")),
                         error = agemixing.lme.errFunction) # Returns an empty list if the lme model can't be fitted

    bignumber <- NA # let's try if NA works (instead of 9999 for example)

    AAD.male <- ifelse(length(men.lmer) > 0, mean(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber)
    SDAD.male <- ifelse(length(men.lmer) > 0, sd(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber)
    #powerm <- ifelse(length(men.lme) > 0, as.numeric(attributes(men.lme$apVar)$Pars["varStruct.power"]), bignumber)
    slope.male <- ifelse(length(men.lmer) > 0, summary(men.lmer)$coefficients[2, 1], bignumber) #summary(men.lmer)$tTable[2, 1], bignumber)
    WSD.male <- ifelse(length(men.lmer) > 0, summary(men.lmer)$sigma, bignumber) #WVAD.base <- ifelse(length(men.lme) > 0, men.lme$sigma^2, bignumber)

    BSD.male <- ifelse(length(men.lmer) > 0, bvar(men.lmer), bignumber) # Bad name for the function because it actually extracts between subject standard deviation # BVAD <- ifelse(length(men.lmer) > 0, getVarCov(men.lme)[1,1], bignumber)

    intercept.male <- ifelse(length(men.lmer) > 0, summary(men.lmer)$coefficients[1,1] - 15, bignumber) 

    #agegap.mean <- mean(datalist.agemix$rtable$AgeGap)
    #agegap.sd <- sd(datalist.agemix$rtable$AgeGap)


    ###
    # Now we fit the negative binomial distribution
    ###
    degrees.df <- degree.df.maker(dataframe.df = agemix.df,
                                  agegroup = c(18, 50),
                                  hivstatus = 2,
                                  survey.time = datalist.agemix$itable$population.simtime[1],
                                  window.width = 1,
                                  gender.degree = "male",
                                  only.new = TRUE)


    # If we want to know % of men who had 0 partners in the past 12 months,
    # We need to compare nrow(degree.df) with the number of 18-50 year old men #HIV negative women
    # that were alive at the time of the survey
    allsurveymen <- dplyr::filter(datalist.agemix$ptable,
                                  Gender == 1, # Male
                                  TOD > datalist.agemix$itable$population.simtime[1], # Still alive at the time of the survey
                                  TOB <= datalist.agemix$itable$population.simtime[1] - 18, # Not younger than 18 at the time of the survey
                                  TOB > datalist.agemix$itable$population.simtime[1] - 50)#, # Not older than 50 at the time of the survey
    #InfectTime > datalist.agemix$itable$population.simtime[1]) # HIV negative at the time of the survey

    # Creating the vector of degrees
    degree.vector <- c(rep(0, times = (nrow(allsurveymen) - nrow(degrees.df))), degrees.df$Degree)
    meandegree.male <- mean(degree.vector)
    # hist(degree.vector, 10)
    # degree.vector <- rnbinom(n = 100, size =1.28, mu = 0.66) # PLACEHOLDER FOR NOW
    # Fitting the negative binomial distribution to this vector

    fit.negbin <- tryCatch(fitdist(degree.vector, "nbinom"), error = agemixing.lme.errFunction)
    shape.nb.male <- ifelse(length(fit.negbin) > 0, as.numeric(fit.negbin$estimate[2]), bignumber)
    scale.nb.male <- ifelse(length(fit.negbin) > 0, as.numeric(fit.negbin$estimate[1]), bignumber) #(theta = p/(1-p))

    #   degrees.df <- data.frame(degree = degree.vector)
    #   # Model for black men in the last year
    #   nb.fit <- glm.nb(degree~1,
    #                        data = degrees.df,
    #                        control = glm.control(maxit = 100000, trace = 3),
    #                        init.theta = 1)
    # summary(nb.fit) # intercept exp(0.25), theta = 0.65
    # shape.nb <- exp(as.numeric(nb.fit$coefficients)) #(r)
    # scale.nb <- as.numeric(nb.fit$theta) #(theta = p/(1-p))


    # Concurrency point prevalence 6 months before a survey, among men
    pp.cp.6months.male <- concurr.pointprev.calculator(datalist = datalist.agemix,
                                                       timepoint = datalist$itable$population.simtime[1] - 0.5)


    hiv.prev.lt25.women <- prevalence.calculator(datalist = datalist.agemix,
                                                 agegroup = c(15, 25),
                                                 timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[2]
    hiv.prev.lt25.men <- prevalence.calculator(datalist = datalist.agemix,
                                               agegroup = c(15, 25),
                                               timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[1]
    hiv.prev.25.34.women <- prevalence.calculator(datalist = datalist.agemix,
                                                  agegroup = c(25, 35),
                                                  timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[2]
    hiv.prev.25.34.men <- prevalence.calculator(datalist = datalist.agemix,
                                                agegroup = c(25, 35),
                                                timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[1]
    hiv.prev.35.44.women <- prevalence.calculator(datalist = datalist.agemix,
                                                  agegroup = c(35, 45),
                                                  timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[2]
    hiv.prev.35.44.men <- prevalence.calculator(datalist = datalist.agemix,
                                                agegroup = c(35, 45),
                                                timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[1]


    # ART coverage vector
    # ART.cov.eval.timepoints <- seq(from = intervention[[1]]$time,
    #                                to = datalist.agemix$itable$population.simtime[1])
    # ART.cov.vector <- rep(NA, length(ART.cov.eval.timepoints))
    # for (art.cov.index in 1:length(ART.cov.vector)){
    #   ART.cov.vector[art.cov.index] <- sum(ART.coverage.calculator(datalist = datalist.agemix,
    #                                                                agegroup = c(15, 50),
    #                                                                timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.onART) /
    #     sum(ART.coverage.calculator(datalist = datalist.agemix,
    #                                 agegroup = c(15, 50),
    #                                 timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.cases)
  # }
  # Vector of the fraction of people who started their treatment when their CD4count was between specified values
  # cd4.atARTinit.fractions.timepoints <- seq(from = intervention[[1]]$time + 5,
  #                                         to = datalist.agemix$itable$population.simtime[1],
  #                                         by = 5) #30, 35, 40
  # cd4.any.vector <- cd4.lt100.vector <- cd4.100.200.vector <- cd4.200.350.vector <- cd4.350.500.vector <- cd4.500plus.vector <- rep(NA, length(cd4.atARTinit.fractions.timepoints))
  # for (cd4.index in 1:length(cd4.lt100.vector)){
  #   cd4.lt100.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
  #                                                      TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
  #                                                      TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
  #                                                      CD4atARTstart < 100) %>% select(ID) %>% unique() %>% nrow()
  #   cd4.100.200.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
  #                                                      TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
  #                                                      TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
  #                                                      CD4atARTstart >= 100,
  #                                                      CD4atARTstart < 200) %>% select(ID) %>% unique() %>% nrow()
  #   cd4.200.350.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
  #                                                      TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
  #                                                      TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
  #                                                      CD4atARTstart >= 200,
  #                                                      CD4atARTstart < 350) %>% select(ID) %>% unique() %>% nrow()
  #   cd4.350.500.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
  #                                                      TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
  #                                                      TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
  #                                                      CD4atARTstart >= 350,
  #                                                      CD4atARTstart < 500) %>% select(ID) %>% unique() %>% nrow()
  #   cd4.500plus.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
  #                                                      TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
  #                                                      TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5),
  #                                                      CD4atARTstart >= 500) %>% select(ID) %>% unique() %>% nrow()
  #   cd4.any.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable,
  #                                                      TStart < cd4.atARTinit.fractions.timepoints[cd4.index],
  #                                                      TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5)) %>% select(ID) %>% unique() %>% nrow()
  # }

  # incid.final <- incidence.calculator(datalist = datalist.agemix,
  #                                     agegroup = c(15, 50),
  #                                     timewindow = c((datalist.agemix$itable$population.simtime[1] - 2), datalist.agemix$itable$population.simtime[1]),
  #                                     only.active = "No")$incidence[3]

  growthrate <- pop.growth.calculator(datalist = datalist.agemix,
                                      timewindow = c(0, datalist.agemix$itable$population.simtime[1]))

  outputvector <- c(AAD.male, SDAD.male, slope.male, WSD.male, BSD.male, intercept.male,
                    shape.nb.male, scale.nb.male,
                    #meandegree.male,
                    pp.cp.6months.male,
                    hiv.prev.lt25.women,
                    hiv.prev.lt25.men,
                    hiv.prev.25.34.women,
                    hiv.prev.25.34.men,
                    hiv.prev.35.44.women,
                    hiv.prev.35.44.men, 
                    exp(growthrate))
    }
  }
  return(outputvector)
}
MICE_ABC <- function(targets = targets.empirical,
                     n.experiments = 16,
                     lls = c(1.01, 0.15, -1, 2, 0.05, 0.05, 10,  10,  -1,   0, -1,   -1,   -5, -2,   -0.2), #c(2, -1,   0.1, 2, 1, 1, 0.3), #c(0.1, -3, 0.1,  0,  0,  0, 0.2),
                     uls = c(3,    0.35,  1, 5, 1,       1, 100, 100, -0.2, 5, -0.1, -0.1, -1, -0.2,    0), #c(4, -0.2, 0.6, 8, 6, 6, 0.95), #c(5, -0.1, 1.0, 10, 10, 10, 1.0),
                     # probs = c(3, 7), # indicate in the lls and uls vectors, which elements represent probabilities strictly between 0 and 1
                     model = simpact.wrapper,
                     maxit = 10,
                     maxwaves = 2, # and we can also try 10 to do the same as the 10 waves of Lenormand
                     #reps = 5,
                     alpha = 0.25,
                     saturation.crit = 0){
  ptm <- proc.time() # Start the clock

  range.width <- uls - lls
  ll.mat <- matrix(rep(lls, n.experiments), nrow = n.experiments, byrow = TRUE)
  range.width.mat <- matrix(rep(range.width, n.experiments), nrow = n.experiments, byrow = TRUE)
  sobol.seq.0.1 <- sobol(n = n.experiments, dim = length(lls), init = TRUE, scrambling = 1, seed = 1, normal = FALSE)
  experiments <- ll.mat + sobol.seq.0.1 * range.width.mat

  calibration.list <- list() # initiating the list where all the output of MiceABC will be stored

  wave <- 1 # initiating the loop of waves of simulations (one iteration is one wave)
  rel.dist.cutoff <- Inf # initially it is infinitely large, but in later iterations it shrinks
  saturation <- 0
  sim.results.with.design.df.selected <- NULL


  while (wave <= maxwaves && saturation == 0){
    print(c("wave", wave), quote = FALSE) # only for local testing. To be deleted after testing is completed
    # These large and small enough tests could be commented out because the "true" parameter value may lie outside the initial parameter range.
    # They may be useful to prevent infeasible MICE-derived proposals from running (e.g. positive parameters values that don't make sense)
    large.enough <- t(t(experiments) >= lls)
    small.enough <- t(t(experiments) <= uls)
    fine <- large.enough & small.enough
    fine.flag <- as.logical(rowSums(fine) == ncol(fine))
    experiments <- experiments[fine.flag, ]
    #try(if(nrow(experiments) == 0) stop("no experiments in the range"))
    if (nrow(experiments) < 2){
      wave <- maxwaves
    } else {
    print(c(nrow(experiments), "experiments to be run"), quote = FALSE) # only for local testing. To be deleted after testing is completed

    calibration.list$experiments.executed[[wave]] <- nrow(experiments)

    # Running the experiments
    # nl.path <- "/Applications/NetLogo 6.0/app/"
    # NLStart(nl.path, gui=FALSE, nl.obj="agemix.nl1", nl.jarname = 'netlogo-6.0.0.jar')
    # model.path <- "/Users/delvaw/Google Drive/IBM course/tutorials/AgeMixing/Calibration/dummy.0.1.nlogo"
    # NLLoadModel(model.path, nl.obj="agemix.nl1")
    # source(file = "/Users/delvaw/Google Drive/IBM course/tutorials/AgeMixing/Calibration/agemix.simulate.R")

    # sim.results.simple <- t(apply(experiments, 1, model, no.repeated.sim=reps)) # Maybe this apply() can be replaced by a faster foreach loop?
    # Replacing NetLogo model with parallel Simpact model

    # 1. Initially: n.experiments from sobol sequences. 7. Later, (1-alpha) fraction of n.experiments from MICE proposals
    sim.results.simple <- simpact.parallel(model = model,
                                           actual.input.matrix = experiments,
                                           seed_count = 0,
                                           n_cluster = 8)

    #experiment.number <- 1:nrow(experiments)
    sim.results.with.design.df <- as.data.frame(cbind(#experiment.number,
      experiments,
      sim.results.simple))
    x.names <- paste0("x.", seq(1:ncol(experiments)))
    y.names <- paste0("y.", seq(1:ncol(sim.results.simple)))
    names(sim.results.with.design.df) <- c(x.names, y.names)

    # 2. All n.experiments from sobol sequences get summarised measure of distance from target. 8. Distance measures for new proposals
    # NEEDS TO BE AUTOMATED

    # for(i in 1:6) { #-- Create objects  'r.1', 'r.2', ... 'r.6' --
    #   nam <- paste("r", i, sep = ".")
    #   assign(nam, 1:i)
    # }

    #browser()

    sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
    for(i in 1:length(targets)) {
      nam <- paste0("y.", i, ".sq.rel.dist")
      val <- ((sim.results.simple[,i] - targets[i]) / targets[i])^2
      assign(nam, val)
      sum.sq.rel.dist <- sum.sq.rel.dist + get(nam)
    }

    sim.results.with.design.df$sum.sq.rel.dist <- sum.sq.rel.dist

    # sim.results.with.design.df$y.1.sq.rel.dist <- ((sim.results.with.design.df$y.1 - targets[1]) / targets[1])^2
    # sim.results.with.design.df$y.2.sq.rel.dist <- ((sim.results.with.design.df$y.2 - targets[2]) / targets[2])^2
    # sim.results.with.design.df$y.3.sq.rel.dist <- ((sim.results.with.design.df$y.3 - targets[3]) / targets[3])^2
    # sim.results.with.design.df$y.4.sq.rel.dist <- ((sim.results.with.design.df$y.4 - targets[4]) / targets[4])^2
    # sim.results.with.design.df$y.5.sq.rel.dist <- ((sim.results.with.design.df$y.5 - targets[5]) / targets[5])^2
    # sim.results.with.design.df$y.6.sq.rel.dist <- ((sim.results.with.design.df$y.6 - targets[6]) / targets[6])^2
    # sim.results.with.design.df$y.7.sq.rel.dist <- ((sim.results.with.design.df$y.7 - targets[7]) / targets[7])^2
    # sim.results.with.design.df$y.8.sq.rel.dist <- ((sim.results.with.design.df$y.8 - targets[8]) / targets[8])^2
    # sim.results.with.design.df$y.9.sq.rel.dist <- ((sim.results.with.design.df$y.9 - targets[9]) / targets[9])^2
    #
    #
    # sim.results.with.design.df$sum.sq.rel.dist <- sim.results.with.design.df$y.1.sq.rel.dist + sim.results.with.design.df$y.2.sq.rel.dist + sim.results.with.design.df$y.3.sq.rel.dist + sim.results.with.design.df$y.4.sq.rel.dist + sim.results.with.design.df$y.5.sq.rel.dist + sim.results.with.design.df$y.6.sq.rel.dist + sim.results.with.design.df$y.7.sq.rel.dist + sim.results.with.design.df$y.8.sq.rel.dist + sim.results.with.design.df$y.9.sq.rel.dist

    # 2b. Writing to list all the input and output of the executed experiments, so that we can plot it later
    calibration.list$sim.results.with.design[[wave]] <- sim.results.with.design.df

    # 10. Calculate fraction of new (1-alpha frac *n.experiments) distances that are below "old" distance threshold. NOT CURRENTLY USED BECAUSE saturation.crit = 0.
    below.old.treshold <- sim.results.with.design.df$sum.sq.rel.dist < rel.dist.cutoff
    frac.below.old.threshold <- sum(below.old.treshold %in% TRUE) / round(n.experiments * (1-alpha))
    if(frac.below.old.threshold < saturation.crit) saturation <- 1 # If less than the fraction saturation.crit of the new experiments a closer fit than the previous batch of retained experiments, the loop must be terminated.


    # 9. Merge with previously kept alpha fraction shortest distances
    sim.results.with.design.df <- rbind(sim.results.with.design.df.selected, sim.results.with.design.df) # Initially sim.results.with.design.df.selected = NULL

    # 3. Keeping alpha fraction shortest distances

    dist.order <- order(sim.results.with.design.df$sum.sq.rel.dist) # Ordering the squared distances from small to big. The last observation (targets) should be ranked first
    last.one.selected <- round(alpha * n.experiments) # OR IT COULD ALSO BE WRITTEN AS round(alpha * length(dist.order)) BUT IF RANGE CHECKS ARE ON, THE nrow IS NOT NECESSARILY = n.experiments
    selected.distances <- dist.order[1:last.one.selected]
    sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] %>% filter(complete.cases(.)) # Here we filter out any experiments that gave NA output

    # 4. Set distance threshold at alpha-percentile best distance. 11. Update distance threshold
    rel.dist.cutoff <- max(sim.results.with.design.df.selected$sum.sq.rel.dist)  # We update the rel.dist.cutoff, based on the alpha fraction best parametersets

    # 5. Write alpha fraction of input-output-distances and distance threshold to list. 12. Write new best alpha fraction and updated threshold to list
    calibration.list$inputoutput[[wave]] <- sim.results.with.design.df.selected
    calibration.list$rel.dist.cutoff[[wave]] <- rel.dist.cutoff

    # 6. Feed MICE the best alpha fraction and ask for (1-alpha) fraction proposals based on targets
    targets.df <- as.data.frame(matrix(targets, ncol = length(targets))) # Putting target in dataframe format

    names(targets.df) <- y.names
    df.give.to.mice <- full_join(dplyr::select(sim.results.with.design.df.selected,
                                               -contains("sq.rel.dist")), # adding target to training dataset
                                 targets.df,
                                 by = names(targets.df)) # "by" statement added to avoid printing message of the variables were used for joining

    ### Before actually giving it to MICE, we need to logit transform the input params that represent probabilities, so that we can treat these variables as continuous variables
    # "probs" shows that it is the 3rd and 7th input variable (not counting the random seed variable)
    # df.give.to.mice[, probs] <- car::logit(df.give.to.mice[, probs]) # Commented out because none of the input parameters are probabilities for this age-mixing calibration
    # INSTEAD, we are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b.
    # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet
    strict.positive.params <- c(4:8)
    df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params])

    # Let's think a bit more carefully about which variables should be allowed as input for which input parameters.
    # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS.
    predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix.
    # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model.
    # 1. hivtransmission.param.f1 (2)
    # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
    # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
    # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
    # 5. person.person.eagerness.man.dist.gamma.a (0.23)
    # 6. person.person.eagerness.woman.dist.gamma.a (0.23)
    # 7. person.person.eagerness.man.dist.gamma.b (45)
    # 8. person.person.eagerness.woman.dist.gamma.b (45)
    # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
    # 10. formation.hazard.agegapry.baseline (0)
    # 11. formation.hazard.agegapry.numrel_man (-0.5)
    # 12. formation.hazard.agegapry.numrel_woman (-0.5)
    # 13. conception.alpha_base (-2.5)
    # 14. dissolution.alpha_0 (-0.52)
    # 15. dissolution.alpha_4 (-0.05))

    # And a reminder of the output vector
    # 1. AAD.male (4)
    # 2. SDAD.male (4)
    # 3. slope.male (0.7)
    # 4. WSD.male (3)
    # 5. BSD.male (1.5)
    # 6. intercept.male (-1)
    # 7. shape.nb.male (1.29)
    # 8. scale.nb.male (0.66)
    # THIS IS OUT 9. meandegree.male (1)
    # 10. pp.cp.6months.male (0.134)
    # 11. hiv.prev.lt25.women (0.2)
    # 12. hiv.prev.lt25.men (0.1)
    # 13. hiv.prev.25.34.women (0.42)
    # 14. hiv.prev.25.34.men (0.17)
    # 15. hiv.prev.35.44.women (0.37)
    # 16. hiv.prev.35.44.men (0.24)
    # 17. exp(growthrate)) (1.01)

    x.offset <- length(x.names)


    predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them.
    # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage.
    predictorMatrix[1, x.offset + 11:12] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women
    predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope
    predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD
    predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD
    predictorMatrix[5, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[6, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate)
    predictorMatrix[7, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[8, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 15, 16, 17)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence,
    predictorMatrix[10, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence.
    predictorMatrix[11, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth
    predictorMatrix[12, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth
    predictorMatrix[13, x.offset + 17] <- 1 # conception.alpha_base is predicted by popgrowth
    predictorMatrix[14, x.offset + c(7, 8, 10, 17)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth
    predictorMatrix[15, x.offset + c(7, 8, 10, 17)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?)

    # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity.

    # Because there are some many interaction terms, let's first try without any
    #df.give.to.mice <- cbind(df.give.to.mice, data.frame(y.1.y.2 = df.give.to.mice$y.1 * df.give.to.mice$y.2)) # adding interaction term
    print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE)

    mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data
                      m = round(n.experiments * (1-alpha)), # number of imputations
                      predictorMatrix = predictorMatrix,
                      method = "norm",
                      defaultMethod = "norm",
                      maxit = maxit,
                      printFlag = FALSE,
                      data.init = NULL)



    # experiments <- cbind(unlist(mice.test$imp$x.1), # these are the (1-alpha) fraction of n.experiments proposals
    #                      unlist(mice.test$imp$x.2))

    experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names))
    # Before we check the suitability of the new experimental input parameter values, we must backtransform the logits to probabilities
    # experiments[, probs] <- boot::inv.logit(experiments[, probs])
    # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values
    experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params])
    }
    wave <- wave + 1
  }

  calibration.list$secondspassed <- proc.time() - ptm # Stop the clock
  return(calibration.list)
}

Input parameters

  1. hivtransmission.param.f1 (2)
  2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
  3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
  4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
  5. person.person.eagerness.man.dist.gamma.a (0.23)
  6. person.person.eagerness.woman.dist.gamma.a (0.23)
  7. person.person.eagerness.man.dist.gamma.b (45)
  8. person.person.eagerness.woman.dist.gamma.b (45)
  9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
  10. formation.hazard.agegapry.baseline (2)
  11. formation.hazard.agegapry.numrel_man (-0.5)
  12. formation.hazard.agegapry.numrel_woman (-0.5)
  13. conception.alpha_base (-2.5)
  14. dissolution.alpha_0 (-0.52)
  15. dissolution.alpha_4 (-0.05)

Output statistics (and targets)

  1. AAD.male (4)
  2. SDAD.male (4)
  3. slope.male (0.7)
  4. WSD.male (3)
  5. BSD.male (1.5)
  6. intercept.male (-1)
  7. shape.nb.male (1.29)
  8. scale.nb.male (0.66) [9. meandegree.male (1)] let's not use this anymore because it is very strongly correlated with 7.
  9. pp.cp.6months.male (0.134)
  10. hiv.prev.lt25.women (0.2)
  11. hiv.prev.lt25.men (0.1)
  12. hiv.prev.25.34.women (0.42)
  13. hiv.prev.25.34.men (0.17)
  14. hiv.prev.35.44.women (0.37)
  15. hiv.prev.35.44.men (0.24)
  16. exp(growthrate)) (1.01)
# A function to run Simpact in parallel
simpact.parallel <- function(model = simpact.wrapper,
                             actual.input.matrix = matrix(rep(c(1:15), 16), nrow = 16),
                             #nb_simul = 16,
                             seed_count = 0,
                             n_cluster = 8){
  cl <- makeCluster(getOption("cl.cores", n_cluster))
  tab_simul_summarystat = NULL
  list_param <- list(NULL)
  tab_param <- NULL
  paramtemp <- NULL
  simultemp <- NULL

  nb_simul <- nrow(actual.input.matrix)

  for (i in 1:nb_simul) {
    l <- ncol(actual.input.matrix)
    param <- c((seed_count + i), actual.input.matrix[i, ])
    list_param[[i]] <- param
    tab_param <- rbind(tab_param, param[2:(l + 1)])
    paramtemp <- rbind(paramtemp, param[2:(l + 1)])
  }
  list_simul_summarystat = parLapplyLB(cl, list_param,
                                       model)
  tab_simul_summarystat <- do.call(rbind, list_simul_summarystat)
  stopCluster(cl)
  return(tab_simul_summarystat)
}

Let's create the "truth" with the master model.

reps <- 2

target.vector.master <- c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05)
test.inputvectorplusseed <- c(1, target.vector.master)
# 1. hivtransmission.param.f1 (2)
# 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
# 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
# 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
# 5. person.person.eagerness.man.dist.gamma.a (0.23)
# 6. person.person.eagerness.woman.dist.gamma.a (0.23)
# 7. person.person.eagerness.man.dist.gamma.b (45)
# 8. person.person.eagerness.woman.dist.gamma.b (45)
# 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
# 10. formation.hazard.agegapry.baseline (0)
# 11. formation.hazard.agegapry.numrel_man (-0.5)
# 12. formation.hazard.agegapry.numrel_woman (-0.5)
# 13. conception.alpha_base (-2.5)
# 14. dissolution.alpha_0 (-0.52)
# 15. dissolution.alpha_4 (-0.05))

# simpact.wrapper(test.inputvectorplusseed)
# 1. AAD.male (4)
# 2. SDAD.male (4)
# 3. slope.male (0.7)
# 4. WSD.male (3)
# 5. BSD.male (1.5)
# 6. intercept.male (-1)
# 7. shape.nb.male (1.29)
# 8. scale.nb.male (0.66)
# THIS IS OUT 9. meandegree.male (1)
# 10. pp.cp.6months.male (0.134)
# 11. hiv.prev.lt25.women (0.2)
# 12. hiv.prev.lt25.men (0.1)
# 13. hiv.prev.25.34.women (0.42)
# 14. hiv.prev.25.34.men (0.17)
# 15. hiv.prev.35.44.women (0.37)
# 16. hiv.prev.35.44.men (0.24)
# 17. exp(growthrate)) (1.01)

targets.empirical <- c(4, 4, 0.7, 3, 1.5, -1, 1.29, 0.66, 0.134, 0.05, 0.03, 0.1, 0.05, 0.08, 0.09, 1.01)
targets.master <- c(4.0651198,
                    3.5641622,
                    0.5463380,
                    1.8322277,
                    1.3357401,
                    -0.6137708,
                    0.4686528,
                    2.1738464,
                    0.1203855,
                    0.3389044,
                    0.2751678,
                    0.3911846,
                    0.5899390,
                    0.3227513,
                    0.4395604,
                    1.0111390)


targets.diff <- targets.empirical - targets.master
# Filling default values, to facilitate interactive testing of incremental mice function
RMSD.tol.max <- 1
min.givetomice <- 80
n.experiments <- 400
lls <- c(1,  0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16)
uls <- c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3,  -0.001)
model <- simpact.wrapper
maxit <- 50
maxwaves <- 8

# design.matrix.control <- matrix(rep(target.vector.master, reps), nrow = reps, byrow = TRUE)
# 
# design.matrix.interv <- matrix(rep(c(3, -0.5, 0.5, 5, 3, 3, 0.75, 1), reps), nrow = reps, byrow = TRUE)
# test.mat <- matrix(data = NA, nrow = 6, ncol = 17)
# for (test.i in 1:6){
#   test.mat[test.i, ] <- simpact.wrapper(c(test.i, experiments[test.i, ]))
# }
# 
# sumsmstats.control.df <- simpact.parallel(model = simpact.wrapper,
#                                   actual.input.matrix = experiments,#design.matrix.control,
#                                   seed_count = 0,
#                                   n_cluster = 8)
# sumsmstats.interv.df <- simpact.parallel(model = simpact.wrapper.master,
#                                   actual.input.matrix = design.matrix.interv,
#                                   seed_count = 0,
#                                   n_cluster = 8)

Testing a MICE-assisted calibration method with changing objective function (a.k.a. "moving targets" or "moving the goalposts")

  1. Decide on n.experiments from sobol sequences, based on computation time.
  2. Determine max range of parameters that can be explored with sufficient density, given n.experiments. This is not straightforward because we do not know, initally, how changes in parameter values translate to changes in model output.
  3. Determine minimum value of m.experiments (training dataset) required for mice, given the number of predictor variables in the chained equations.
  4. Determine for which targets.intermediate level, there are at least m.experiments within a distance x
  5. Use this targets.intermediate in mice to propose and launch another n.experiments (or n-m experiments?)
  6. Determine for which NEW targets.intermediate level, closer to the empirical targets, there are at least m.experiments within a distance x.
  7. Cycle between steps 5 and 6 until the empirical targets are within reach.
root.path <- dirname(getwd())

MICE_ABC.incremental <- function(targets.empirical = targets.empirical,
                                 RMSD.tol.max = 1,
                                 min.givetomice = 128,
                                 n.experiments = 1024,
                                 lls = c(1,  0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16),
                                 uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3,  -0.001),
                                 model = simpact.wrapper,
                                 maxit = 50,
                                 maxwaves = 8){
  # 0. Start the clock
  ptm <- proc.time()
  calibration.list <- list() # initiating the list where all the output of MiceABC will be stored
  wave <- 1 # initiating the loop of waves of simulations (one iteration is one wave)
  rel.dist.cutoff <- Inf # initially it is infinitely large, but in later iterations it shrinks

  x.names <- paste0("x.", seq(1:15))
  y.names <- paste0("y.", seq(1:16))

  sim.results.with.design.df <- data.frame(matrix(vector(), 0, 31,
                                                  dimnames = list(c(), c(x.names, y.names))),
                                           stringsAsFactors = FALSE) # Will be growing with each wave (appending)
  # sim.results.with.design.df.selected I think this does not need to be initiated
  final.intermediate.features <- rep(NA, times = length(targets.empirical))

  # 1. Start loop of waves, based on comparing intermediate features with targets.empirical
  while (wave <= maxwaves & !identical(final.intermediate.features, targets.empirical)){
    if (wave == 1){
      # 2. Initial, naive results, based on Sobol sequences
      range.width <- uls - lls
      ll.mat <- matrix(rep(lls, n.experiments), nrow = n.experiments, byrow = TRUE)
      range.width.mat <- matrix(rep(range.width, n.experiments), nrow = n.experiments, byrow = TRUE)
      sobol.seq.0.1 <- sobol(n = n.experiments, dim = length(lls), init = TRUE, scrambling = 1, seed = 1, normal = FALSE)
      experiments <- ll.mat + sobol.seq.0.1 * range.width.mat
    }

    sim.results.simple <- simpact.parallel(model = model,
                                           actual.input.matrix = experiments,
                                           seed_count = 0,
                                           n_cluster = 8)
    #save(sim.results.simple, file = paste0(root.path,"/MiceABC/sim.results.simple.RData"))
    #load(file = "sim.results.simple.RData")

    new.sim.results.with.design.df <- as.data.frame(cbind(experiments,
                                                          sim.results.simple))
    x.names <- paste0("x.", seq(1:ncol(experiments)))
    y.names <- paste0("y.", seq(1:ncol(sim.results.simple)))
    x.offset <- length(x.names)
    names(new.sim.results.with.design.df) <- c(x.names, y.names)

    new.sim.results.with.design.df <- new.sim.results.with.design.df %>% dplyr::filter(complete.cases(.))

    if (wave == 1){ # TRUE for the first wave only
      sim.results.with.design.df <- rbind(sim.results.with.design.df,
                                          new.sim.results.with.design.df)
    } else {
      sim.results.with.design.df <- rbind(dplyr::select(sim.results.with.design.df,
                                                      -contains("RMSD")),
                                        new.sim.results.with.design.df)
    }


    # save(sim.results.with.design.df, file = "/Users/wimdelva/Documents/MiceABC/sim.results.with.design.df.RData")
    # load(file = "/Users/delvaw/Documents/MiceABC/sim.results.with.design.df.RData")

    experim.median.features <- med(dplyr::select(sim.results.with.design.df, contains("y.")), mustdith = TRUE)$median

    # 3. Find intermediate features and RMSD.tol for which n.close.to.targets >= min.givetomice
    targets.diff <- targets.empirical - experim.median.features # First we determine how far the empirical targets are away from the median features of the executed experiments
    candidate.RMSD.tol <- Inf # Initially, we assume that the RMSD cut-off needs to be infinitely large to have sufficient observations to give to mice.

    n.steps.targets <- 100
    n.steps.RMSD.tol <- 100

    n.close.to.targets.mat <- matrix(NA, nrow = n.steps.targets+1, ncol = n.steps.RMSD.tol+1)
    final.RMSD.tol <- NA
    shift.fraction <- NA



    for (steps.intermediate.targets in 0:n.steps.targets){ # We make 10% steps from empirical targets to experimental median features
      candidate.intermediate.features <- targets.empirical - (targets.diff * steps.intermediate.targets) / n.steps.targets
      for (steps.RMSD.tol in 0:n.steps.RMSD.tol){ # We make 10% steps from RMSD.tol.max to RMSD.tol = 0
        RMSD.tol <- RMSD.tol.max * (n.steps.RMSD.tol - steps.RMSD.tol) / n.steps.RMSD.tol

        sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
        for (i in 1:length(candidate.intermediate.features)) {
          name.dist <- paste0("y.", i, ".sq.rel.dist")
          value.dist <- ((sim.results.with.design.df[,i + x.offset] - candidate.intermediate.features[i]) / candidate.intermediate.features[i])^2
          assign(name.dist, value.dist)
          sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist)
        }
        RMSD <- sqrt(sum.sq.rel.dist / length(candidate.intermediate.features))
        n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE)
        n.close.to.targets.mat[(1+steps.intermediate.targets), (1+steps.RMSD.tol)] <- n.close.to.targets
        large.enough.training.df <- n.close.to.targets >= min.givetomice
        if (large.enough.training.df == TRUE){
          if (RMSD.tol < candidate.RMSD.tol){
            candidate.RMSD.tol <- RMSD.tol # Updating the candidate.RMSD.tol
            final.RMSD.tol <- RMSD.tol
            final.intermediate.features <- candidate.intermediate.features
            shift.fraction <- steps.intermediate.targets/n.steps.targets # 0 means targets.empirical; 1 means experim.median.features
            #calibration.list$intermediate.features[[wave]] <- final.intermediate.features
            #calibration.list$steps.intermediate.targets[[wave]] <- steps.intermediate.targets 
            #calibration.list$RMSD.tol[[wave]] <- RMSD.tol 
            #calibration.list$n.close.to.targets[[wave]] <- n.close.to.targets
          }
        }
      }
    }

    n.close.to.targets.mat
    final.RMSD.tol
    final.intermediate.features
    shift.fraction


    # 4. Calculate RMSD values for these intermediate.features and the RMSD.tol found
    sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
    for (i in 1:length(final.intermediate.features)) {
      name.dist <- paste0("y.", i, ".sq.rel.dist")
      value.dist <- ((sim.results.with.design.df[,i + x.offset] - final.intermediate.features[i]) / final.intermediate.features[i])^2
      assign(name.dist, value.dist)
      sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist)
    }
    RMSD <- sqrt(sum.sq.rel.dist / length(final.intermediate.features))
    sim.results.with.design.df$RMSD <- RMSD
    n.close.to.targets <- sum(RMSD < final.RMSD.tol, na.rm = TRUE)

    # 5. Select n.close.to.targets shortest distances
    dist.order <- order(RMSD) # Ordering the squared distances from small to big.
    selected.distances <- dist.order[1:n.close.to.targets]
    sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] 
    # 5.b. Record highest RMSD value for that the selected experiments
    calibration.list$max.RMSD[[wave]] <- max(sim.results.with.design.df.selected$RMSD)

    # 6. Record selected experiments to give to mice for this wave
    calibration.list$selected.experiments[[wave]] <- sim.results.with.design.df.selected

    # 7. Put intermediate features in dataframe format
    final.intermediate.features.df <- as.data.frame(matrix(final.intermediate.features, ncol = length(final.intermediate.features)))
    names(final.intermediate.features.df) <- y.names

    # 8. Prepare dataframe to give to mice: selected experiments plus intermediate features
    df.give.to.mice <- dplyr::full_join(dplyr::select(sim.results.with.design.df.selected,
                                                      -contains("RMSD")), # adding target to training dataset
                                        final.intermediate.features.df,
                                        by = names(final.intermediate.features.df)) # "by" statement added to avoid printing message of the variables were used for joining

    # We are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b.
    # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet
    strict.positive.params <- c(4:8)
    df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params])

    # 9. Override default predictorMatrix with a sparser matrix
    # Let's think a bit more carefully about which variables should be allowed as input for which input parameters.
    # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS.
    predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix.
    # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model.
    # 1. hivtransmission.param.f1 (2)
    # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
    # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
    # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
    # 5. person.person.eagerness.man.dist.gamma.a (0.23)
    # 6. person.person.eagerness.woman.dist.gamma.a (0.23)
    # 7. person.person.eagerness.man.dist.gamma.b (45)
    # 8. person.person.eagerness.woman.dist.gamma.b (45)
    # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
    # 10. formation.hazard.agegapry.baseline (0)
    # 11. formation.hazard.agegapry.numrel_man (-0.5)
    # 12. formation.hazard.agegapry.numrel_woman (-0.5)
    # 13. conception.alpha_base (-2.5)
    # 14. dissolution.alpha_0 (-0.52)
    # 15. dissolution.alpha_4 (-0.05))

    # And a reminder of the output vector
    # 1. AAD.male (4)
    # 2. SDAD.male (4)
    # 3. slope.male (0.7)
    # 4. WSD.male (3)
    # 5. BSD.male (1.5)
    # 6. intercept.male (-1)
    # 7. shape.nb.male (1.29)
    # 8. scale.nb.male (0.66)
    # 9. pp.cp.6months.male (0.134)
    # 10. hiv.prev.lt25.women (0.2)
    # 11. hiv.prev.lt25.men (0.1)
    # 12. hiv.prev.25.34.women (0.42)
    # 13. hiv.prev.25.34.men (0.17)
    # 14. hiv.prev.35.44.women (0.37)
    # 15. hiv.prev.35.44.men (0.24)
    # 16. exp(growthrate)) (1.01)


    predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them.
    # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage.
    predictorMatrix[1, x.offset + 10:11] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women
    predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope
    predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD
    predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD
    predictorMatrix[5, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[6, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate)
    predictorMatrix[7, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[8, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 14, 15, 16)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence,
    predictorMatrix[10, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence.
    predictorMatrix[11, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth
    predictorMatrix[12, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth
    predictorMatrix[13, x.offset + 16] <- 1 # conception.alpha_base is predicted by popgrowth
    predictorMatrix[14, x.offset + c(7, 8, 9, 16)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth
    predictorMatrix[15, x.offset + c(7, 8, 9, 16)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?)

    # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity.

    print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE)

    # 10. Let mice propose parameter values that are predicted by the intermediate features
    mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data
                      m = n.experiments, # number of imputations
                      predictorMatrix = predictorMatrix,
                      method = "norm",
                      defaultMethod = "norm",
                      maxit = maxit,
                      printFlag = FALSE)

    # 11. Turn mice proposals into a new matrix of experiments
    experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names))
    # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values
    experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params])

    # 12. Update wave count
    wave <- wave + 1
  }

  # 13. Check if intermediate features have converged to empirical targests (if not, then maxwaves is reached)
  if (final.intermediate.features == targets.empirical){
    # 14. Do more "traditional mice optimisation" for the remaining waves
    while (wave <= maxwaves){
      sim.results.simple <- simpact.parallel(model = model,
                                             actual.input.matrix = experiments,
                                             seed_count = 0,
                                             n_cluster = 8)
      # save(sim.results.simple, file = "/Users/delvaw/Documents/MiceABC/sim.results.simple.RData")
      # load(file = "sim.results.simple.RData")

      new.sim.results.with.design.df <- as.data.frame(cbind(experiments,
                                                            sim.results.simple))
      x.names <- paste0("x.", seq(1:ncol(experiments)))
      y.names <- paste0("y.", seq(1:ncol(sim.results.simple)))
      x.offset <- length(x.names)
      names(new.sim.results.with.design.df) <- c(x.names, y.names)

      new.sim.results.with.design.df <- new.sim.results.with.design.df %>% dplyr::filter(complete.cases(.))

      if (nrow(sim.results.with.design.df)==0){ # TRUE for the first wave only
        sim.results.with.design.df <- rbind(sim.results.with.design.df,
                                            new.sim.results.with.design.df)
      } else {
        sim.results.with.design.df <- rbind(dplyr::select(sim.results.with.design.df,
                                                          -contains("RMSD")),
                                            new.sim.results.with.design.df)
      }


      # save(sim.results.with.design.df, file = "/Users/delvaw/Documents/MiceABC/sim.results.with.design.df.RData")
      # load(file = "/Users/delvaw/Documents/MiceABC/sim.results.with.design.df.RData")

      #experim.median.features <- med(dplyr::select(sim.results.with.design.df, contains("y.")))$median

      # 3. Find intermediate features and RMSD.tol for which n.close.to.targets >= min.givetomice
      # targets.diff <- targets.empirical - experim.median.features # First we determine how far the empirical targets are away from the median features of the executed experiments
      # candidate.RMSD.tol <- Inf # Initially, we assume that the RMSD cut-off needs to be infinitely large to have sufficient observations to give to mice.

      # n.steps.targets <- 100
      # n.steps.RMSD.tol <- 100

      # n.close.to.targets.mat <- matrix(NA, nrow = n.steps.targets+1, ncol = n.steps.RMSD.tol+1)
      # final.RMSD.tol <- NA
      # shift.fraction <- NA



      # for (steps.intermediate.targets in 0:n.steps.targets){ # We make 10% steps from empirical targets to experimental median features
      #   candidate.intermediate.features <- targets.empirical - (targets.diff * steps.intermediate.targets) / n.steps.targets
      #   for (steps.RMSD.tol in 0:n.steps.RMSD.tol){ # We make 10% steps from RMSD.tol.max to RMSD.tol = 0
      #     RMSD.tol <- RMSD.tol.max * (n.steps.RMSD.tol - steps.RMSD.tol) / n.steps.RMSD.tol
      #     
      #     sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
      #     for (i in 1:length(candidate.intermediate.features)) {
      #       name.dist <- paste0("y.", i, ".sq.rel.dist")
      #       value.dist <- ((sim.results.with.design.df[,i + x.offset] - candidate.intermediate.features[i]) / candidate.intermediate.features[i])^2
      #       assign(name.dist, value.dist)
      #       sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist)
      #     }
      #     RMSD <- sqrt(sum.sq.rel.dist / length(candidate.intermediate.features))
      #     n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE)
      #     n.close.to.targets.mat[(1+steps.intermediate.targets), (1+steps.RMSD.tol)] <- n.close.to.targets
      #     large.enough.training.df <- n.close.to.targets >= min.givetomice
      #     if (large.enough.training.df == TRUE){
      #       if (RMSD.tol < candidate.RMSD.tol){
      #         candidate.RMSD.tol <- RMSD.tol # Updating the candidate.RMSD.tol
      #         final.RMSD.tol <- RMSD.tol
      #         final.intermediate.features <- candidate.intermediate.features
      #         shift.fraction <- steps.intermediate.targets/n.steps.targets # 0 means targets.empirical; 1 means experim.median.features
      #         #calibration.list$intermediate.features[[wave]] <- final.intermediate.features
      #         #calibration.list$steps.intermediate.targets[[wave]] <- steps.intermediate.targets 
      #         #calibration.list$RMSD.tol[[wave]] <- RMSD.tol 
      #         #calibration.list$n.close.to.targets[[wave]] <- n.close.to.targets
      #       }
      #     }
      #   }
      # }

      # n.close.to.targets.mat
      # final.RMSD.tol
      # final.intermediate.features
      # shift.fraction

      # We can calculate the alpha fraction of close-enough data, based on the final.RMSD.tol from the previous wave, and use that as a starting point?


      # 4. Calculate RMSD values for these intermediate.features and the RMSD.tol found
      sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
      for (i in 1:length(final.intermediate.features)) { # These are now equal to targets.empirical
        name.dist <- paste0("y.", i, ".sq.rel.dist")
        value.dist <- ((sim.results.with.design.df[,i + x.offset] - final.intermediate.features[i]) / final.intermediate.features[i])^2
        assign(name.dist, value.dist)
        sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist)
      }
      RMSD <- sqrt(sum.sq.rel.dist / length(final.intermediate.features))
      sim.results.with.design.df$RMSD <- RMSD
      n.close.to.targets <- sum(RMSD < final.RMSD.tol, na.rm = TRUE) # From hereon, we will keep using this same n.close.to.targets
      # alpha.fraction <- n.close.to.targets / nrow(sim.results.with.design.df) # Here we automatically set the alpha level.


      # 2b. Writing to list all the input and output of the executed experiments, so that we can plot it later
      # calibration.list$sim.results.with.design[[wave]] <- sim.results.with.design.df

      # 10. Calculate fraction of new (1-alpha frac *n.experiments) distances that are below "old" distance threshold. NOT CURRENTLY USED BECAUSE saturation.crit = 0.
      # below.old.treshold <- sim.results.with.design.df$sum.sq.rel.dist < rel.dist.cutoff
      # frac.below.old.threshold <- sum(below.old.treshold %in% TRUE) / round(n.experiments * (1-alpha))
      # if(frac.below.old.threshold < saturation.crit) saturation <- 1 # If less than the fraction saturation.crit of the new experiments a closer fit than the previous batch of retained experiments, the loop must be terminated.


      # 9. Merge with previously kept alpha fraction shortest distances
      # sim.results.with.design.df <- rbind(sim.results.with.design.df.selected, sim.results.with.design.df) # Initially sim.results.with.design.df.selected = NULL

      # 3. Keeping alpha fraction shortest distances


      # 5. Select n.close.to.targets shortest distances
      dist.order <- order(RMSD) # Ordering the squared distances from small to big.
      selected.distances <- dist.order[1:n.close.to.targets]
      sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ]
      # 5.b. Record highest RMSD value for that the selected experiments
      calibration.list$max.RMSD[[wave]] <- max(sim.results.with.design.df.selected$RMSD)

      # 6. Record selected experiments to give to mice for this wave
      calibration.list$selected.experiments[[wave]] <- sim.results.with.design.df.selected

      # 7. Put intermediate features in dataframe format
      final.intermediate.features.df <- as.data.frame(matrix(final.intermediate.features, ncol = length(final.intermediate.features)))
      names(final.intermediate.features.df) <- y.names

      # 8. Prepare dataframe to give to mice: selected experiments plus intermediate features
      df.give.to.mice <- dplyr::full_join(dplyr::select(sim.results.with.design.df.selected,
                                                        -contains("RMSD")), # adding target to training dataset
                                          final.intermediate.features.df,
                                          by = names(final.intermediate.features.df)) # "by" statement added to avoid printing message of the variables were used for joining

      # We are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b.
      # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet
      strict.positive.params <- c(4:8)
      df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params])

      # 9. Override default predictorMatrix with a sparser matrix
      # Let's think a bit more carefully about which variables should be allowed as input for which input parameters.
      # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS.
      predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix.
      # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model.
      # 1. hivtransmission.param.f1 (2)
      # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
      # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
      # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
      # 5. person.person.eagerness.man.dist.gamma.a (0.23)
      # 6. person.person.eagerness.woman.dist.gamma.a (0.23)
      # 7. person.person.eagerness.man.dist.gamma.b (45)
      # 8. person.person.eagerness.woman.dist.gamma.b (45)
      # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
      # 10. formation.hazard.agegapry.baseline (0)
      # 11. formation.hazard.agegapry.numrel_man (-0.5)
      # 12. formation.hazard.agegapry.numrel_woman (-0.5)
      # 13. conception.alpha_base (-2.5)
      # 14. dissolution.alpha_0 (-0.52)
      # 15. dissolution.alpha_4 (-0.05))

      # And a reminder of the output vector
      # 1. AAD.male (4)
      # 2. SDAD.male (4)
      # 3. slope.male (0.7)
      # 4. WSD.male (3)
      # 5. BSD.male (1.5)
      # 6. intercept.male (-1)
      # 7. shape.nb.male (1.29)
      # 8. scale.nb.male (0.66)
      # 9. pp.cp.6months.male (0.134)
      # 10. hiv.prev.lt25.women (0.2)
      # 11. hiv.prev.lt25.men (0.1)
      # 12. hiv.prev.25.34.women (0.42)
      # 13. hiv.prev.25.34.men (0.17)
      # 14. hiv.prev.35.44.women (0.37)
      # 15. hiv.prev.35.44.men (0.24)
      # 16. exp(growthrate)) (1.01)


      predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them.
      # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage.
      predictorMatrix[1, x.offset + 10:11] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women
      predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope
      predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD
      predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD
      predictorMatrix[5, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
      predictorMatrix[6, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate)
      predictorMatrix[7, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
      predictorMatrix[8, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
      predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 14, 15, 16)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence,
      predictorMatrix[10, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence.
      predictorMatrix[11, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth
      predictorMatrix[12, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth
      predictorMatrix[13, x.offset + 16] <- 1 # conception.alpha_base is predicted by popgrowth
      predictorMatrix[14, x.offset + c(7, 8, 9, 16)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth
      predictorMatrix[15, x.offset + c(7, 8, 9, 16)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?)

      # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity.

      print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE)

      # 10. Let mice propose parameter values that are predicted by the intermediate features
      mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data
                        m = n.experiments, # number of imputations
                        predictorMatrix = predictorMatrix,
                        method = "norm",
                        defaultMethod = "norm",
                        maxit = maxit,
                        printFlag = FALSE)

      # 11. Turn mice proposals into a new matrix of experiments
      experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names))
      # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values
      experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params])

      wave <- wave + 1
    }
  }

  # 15. Stop clock and return calibration list
  calibration.list$secondspassed <- proc.time() - ptm # Stop the clock
  return(calibration.list)
} 














  while (wave <= maxwaves){
    #print(c("wave", wave), quote = FALSE) # only for local testing. To be deleted after testing is completed

    #print(c(nrow(experiments), "experiments to be run"), quote = FALSE) # only for local testing. To be deleted after testing is completed


    # 1. Initially: n.experiments from sobol sequences. 7. Later, (1-alpha) fraction of n.experiments from MICE proposals



    # We should consider calculating an empirical median of the multivariate distribution of simulation outputs, and use this
    # instead of what we now refer to as the targets.master.
    sim.results.simple.complete <- as.data.frame(sim.results.simple) %>% dplyr::filter(complete.cases(.))
    experim.median.features <- med(sim.results.simple.complete)$median


    targets.diff <- targets.empirical - experim.median.features



    ####
    ## hack to avoid time-consuming initial naive simulations
    ##
    previous.data.lists = c(MICE_ABC_testoutput, MICE_ABC_testoutput.2)
    data.elements.location <- which(names(previous.data.lists) %in% "sim.results.with.design")
    previous.data <- do.call(rbind, unlist(previous.data.lists[data.elements.location], recursive = FALSE)) # The input and output of ALL the simulations run until now
    previous.data["y.9"] <- NULL # Taking out the mean degree feature
    previous.data["sum.sq.rel.dist"] <- NULL # Taking out the mean degree feature

    ##
    ##
    ####

    x.names <- paste0("x.", seq(1:ncol(experiments)))
    y.names <- paste0("y.", seq(1:ncol(sim.results.simple)))  # y.names <- paste0("y.", seq(1:16))
    x.offset <- length(x.names)

    new.sim.results.with.design.df <- previous.data    #################################### Only while developing, using this hack
    names(new.sim.results.with.design.df) <- c(x.names, y.names)

    new.sim.results.with.design.df$RMSD <- NA

    sim.results.with.design.df <- new.sim.results.with.design.df # rbind(sim.results.with.design.df, new.sim.results.with.design.df) #################################### Only while developing, using this hack


    # 2. Set intermediate targets as close to the empirical targets, but still with enough simulation data close to these intermediate targets.
    # The best candidate.targets would be the empirical targets, but if they don't have enough support, we gradually lower the bar
    candidate.targets <- targets.empirical

    sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
      for(i in 1:length(candidate.targets)) {
        nam <- paste0("y.", i, ".sq.rel.dist")
        val <- ((sim.results.with.design.df[,i + x.offset] - candidate.targets[i]) / candidate.targets[i])^2
        assign(nam, val)
        sum.sq.rel.dist <- sum.sq.rel.dist + get(nam)
      }
    RMSD <- sqrt(sum.sq.rel.dist / length(candidate.targets))
    sim.results.with.design.df$RMSD <- RMSD

    n.close.to.targets <- 0
    RMSD.tol <- 0
    while (n.close.to.targets < min.givetomice){
      RMSD.tol <- RMSD.tol + 0.01
      n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE)
    }


    if (n.close.to.targets < min.givetomice){
      n.close.to.targets <- 0
      lower.step <- 0.01
      iter <- 0
      frac.lower.bar <- iter * lower.step

      while (n.close.to.targets < min.givetomice & frac.lower.bar <= 1){
        candidate.targets <- candidate.targets - lower.step * targets.diff
        sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df))
        for(i in 1:length(candidate.targets)) {
          nam <- paste0("y.", i, ".sq.rel.dist")
          val <- ((sim.results.with.design.df[,i + x.offset] - candidate.targets[i]) / candidate.targets[i])^2
          assign(nam, val)
          sum.sq.rel.dist <- sum.sq.rel.dist + get(nam)
        }
        RMSD <- sqrt(sum.sq.rel.dist / length(candidate.targets))
        n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE)
        iter <- iter + 1
        frac.lower.bar <- iter * lower.step
      }
      sim.results.with.design.df$RMSD <- RMSD
    }
    print(frac.lower.bar)
    print(n.close.to.targets)
    # ?9. Merge with previously kept alpha fraction shortest distances
    # ?sim.results.with.design.df <- rbind(sim.results.with.design.df.selected, sim.results.with.design.df) # Initially sim.results.with.design.df.selected = NULL
    # ?Not sure if/how we are going to use this in the new algorithm

    # 3. Keeping n.close.to.targets shortest distances

    dist.order <- order(RMSD) # Ordering the squared distances from small to big.
    selected.distances <- dist.order[1:n.close.to.targets]
    sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] %>% dplyr::filter(complete.cases(.)) # Here we filter out any experiments that gave NA output. Probably not necessary because the RMSD would have been NA

    # 4. Set distance threshold at alpha-percentile best distance. 11. Update distance threshold
    rel.dist.cutoff <- max(sim.results.with.design.df.selected$RMSD)  # We update the rel.dist.cutoff, based on the alpha fraction best parametersets

    # 5. Write alpha fraction of input-output-distances and distance threshold to list.
    # Write the 
    # Write new best alpha fraction and updated threshold to list
    calibration.list$inputoutput[[wave]] <- sim.results.with.design.df.selected
    calibration.list$candidate.targets[[wave]] <- candidate.targets
    calibration.list$rel.dist.cutoff[[wave]] <- rel.dist.cutoff

    # 6. Feed MICE the best alpha fraction and ask for (1-alpha) fraction proposals based on targets
    candidate.targets.df <- as.data.frame(matrix(candidate.targets, ncol = length(candidate.targets))) # Putting target in dataframe format

    names(candidate.targets.df) <- y.names
    df.give.to.mice <- dplyr::full_join(dplyr::select(sim.results.with.design.df.selected,
                                                      -contains("RMSD")), # adding target to training dataset
                                        candidate.targets.df,
                                        by = names(candidate.targets.df)) # "by" statement added to avoid printing message of the variables were used for joining

    # We are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b.
    # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet
    strict.positive.params <- c(4:8)
    df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params])

    # Let's think a bit more carefully about which variables should be allowed as input for which input parameters.
    # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS.
    predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix.
    # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model.
    # 1. hivtransmission.param.f1 (2)
    # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
    # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
    # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
    # 5. person.person.eagerness.man.dist.gamma.a (0.23)
    # 6. person.person.eagerness.woman.dist.gamma.a (0.23)
    # 7. person.person.eagerness.man.dist.gamma.b (45)
    # 8. person.person.eagerness.woman.dist.gamma.b (45)
    # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
    # 10. formation.hazard.agegapry.baseline (0)
    # 11. formation.hazard.agegapry.numrel_man (-0.5)
    # 12. formation.hazard.agegapry.numrel_woman (-0.5)
    # 13. conception.alpha_base (-2.5)
    # 14. dissolution.alpha_0 (-0.52)
    # 15. dissolution.alpha_4 (-0.05))

    # And a reminder of the output vector
    # 1. AAD.male (4)
    # 2. SDAD.male (4)
    # 3. slope.male (0.7)
    # 4. WSD.male (3)
    # 5. BSD.male (1.5)
    # 6. intercept.male (-1)
    # 7. shape.nb.male (1.29)
    # 8. scale.nb.male (0.66)
    # 9. meandegree.male (1)
    # 10. pp.cp.6months.male (0.134)
    # 11. hiv.prev.lt25.women (0.2)
    # 12. hiv.prev.lt25.men (0.1)
    # 13. hiv.prev.25.34.women (0.42)
    # 14. hiv.prev.25.34.men (0.17)
    # 15. hiv.prev.35.44.women (0.37)
    # 16. hiv.prev.35.44.men (0.24)
    # 17. exp(growthrate)) (1.01)



    predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them.
    # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage.
    predictorMatrix[1, x.offset + 11:12] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women
    predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope
    predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD
    predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD
    predictorMatrix[5, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[6, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate)
    predictorMatrix[7, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[8, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate)
    predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 15, 16, 17)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence,
    predictorMatrix[10, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence.
    predictorMatrix[11, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth
    predictorMatrix[12, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth
    predictorMatrix[13, x.offset + 17] <- 1 # conception.alpha_base is predicted by popgrowth
    predictorMatrix[14, x.offset + c(7, 8, 10, 17)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth
    predictorMatrix[15, x.offset + c(7, 8, 10, 17)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?)

    # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity.

    print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE)

    mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data
                      m = n.experiments, #round(n.experiments * (1-alpha)), # number of imputations # Let's ask for 128 imputations
                      predictorMatrix = predictorMatrix,
                      method = "norm",
                      defaultMethod = "norm",
                      maxit = maxit,
                      printFlag = FALSE,
                      data.init = NULL)



    # experiments <- cbind(unlist(mice.test$imp$x.1), # these are the (1-alpha) fraction of n.experiments proposals
    #                      unlist(mice.test$imp$x.2))

    experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names))
    # Before we check the suitability of the new experimental input parameter values, we must backtransform the logits to probabilities
    # experiments[, probs] <- boot::inv.logit(experiments[, probs])
    # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values
    experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params])

    wave <- wave + 1
  }

  calibration.list$secondspassed <- proc.time() - ptm # Stop the clock
  return(calibration.list)
}





#data.elements.location <- which(names(previous.data.lists) %in% "sim.results.with.design")
#previous.data <- do.call(rbind, unlist(previous.data.lists[data.elements.location], recursive = FALSE)) # The input and output of ALL the simulations run until now.

  # previous.calib.lls <- as.numeric(apply(experiments, 2, min))
  # previous.calib.uls <- as.numeric(apply(experiments, 2, max))
  # # To see how the previous calibration shifted the input ranges, we should also recall the lls and uls of the calibration before the previous (which may be the initial naive range boundaries)
  # pre.experiments <- MICE_ABC_testoutput$inputoutput[[length(MICE_ABC_testoutput$inputoutput)]][, 1:length(x.names)]
  # pre.previous.calib.lls <- as.numeric(apply(pre.experiments, 2, min))
  # pre.previous.calib.uls <- as.numeric(apply(pre.experiments, 2, max))
test.MICE_ABC.incremental.results <- MICE_ABC.incremental(targets.empirical = targets.empirical,
                                                          targets.master = targets.master,
                                                          RMSD.tol = 0.2,
                                                          min.givetomice = 128,
                                                          n.experiments = 512,
                                                          lls = c(1,  0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16),
                                                          uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3,  -0.001),
                                                          model = simpact.wrapper,
                                                          maxit = 50,
                                                          maxwaves = 10)

MICE_ABC.incremental <- function(targets.empirical = targets.empirical, targets.master = targets.master, # 0.3 * targets.diff, #previous.data.lists = c(MICE_ABC_testoutput, MICE_ABC_testoutput.2, MICE_ABC_testoutput.3), RMSD.tol = 0.2, min.givetomice = 128, n.experiments = 512, lls = c(1, 0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16), uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3, -0.001), model = simpact.wrapper, maxit = 50, maxwaves = 2)

ptm <- proc.time()

# 1. hivtransmission.param.f1 (2)
# 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
# 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
# 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
# 5. person.person.eagerness.man.dist.gamma.a (0.23)
# 6. person.person.eagerness.woman.dist.gamma.a (0.23)
# 7. person.person.eagerness.man.dist.gamma.b (45)
# 8. person.person.eagerness.woman.dist.gamma.b (45)
# 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
# 10. formation.hazard.agegapry.baseline (0)
# 11. formation.hazard.agegapry.numrel_man (-0.5)
# 12. formation.hazard.agegapry.numrel_woman (-0.5)
# 13. conception.alpha_base (-2.5)
# 14. dissolution.alpha_0 (-0.52)
# 15. dissolution.alpha_4 (-0.05))

# inputs: c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05)

MICE_ABC_testoutput <- MICE_ABC(targets = targets.master + 0.1 * targets.diff, #targets.empirical, # 17 output statistics c(4, 4, 0.7, 3, 1.5, -1, 1.29, 0.66, 1, 0.134, 0.2, 0.1, 0.42, 0.17, 0.37, 0.24, 1.01)
                                 n.experiments = 128,
                                 lls = c(1,  0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16), #c(2, -1,   0.1, 2, 1, 1, 0.3), #c(0.1, -3, 0.1,  0,  0,  0, 0.2),
                                 uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3,  -0.001), #c(4, -0.2, 0.6, 8, 6, 6, 0.95), #c(5, -0.1, 1.0, 10, 10, 10, 1.0),
                                 model = simpact.wrapper,
                                 maxit = 20,
                                 maxwaves = 5, # and we can also try 10 to do the same as the 10 waves of Lenormand
                                 #reps = 5,
                                 alpha = 0.25,
                                 saturation.crit = 0)
mice.timeused <- proc.time() - ptm
mice.timeused # xxxx seconds
root.path <- dirname(getwd())
save(MICE_ABC_testoutput, file = paste0(root.path, "/MiceABC/mice.age-mixing.RData"))

Now we can use the calibrated models from this first step as starting point for the second step (targets.master + 0.2 * targets.diff). The shift in parameter values from the base model to this new starting point can give us a hint about how to choose the new range in parameter space to explore. In the exercise below we start again with LHS sampling, but we should modify the miceabc code so that the input from the inputoutput[[lastwave]] gets use as initial experiments matrix in wave 1.

ptm <- proc.time()

# 1. hivtransmission.param.f1 (2)
# 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25)
# 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0)
# 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3)
# 5. person.person.eagerness.man.dist.gamma.a (0.23)
# 6. person.person.eagerness.woman.dist.gamma.a (0.23)
# 7. person.person.eagerness.man.dist.gamma.b (45)
# 8. person.person.eagerness.woman.dist.gamma.b (45)
# 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5)
# 10. formation.hazard.agegapry.baseline (0)
# 11. formation.hazard.agegapry.numrel_man (-0.5)
# 12. formation.hazard.agegapry.numrel_woman (-0.5)
# 13. conception.alpha_base (-2.5)
# 14. dissolution.alpha_0 (-0.52)
# 15. dissolution.alpha_4 (-0.05))

# inputs: c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05)

MICE_ABC_testoutput.2 <- MICE_ABC(targets = targets.master + 0.2 * targets.diff, #targets.empirical, # 17 output statistics c(4, 4, 0.7, 3, 1.5, -1, 1.29, 0.66, 1, 0.134, 0.2, 0.1, 0.42, 0.17, 0.37, 0.24, 1.01)
                                 n.experiments = 256,
                                 lls = c(1,  0.12, -0.5, 2.8, 0.2, 0.1, 30, 30, -0.7, 2, -0.35, -0.35, -3.8, -0.6, -0.08), #c(2, -1,   0.1, 2, 1, 1, 0.3), #c(0.1, -3, 0.1,  0,  0,  0, 0.2),
                                 uls = c(1.2, 0.3, 0.4, 3.5, 0.4, 0.4, 66, 66, -0.35, 3.9, -0.1, -0.1, -2, -0.25,  -0.004), #c(4, -0.2, 0.6, 8, 6, 6, 0.95), #c(5, -0.1, 1.0, 10, 10, 10, 1.0),
                                 model = simpact.wrapper,
                                 maxit = 20,
                                 maxwaves = 8, # and we can also try 10 to do the same as the 10 waves of Lenormand
                                 #reps = 5,
                                 alpha = 0.25,
                                 saturation.crit = 0)
mice.timeused <- proc.time() - ptm
mice.timeused # xxxx seconds
MICE_ABC_testoutput.2$rel.dist.cutoff
str(MICE_ABC_testoutput.2$sim.results.with.design)
str(MICE_ABC_testoutput.2$inputoutput)
# 1. AAD.male (4)
# 2. SDAD.male (4)
# 3. slope.male (0.7)
# 4. WSD.male (3)
# 5. BSD.male (1.5)
# 6. intercept.male (-1)
# 7. shape.nb.male (1.29)
# 8. scale.nb.male (0.66)
# 9. meandegree.male (1)
# 10. pp.cp.6months.male (0.134)
# 11. hiv.prev.lt25.women (0.2)
# 12. hiv.prev.lt25.men (0.1)
# 13. hiv.prev.25.34.women (0.42)
# 14. hiv.prev.25.34.men (0.17)
# 15. hiv.prev.35.44.women (0.37)
# 16. hiv.prev.35.44.men (0.24)
# 17. exp(growthrate)) (1.01)
targets.master + 0.2 * targets.diff
MICE_ABC_testoutput.2$inputoutput[[8]][1:5, 16:32]
# initial naive input:
c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05)
MICE_ABC_testoutput.2$inputoutput[[8]][1:5, 1:15]
root.path <- dirname(getwd())
save(MICE_ABC_testoutput.2, file = paste0(root.path, "/MiceABC/mice.age-mixing.0.2.RData"))
root.path <- dirname(getwd())
load(file = paste0(root.path, "/MiceABC/incidence.rel.reduc.RData"))

Now let's subtract the incidence estimates from one another.

summary.interv <- summary(sumsmstats.interv.df)
#colMeans(sumsmstats.interv.df, na.rm = TRUE)

#lastcolumn <- ncol(sumsmstats.control.df)
#incidence.rel.reduc <- (sumsmstats.control.df[,lastcolumn] - sumsmstats.interv.df[,lastcolumn]) / sumsmstats.control.df[,lastcolumn]
#hist(incidence.rel.reduc, 20)
#summary(incidence.rel.reduc)
#predict.95.percent.interval.incidence.rel.reduc <- quantile(incidence.rel.reduc, probs = c(0.025, 0.975), na.rm = TRUE)

summary.interv.master <- summary.interv # summary(sumsmstats.interv.df)
target.vector.master <- colMeans(sumsmstats.interv.df, na.rm =TRUE)
target.vector.trainer <- head(target.vector.master, -1)
target.vector.trainer[9:length(target.vector.trainer)] <- exp(target.vector.trainer[9:length(target.vector.trainer)])
# We're turning target zeros into target ones

Now let's explore and visualise the results

root.path <- dirname(getwd())
load(file = paste0(root.path, "/MiceABC/reject.tasp.RData"))
load(file = paste0(root.path, "/MiceABC/mice.tasp.RData"))

# 1. For rejectabc
# Calculating squared relative distances and sum thereof for the ABC rejection method
rej.sum.sq.rel.dist <- rep(0, nrow(rejectabc.output$stats))
for(i in 1:length(target.vector.trainer)) {
  nam <- paste0("y.", i, ".sq.rel.dist")
  val <- ((rejectabc.output$stats[,i] - targets[i]) / targets[i])^2
  assign(nam, val)
  rej.sum.sq.rel.dist <- rej.sum.sq.rel.dist + get(nam)
}

#rejectabc.output$y.1.sq.rel.dist <- ((rejectabc.output$stats[,1] - targets[1]) / targets[1])^2
#rejectabc.output$y.2.sq.rel.dist <- ((rejectabc.output$stats[,2] - targets[2]) / targets[2])^2
#rejectabc.output$sum.sq.rel.dist <- rejectabc.output$y.1.sq.rel.dist + rejectabc.output$y.2.sq.rel.dist
dist.order <- order(rej.sum.sq.rel.dist) # Ordering the squared distances from small to big.
last.one.selected <- 5
selected.distances <- dist.order[1:last.one.selected]
best5.rejection <- rep(FALSE, length(dist.order))
best5.rejection[selected.distances] <- TRUE

abcreject.df <- as.data.frame(cbind(rejectabc.output$param,
                                    rejectabc.output$stats))
abcreject.df$best5 <- best5.rejection
abcreject.df$method <- "rejection"

xvars.names <- paste0("x.", 1:7) # This will also be used by the other methods
yvars.names <- paste0("y.", 1:39) # This will also be used by the other methods
names(abcreject.df) <- c(xvars.names, yvars.names, "best5", "method")




# 2. For Lenormandabc




# 3. For miceabc
# First for the 5 waves
xplusyvars.length <- ncol(MICE_ABC_testoutput$sim.results.with.design[[1]]) - 1 # The last variable is the sum of squared errors
miceabc_inout_all <- do.call(rbind, MICE_ABC_testoutput$sim.results.with.design)[,1:xplusyvars.length]
best5.mice <- miceabc_inout_all$y.1 %in% MICE_ABC_testoutput$inputoutput[[5]]$y.1[1:5]
abcmice.df <- as.data.frame(cbind(miceabc_inout_all,
                                  best5.mice))
abcmice.df$method <- "mice"
names(abcmice.df) <- c(names(miceabc_inout_all), "best5", "method")     ### The dataset abcmice.df will be merged with other datasets generated by the comparison methods

Now that we have calibrated the model with Rejection ABC, let's run the 5 best models many times under the intervention and control scenario, to estimate the impact of TasP.

reps <- 6 # Change to 200 reps eventually
calib.rej.df <- abcreject.df %>% dplyr::filter(., best5 == TRUE) %>% dplyr::select(., x.1:x.7)
design.matrix.calib.rej <- matrix(rep(as.numeric(unlist(calib.rej.df)), each = reps), nrow = reps * nrow(calib.rej.df), byrow = FALSE)
design.matrix.calib.rej.control <- cbind(design.matrix.calib.rej, 0)
design.matrix.calib.rej.interv <- cbind(design.matrix.calib.rej, 1)

summstats.calib.rej.control.df <- simpact.parallel(model = simpact.wrapper.master,
                                  actual.input.matrix = design.matrix.calib.rej.control,
                                  seed_count = 0,
                                  n_cluster = 8)
summstats.calib.rej.interv.df <- simpact.parallel(model = simpact.wrapper.master,
                                  actual.input.matrix = design.matrix.calib.rej.interv,
                                  seed_count = 0,
                                  n_cluster = 8)
lastcolumn.calib.rej <- ncol(summstats.calib.rej.control.df)
incidence.rel.reduc.calib.rej <- (summstats.calib.rej.control.df[,lastcolumn.calib.rej] - summstats.calib.rej.interv.df[,lastcolumn.calib.rej]) / summstats.calib.rej.control.df[,lastcolumn.calib.rej]
hist.data.incidence.rel.reduc.calib.rej <- hist(incidence.rel.reduc.calib.rej, 20)
summary(incidence.rel.reduc.calib.rej)
predict.95.percent.interval.incidence.rel.reduc.calib.rej <- quantile(incidence.rel.reduc.calib.rej, probs = c(0.025, 0.975), na.rm = TRUE)

Now that we have calibrated the model with MICE-Cali, let's run the 5 best models many times under the intervention and control scenario, to estimate the impact of TasP.

reps <- 6 # Change to 200 reps eventually
calib.mice.df <- abcmice.df %>% dplyr::filter(., best5 == TRUE) %>% dplyr::select(., x.1:x.7)
design.matrix.calib.mice <- matrix(rep(as.numeric(unlist(calib.mice.df)), each = reps), nrow = reps * nrow(calib.mice.df), byrow = FALSE)
design.matrix.calib.mice.control <- cbind(design.matrix.calib.mice, 0)
design.matrix.calib.mice.interv <- cbind(design.matrix.calib.mice, 1)

summstats.calib.mice.control.df <- simpact.parallel(model = simpact.wrapper.master,
                                  actual.input.matrix = design.matrix.calib.mice.control,
                                  seed_count = 0,
                                  n_cluster = 8)
summstats.calib.mice.interv.df <- simpact.parallel(model = simpact.wrapper.master,
                                  actual.input.matrix = design.matrix.calib.mice.interv,
                                  seed_count = 0,
                                  n_cluster = 8)
lastcolumn.calib.mice <- ncol(summstats.calib.mice.control.df)
incidence.rel.reduc.calib.mice <- (summstats.calib.mice.control.df[,lastcolumn.calib.mice] - summstats.calib.mice.interv.df[,lastcolumn.calib.mice]) / summstats.calib.mice.control.df[,lastcolumn.calib.mice]
hist.data.incidence.rel.reduc.calib.mice <- hist(incidence.rel.reduc.calib.mice, 20)
summary(incidence.rel.reduc.calib.mice)
predict.95.percent.interval.incidence.rel.reduc.calib.mice <- quantile(incidence.rel.reduc.calib.mice, probs = c(0.025, 0.975), na.rm = TRUE)
# Adding the target statistics and their inputs
target.df <- data.frame(matrix(c(3, -0.5, 0.5, 5, 3, 3, 0.75, target.vector.trainer), nrow = 1), # The inputs and targets
                        best5 = TRUE, method = "target")
names(target.df) <- c(names(miceabc_inout_all), "best5", "method")


# Now we can rbind these 4 datasets into one dataset that we can plot with ggplot2
abc.comparison.tasp.df <- rbind(abcreject.df,
                           #abcLenorm.df,
                           abcmice.df,
                           target.df)
abc.comparison.tasp.df$method <- factor(abc.comparison.tasp.df$method, labels = c("MICE", "Rejection", "Target")) # c("Lenormand", "MICE", "Rejection", "Target"))

abc.comparison.tasp.df$method <- factor(abc.comparison.tasp.df$method,levels(abc.comparison.tasp.df$method)[c(2, 1, 3)]) # c(3, 1, 2, 4) We want the levels in a particular order, for plotting and colour purposes
#levels(abc.comparison.V8.df$method)

Let's plot how the calibrated models compare against the target statistics and eachother

# columns 9:24 capture the ART coverage at times 25:40

outputcomparison.data <- as.data.frame(rbind(summstats.calib.rej.interv.df,
                          summstats.calib.mice.interv.df,
                          target.vector.master))
names(outputcomparison.data) <- c(yvars.names, "incid.rel.reduc")
outputcomparison.data$method <- c(rep(c("Rejection", "MICE"), each = nrow(summstats.calib.rej.interv.df)), "Target")

outputcomparison.data$method <- factor(outputcomparison.data$method, labels = c("MICE", "Rejection", "Target")) # c("Lenormand", "MICE", "Rejection", "Target"))

outputcomparison.data$method <- factor(outputcomparison.data$method,levels(outputcomparison.data$method)[c(1, 2, 3)]) # c(3, 1, 2, 4) We want the levels in a particular order, for plotting and colour purposes


outputcomparison.data$is.target <- c(rep(0, nrow(outputcomparison.data)-1), 1)
outputcomparison.data$unique.ID <- as.character(paste0(outputcomparison.data$y.1, outputcomparison.data$y.2, outputcomparison.data$y.3))


###
# Comparing various output statistics
library(GGally)
outputcomparison.data$is.target <- factor(outputcomparison.data$is.target)
ggpairs(outputcomparison.data[, c(1,2,5,6,7, 41:42)],
        aes(colour = method,
            shape = is.target),
        columns = 1:5,
        columnLabels = c("AAD", "SDAD", "WVAD.base", "BVAD", "HIV+ 15-50 yo"),
        upper = list(continuous = "blank")) 

ggpairs(outputcomparison.data[, c(25:30, 33, 36,39, 41:42)],
        aes(colour = method,
            shape = is.target),
        columns = 1:9,
        columnLabels = c("<100 25-30", "<100 30-35", "<100 35-40", "100-200 25-30", "100-200 30-35", "100-200 35-40", "200-350 35-40", "350-500 35-40", ">500 35-40"),
        upper = list(continuous = "blank")) 


# To plot ART coverage over time, we need to turn wide into long format
names(outputcomparison.data)[9:24] <- 25:40
outputcomparison.ARTcoverage.data <- tidyr::gather(data = outputcomparison.data,
                                            key = Time,
                                            convert = TRUE,
                                            value = ART.coverage,
                                            9:24)


ARTcoverage.comparison <- ggplot(data = outputcomparison.ARTcoverage.data,
                      aes(x = Time,
                          y = ART.coverage,
                          alpha = factor(is.target),
                          group = unique.ID,
                          col = method)) +
  geom_line() +
  scale_colour_manual(values = cb.Palette) +
  scale_alpha_discrete(breaks = NULL,
                       range = c(0.2, 1)) +
  #scale_shape_manual(breaks = NULL,
  #                   values = c(20, 10)) +
  #scale_size_manual(breaks = NULL,
  #                  values = 4) +
  xlab("Simulation time (years)") +
  ylab("ART coverage") +
  theme_bw() +
  guides(colour = guide_legend(title = "Method"))#,
                            #   override.aes = list(shape = c(20, 20, 20, 10),
                             #                      size = c(4, 4, 4, 4))))
plot(ARTcoverage.comparison)


### Now let's plot the TasP impact estimate
median.impact <- c(median(incidence.rel.reduc.calib.rej), median(incidence.rel.reduc.calib.mice), median(incidence.rel.reduc, na.rm = TRUE))
mean.impact <- c(mean(incidence.rel.reduc.calib.rej), mean(incidence.rel.reduc.calib.mice), mean(incidence.rel.reduc, na.rm = TRUE))
p25.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.25, na.rm = TRUE),
                quantile(incidence.rel.reduc.calib.mice, probs = 0.25, na.rm = TRUE),
                quantile(incidence.rel.reduc, probs = 0.25, na.rm = TRUE))
p75.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.75, na.rm = TRUE),
                quantile(incidence.rel.reduc.calib.mice, probs = 0.75, na.rm = TRUE),
                quantile(incidence.rel.reduc, probs = 0.75, na.rm = TRUE))
p025.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.025, na.rm = TRUE),
                quantile(incidence.rel.reduc.calib.mice, probs = 0.025, na.rm = TRUE),
                quantile(incidence.rel.reduc, probs = 0.025, na.rm = TRUE))
p975.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.975, na.rm = TRUE),
                quantile(incidence.rel.reduc.calib.mice, probs = 0.975, na.rm = TRUE),
                quantile(incidence.rel.reduc, probs = 0.975, na.rm = TRUE))
method <- c("Rejection", "MICE", "Master")
impact.comparison.data <- data.frame(median.impact = median.impact,
                                     mean.impact = mean.impact,
                                     p25.impact = p25.impact,
                                     p75.impact = p75.impact,
                                     p025.impact = p025.impact,
                                     p975.impact = p975.impact,
                                     method = method)

impact.comparison <- ggplot(data = impact.comparison.data,
                      aes(x = method,
                          y = median.impact,
                          col = method)) +
  geom_crossbar(aes(ymin = p25.impact, ymax = p75.impact), width = 0.2) +
  geom_errorbar(aes(ymin = p025.impact, ymax = p975.impact), width = 0.2) +
  scale_colour_manual(values = c(cb.Palette[3], cb.Palette[1], cb.Palette[2])) +
  xlab("") +
  ylim(0, 1) +
  ylab("Relative Reduction in HIV incidence") +  # TasP effect on HIV incidence in the last 2 years of the simulation
  theme_bw() +
  guides(colour = guide_legend(title = "Method"))#,
                            #   override.aes = list(shape = c(20, 20, 20, 10),
                             #                      size = c(4, 4, 4, 4))))
plot(impact.comparison)


wdelva/MiceABC documentation built on May 4, 2019, 2:04 a.m.