Important question: should we only have “general population” targets, or also HIV-status- and gender-specific targets? For now, we use only targets for the general male population.
Degree distribution of new sex partners in the last year:
5. Shape parameter (r) of the negative binomial distribution = 1.29
6. Scale parameter (theta) of the negative binomial distribution = 0.66 (theta = p/(1-p))
For now we are not taking an age effect into account. If we were (e.g. via a GAMM or another multi-parameter model), we would extract the age effect by fitting a negative binomial distribution to the data on the number of new relationships formed in the last year, with age as a covariate.
library(dplyr)
library(MASS)
library(splines)
library(boot)
library(haven)
library(ggplot2)
library(fitdistrplus)
library(lmtest)
library(mclust)
library(depth)

root.path <- dirname(getwd())
relations.data.path <- paste0(root.path, "/MiceABC/Sex Network Master (participant level) 09.08.2012.dta")
data.relations <- read_dta(file = relations.data.path)
#names(data.relations)
data.needed <- dplyr::select(data.relations,
                             c(id, age, race, gender, cpcount, ompcount, hivtestresult, totalpartners))
data.needed$cpcount[is.na(data.needed$cpcount)] <- 0
data.needed$ompcount[is.na(data.needed$ompcount)] <- 0

#########################################################################################
# data.filtered is the dataset for the partner turnover analysis
data.filtered <- dplyr::filter(data.needed, race == 2, gender != "2", age >= 15, age < 50)
data.filtered$totalpartnerslstyr <- data.filtered$cpcount + data.filtered$ompcount # Total NEW relationships formed in the last year
data.filtered$age0 <- data.filtered$age - min(data.filtered$age)

# We create subsets, one for each race-gender stratum
md.wideBM <- dplyr::filter(data.filtered, race == 2, gender == "0") # Let's ONLY WORK WITH THIS STRATUM
md.wideBW <- dplyr::filter(data.filtered, race == 2, gender == "1")

###################################
# Negative binomial model
###################################
# Model for black men in the last year
intercept.Bm <- glm.nb(totalpartnerslstyr ~ 1,
                       data = md.wideBM,
                       control = glm.control(maxit = 100000, trace = 3),
                       init.theta = 1)
summary(intercept.Bm) # intercept exp(0.25), theta = 0.65
shape.nb <- exp(as.numeric(intercept.Bm$coefficients)) # (r)
scale.nb <- as.numeric(intercept.Bm$theta) # (theta = p/(1-p))

# Or alternatively we can fit the negative binomial like this:
fit.negbin <- fitdist(as.numeric(md.wideBM$totalpartnerslstyr), "nbinom")
fit.pois <- fitdist(as.numeric(md.wideBM$totalpartnerslstyr), "pois")
r.pois <- rpois(n = 1000000, lambda = as.numeric(fit.pois$estimate))

# A negative binomial distribution is the result of gamma-distributed Poisson processes.
r.lambdas <- rgamma(n = 1000000, shape = shape.nb, scale = scale.nb)
r.negbinom <- rpois(n = length(r.lambdas), lambda = r.lambdas)
random.nb <- rnbinom(n = 10000, size = shape.nb, mu = shape.nb * scale.nb)

hist(md.wideBM$totalpartnerslstyr)
hist(r.negbinom)
hist(random.nb)
mean(md.wideBM$totalpartnerslstyr)
mean(r.negbinom)
mean(random.nb)
round(prop.table(table(as.numeric(md.wideBM$totalpartnerslstyr))), 2)
round(prop.table(table(random.nb)), 2)
round(prop.table(table(r.negbinom)), 2)
round(prop.table(table(r.pois)), 2)

finalBm <- glm.nb(totalpartnerslstyr ~ ns(age0, 3),
                  data = md.wideBM,
                  control = glm.control(maxit = 100000, trace = 3),
                  init.theta = 0.5)
summary(finalBm)
lrtest(intercept.Bm, finalBm)

###############################################################################
# Made-up data set to predict the expected number of sexual partners per age
###############################################################################
madeforBM <- data.frame(age0 = 0:34, gender = "man", race = "African") # ~ 15 to 49 year old men
madeforBM$totalpartnerslstyr <- NA
########################################################################################
tryBM <- predict(finalBm, newdata = madeforBM, type = "link", se.fit = TRUE)
madeforBM$try <- exp(tryBM$fit)
madeforBM$try.low <- exp(tryBM$fit - 1.96 * tryBM$se.fit)
madeforBM$try.high <- exp(tryBM$fit + 1.96 * tryBM$se.fit)

### Final step: plotting the results
plot(seq(15, 49), madeforBM$try, type = "l", col = "navyblue",
     main = "Black men", xlab = "Age", ylab = "Partners last year", ylim = c(0, 4))
#points(seq(15, 49), madeforBM$try.low, col = "navyblue")
#points(seq(15, 49), madeforBM$try.high, col = "navyblue")
HIV prevalence by gender and age group in 2011 (30 years after introducing HIV), as per Beauclair 2017 (journal not yet known):
8. HIV prevalence in women < 25 years old: 0.2
9. HIV prevalence in men < 25 years old: 0.1
10. HIV prevalence in women 25 <= age < 35: 0.42
11. HIV prevalence in men 25 <= age < 35: 0.17
12. HIV prevalence in women 35 <= age < 45: 0.37
13. HIV prevalence in men 35 <= age < 45: 0.24
wsd.general <- 3
bsd.general <- 1.5
slope.general <- 0.7
intercept.general <- -1 # average age of partners of 15 year-old boys is 16
shape.nb <- 1.29
scale.nb <- 0.66
pp.cp.6months.men <- 0.134
hiv.prev.lt25.women <- 0.2
hiv.prev.lt25.men <- 0.1
hiv.prev.25.34.women <- 0.42
hiv.prev.25.34.men <- 0.17
hiv.prev.35.44.women <- 0.37
hiv.prev.35.44.men <- 0.24
library(devtools) #install_github("wdelva/RSimpactHelp") library(RSimpactCyan) library(RSimpactHelper) library(ggplot2) library(GGally) library(lmtest) library(mice) library(miceadds) library(parallel) library(randtoolbox) library(EasyABC) library(dplyr) library(tidyr) library(nlme) library(lme4) library(boot) library(data.table) # Let's create a colour palette that is colourblind-friendly # The palette with black: cb.Palette <- c("#F0E442", "#D55E00", "#0072B2", "#E69F00", "#999999") # yellow red blue orange grey # First we define a simpact wrapper function that takes as argument a vector of model parameter values and returns a vector of summary statistics # A wrapper function to run Simpact once # input.vector <- c(0, 3, -0.5, 0) simpact.wrapper.master <- function(inputvector = input.vector){ library(RSimpactHelper) library(RSimpactCyan) library(magrittr) library(dplyr) library(nlme) agemixing.lme.errFunction <- function(e) { return(list()) } agemixing.lme.fitter <- function(data = dplyr::filter(agemix.model[[1]], Gender =="male")){ men.lme <- lme(pagerelform ~ agerelform0, data = data, control=lmeControl(maxIter=10000, returnObject=TRUE, opt = "optim"), random = ~1 | ID, method = "REML", weight = varPower(value = 0.5, form = ~agerelform0 + 1)) } scenario <- function(interventionlist, tasp.indicator) { switch(tasp.indicator+1, interventionlist[1:4], interventionlist[1:5]) } cd4.atARTinit <- function(datalist = datalist, agegroup = c(15, 30), timewindow = c(15, 30), cd4count=350, site="All"){ cd4.atARTinit <- age.group.time.window(datalist = datalist, agegroup = agegroup, timewindow = timewindow, site="All") cd4.atARTinit <- subset(cd4.atARTinit, TreatTime !=Inf) #HIV positive individuals raw.df <- data.frame(cd4.atARTinit) art.df <- subset(datalist$ttable, ID %in% cd4.atARTinit$ID & TStart > timewindow[1] & TStart < timewindow[2]) ##What if the person dropped out and come back again? art.df <- data.frame(dplyr::summarise(dplyr::group_by(art.df, ID, Gender), CD4atARTstart = min(CD4atARTstart))) #indicate those who started their treatment when their CD4 count was below a given threshold art.df <- art.df %>% dplyr::mutate(ART.start.CD4 = CD4atARTstart < cd4count) # Now we apply the left_join dplyr function to add the ART status to raw.df. 
raw.df <- dplyr::left_join(x = raw.df, y = art.df, by = c("ID", "Gender")) #provide a summary of those that are on treatment and those that started below a threshold cd4count.atARTInit <- data.frame(dplyr::summarise(dplyr::group_by(raw.df, Gender), TotalCases = n(), LessCD4initThreshold =sum(ART.start.CD4))) return(cd4count.atARTInit) } root.path <- dirname(getwd()) destDir <- paste0(root.path, "/temp") age.distr <- agedistr.creator(shape = 5, scale = 65) cfg.list <- input.params.creator(population.eyecap.fraction = 0.21,#1, population.simtime = 40, population.nummen = 2500, population.numwomen = 2500, hivseed.time = 10, hivseed.type = "amount", hivseed.amount = 30, hivseed.age.min = 20, hivseed.age.max = 50, hivtransmission.param.a = -1, hivtransmission.param.b = -90, hivtransmission.param.c = 0.5, hivtransmission.param.f1 = log(2), hivtransmission.param.f2 = log(log(1.4) / log(2)) / 5, formation.hazard.agegapry.gap_factor_man_age = -0.01, #-0.01472653928518528523251061, formation.hazard.agegapry.gap_factor_woman_age = -0.01, #-0.0726539285185285232510561, formation.hazard.agegapry.meanage = -0.025, formation.hazard.agegapry.gap_factor_man_const = 0, formation.hazard.agegapry.gap_factor_woman_const = 0, formation.hazard.agegapry.gap_factor_man_exp = -1, #-6,#-1.5, formation.hazard.agegapry.gap_factor_woman_exp = -1, #-6,#-1.5, formation.hazard.agegapry.gap_agescale_man = 0.25, formation.hazard.agegapry.gap_agescale_woman = 0.25,#-0.30000007,#-0.03, debut.debutage = 15, conception.alpha_base = -2.5#, #person.art.accept.threshold.dist.fixed.value = 0 ) # cfg.list["diagnosis.baseline"] <- 100 # cfg.list["diagnosis.HSV2factor"] <- 0 # cfg.list["diagnosis.agefactor"] <- 0 # cfg.list["diagnosis.beta"] <- 0 # cfg.list["diagnosis.diagpartnersfactor"] <- 0 # cfg.list["diagnosis.genderfactor"] <- 0 # cfg.list["diagnosis.isdiagnosedfactor"] <- 0 # cfg.list["diagnosis.t_max"] <- 200 cfg.list["formation.hazard.agegapry.baseline"] <- 2 cfg.list["mortality.aids.survtime.C"] <- 65 cfg.list["mortality.aids.survtime.k"] <- -0.2 cfg.list["monitoring.fraction.log_viralload"] <- 0.3 cfg.list["dropout.interval.dist.uniform.min"] <- 100 cfg.list["dropout.interval.dist.uniform.max"] <- 200 cfg.list["person.agegap.man.dist.type"] <- "normal" #fixed #cfg.list["person.agegap.man.dist.fixed.value"] <- -6 cfg.list["person.agegap.woman.dist.type"] <- "normal" #"fixed" #cfg.list["person.agegap.woman.dist.fixed.value"] <- -6 cfg.list["mortality.aids.survtime.C"] <- 65 cfg.list["mortality.aids.survtime.k"] <- -0.2 #cfg.list["monitoring.cd4.threshold"] <- 0 cfg.list["person.agegap.man.dist.normal.mu"] <- 0 cfg.list["person.agegap.woman.dist.normal.mu"] <- 0 cfg.list["person.agegap.man.dist.normal.sigma"] <- inputvector[2] ######### 3 cfg.list["person.agegap.woman.dist.normal.sigma"] <- inputvector[2] ######### 3 cfg.list["person.survtime.logoffset.dist.type"] <- "normal" cfg.list["person.survtime.logoffset.dist.normal.mu"] <- 0 cfg.list["person.survtime.logoffset.dist.normal.sigma"] <- 0.1 cfg <- cfg.list seedid <- inputvector[1] #cfg["person.agegap.man.dist.fixed.value"] <- -2 # inputvector[2] #cfg["person.agegap.woman.dist.fixed.value"] <- -2 # inputvector[2] cfg["formation.hazard.agegapry.gap_factor_man_exp"] <- inputvector[3] ######### -0.5 cfg["formation.hazard.agegapry.gap_factor_woman_exp"] <- inputvector[3] ######### -0.5 # Let's introduce ART, and evaluate whether the HIV prevalence drops less rapidly art.intro <- list() art.intro["time"] <- 25 art.intro["person.art.accept.threshold.dist.fixed.value"] <- 
inputvector[4] ######### 0.5 art.intro["diagnosis.baseline"] <- 0#100 art.intro["monitoring.cd4.threshold"] <- 100 # 1200 #art.intro["monitoring.interval.piecewise.cd4s"] <- "0,1300" # Gradual increase in CD4 threshold. in 2007:200. in 2010:350. in 2013:500 art.intro2 <- list() art.intro2["time"] <- 25 + inputvector[5] ######### 30 art.intro2["monitoring.cd4.threshold"] <- 200 art.intro3 <- list() art.intro3["time"] <- inputvector[4] + inputvector[5] + inputvector[6] ########### 33 art.intro3["monitoring.cd4.threshold"] <- 350 art.intro4 <- list() art.intro4["time"] <- inputvector[4] + inputvector[5] + inputvector[6] + inputvector[7] ########### 36 art.intro4["monitoring.cd4.threshold"] <- 500 art.intro5 <- list() art.intro5["time"] <- 38 art.intro5["monitoring.cd4.threshold"] <- 5000 # This is equivalent to immediate access art.intro5["person.art.accept.threshold.dist.fixed.value"] <- inputvector[8] ########### 0.75 tasp.indicator <- inputvector[9] # 1 if the scenario is TasP, 0 if the scenario is current status interventionlist <- list(art.intro, art.intro2, art.intro3, art.intro4, art.intro5) intervention <- scenario(interventionlist, tasp.indicator) results <- simpact.run(configParams = cfg, destDir = destDir, agedist = age.distr, seed = seedid, #, Introducing ART has helped to keep the prevalence high intervention = intervention) datalist.agemix <- readthedata(results) agemix.df <- agemix.df.maker(datalist.agemix) agemix.model <- pattern.modeller(dataframe = agemix.df, agegroup = c(15, 50), timepoint = datalist.agemix$itable$population.simtime[1], timewindow = 3) men.lme <- tryCatch(agemixing.lme.fitter(data = dplyr::filter(agemix.model[[1]], Gender =="male")), error = agemixing.lme.errFunction) # Returns an empty list if the lme model can't be fitted bignumber <- NA # let's try if NA works (instead of 9999 for example) AAD <- ifelse(length(men.lme) > 0, mean(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber) SDAD <- ifelse(length(men.lme) > 0, sd(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber) powerm <- ifelse(length(men.lme) > 0, as.numeric(attributes(men.lme$apVar)$Pars["varStruct.power"]), bignumber) slopem <- ifelse(length(men.lme) > 0, summary(men.lme)$tTable[2, 1], bignumber) WVAD.base <- ifelse(length(men.lme) > 0, men.lme$sigma^2, bignumber) BVAD <- ifelse(length(men.lme) > 0, getVarCov(men.lme)[1,1], bignumber) #agegap.mean <- mean(datalist.agemix$rtable$AgeGap) #agegap.sd <- sd(datalist.agemix$rtable$AgeGap) hivprev.15.50 <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(15, 50), timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[3] # ART coverage vector ART.cov.eval.timepoints <- seq(from = intervention[[1]]$time, to = datalist.agemix$itable$population.simtime[1]) ART.cov.vector <- rep(NA, length(ART.cov.eval.timepoints)) for (art.cov.index in 1:length(ART.cov.vector)){ ART.cov.vector[art.cov.index] <- sum(ART.coverage.calculator(datalist = datalist.agemix, agegroup = c(15, 50), timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.onART) / sum(ART.coverage.calculator(datalist = datalist.agemix, agegroup = c(15, 50), timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.cases) } # Vector of the fraction of people who started their treatment when their CD4count was between specified values cd4.atARTinit.fractions.timepoints <- seq(from = intervention[[1]]$time + 5, to = datalist.agemix$itable$population.simtime[1], by = 5) #30, 35, 40 cd4.any.vector <- cd4.lt100.vector <- cd4.100.200.vector 
<- cd4.200.350.vector <- cd4.350.500.vector <- cd4.500plus.vector <- rep(NA, length(cd4.atARTinit.fractions.timepoints)) for (cd4.index in 1:length(cd4.lt100.vector)){ cd4.lt100.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, TStart < cd4.atARTinit.fractions.timepoints[cd4.index], TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), CD4atARTstart < 100) %>% select(ID) %>% unique() %>% nrow() cd4.100.200.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, TStart < cd4.atARTinit.fractions.timepoints[cd4.index], TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), CD4atARTstart >= 100, CD4atARTstart < 200) %>% select(ID) %>% unique() %>% nrow() cd4.200.350.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, TStart < cd4.atARTinit.fractions.timepoints[cd4.index], TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), CD4atARTstart >= 200, CD4atARTstart < 350) %>% select(ID) %>% unique() %>% nrow() cd4.350.500.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, TStart < cd4.atARTinit.fractions.timepoints[cd4.index], TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), CD4atARTstart >= 350, CD4atARTstart < 500) %>% select(ID) %>% unique() %>% nrow() cd4.500plus.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, TStart < cd4.atARTinit.fractions.timepoints[cd4.index], TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), CD4atARTstart >= 500) %>% select(ID) %>% unique() %>% nrow() cd4.any.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, TStart < cd4.atARTinit.fractions.timepoints[cd4.index], TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5)) %>% select(ID) %>% unique() %>% nrow() } incid.final <- incidence.calculator(datalist = datalist.agemix, agegroup = c(15, 50), timewindow = c((datalist.agemix$itable$population.simtime[1] - 2), datalist.agemix$itable$population.simtime[1]), only.active = "No")$incidence[3] growthrate <- pop.growth.calculator(datalist = datalist.agemix, timewindow = c(0, datalist.agemix$itable$population.simtime[1])) outputvector <- c(AAD, SDAD, powerm, slopem, WVAD.base, BVAD, hivprev.15.50, growthrate, ART.cov.vector, cd4.lt100.vector/cd4.any.vector, cd4.100.200.vector/cd4.any.vector, cd4.200.350.vector/cd4.any.vector, cd4.350.500.vector/cd4.any.vector, cd4.500plus.vector/cd4.any.vector, incid.final) return(outputvector) } simpact.wrapper <- function(inputvector = input.vector){ # This function does not produce the incidence and does not have an input for the model scenario library(RSimpactHelper) library(RSimpactCyan) library(magrittr) library(dplyr) library(nlme) library(lme4) library(fitdistrplus) library(lmtest) library(tidyr) library(data.table) agemixing.lme.errFunction <- function(e) { return(list()) } simpact.errFunction <-function(e){ if (length(grep("NaN",e$message)) != 0){ return(list()) } } # agemixing.lme.fitter <- function(data = dplyr::filter(agemix.model[[1]], Gender =="male")){ # men.lme <- lme(pagerelform ~ agerelform0, # data = data, # control=lmeControl(maxIter=10000, returnObject=TRUE, opt = "optim"), # random = ~1 | ID, # method = "REML", # weight = varPower(value = 0.5, form = ~agerelform0 + 1)) # } ampmodel <- function(data = dplyr::filter(agemix.model[[1]], Gender =="male")) { lmer(pagerelform ~ agerelform0 + (1 | ID), data = data, REML = TRUE) } bvar <- function(model) { # Outputs a df with between-subject variance, upr & lwr limits # Must take an merMod object bsd <- as.numeric(as.data.frame(VarCorr(model))[1,5]) } degree.df.maker <- function 
(dataframe.df, agegroup = c(15, 30), hivstatus = 0, survey.time = 15, window.width = 1, gender.degree = "female", only.new = TRUE) { dataframe.rels.df <- dataframe.df dataframe.rels.df <- dplyr::select(dataframe.rels.df, -episodeorder, -FormTime, -DisTime) dataframe.rels.df <- unique.data.frame(dataframe.rels.df) rels.form.dis.df <- dplyr::summarise(dplyr::group_by(dataframe.df, relid), FormTime = min(FormTime), DisTime = max(DisTime)) dfnew <- dplyr::left_join(x = dataframe.rels.df, y = rels.form.dis.df, by = "relid") dfnew <- dplyr::filter(dfnew, TOD > survey.time) { if (hivstatus == 0) { dfnew <- dplyr::filter(dfnew, InfectTime > survey.time) } else if (hivstatus == 1) { dfnew <- dplyr::filter(dfnew, InfectTime <= survey.time) } } { if (only.new) { dfnew <- dplyr::filter(dfnew, FormTime >= survey.time - window.width, FormTime < survey.time, DisTime > survey.time - window.width, Gender == gender.degree, survey.time - TOB >= agegroup[1], survey.time - TOB < agegroup[2]) } else { dfnew <- dplyr::filter(dfnew, FormTime < survey.time, DisTime > survey.time - window.width, Gender == gender.degree, survey.time - TOB >= agegroup[1], survey.time - TOB < agegroup[2]) } } uniqueOut <- dfnew %>% dplyr::select(ID, relid) %>% distinct %>% rename(Degree = relid) if (dim(uniqueOut)[1] != 0) { degreedata.df <- aggregate(Degree ~ ID, data = uniqueOut, length) } else { degreedata.df <- data.frame(ID = NA, Degree = NA) } return(degreedata.df) } # scenario <- function(interventionlist, tasp.indicator) { # switch(tasp.indicator+1, interventionlist[1:4], interventionlist[1:5]) # } concurr.pointprev.calculator <- function(datalist, timepoint = datalist$itable$population.simtime[1] - 0.5){ #DTP = datalist$ptable, DTE = datalist$etable, cfg = configfile){ #DTP <- datalist$ptable #DTL <- datalist$ltable output <- data.table() DTalive.infected <- alive.infected(datalist = datalist, timepoint = timepoint, site = "All") # First we only take the data of people who were alive at time_i agemix.df <- agemix.df.maker(datalist) degrees.df <- degree.df.maker(dataframe.df = agemix.df, agegroup = c(15, 50), hivstatus = 2, survey.time = timepoint, window.width = 0, gender.degree = "male", only.new = FALSE) number.people.with.cps <- sum(degrees.df$Degree > 1) popsize <- nrow(DTalive.infected) concurr.pointprevalence <- number.people.with.cps / popsize #output <- rbind(output, concurrdata) #outputlist <- list(output=output) #return(outputlist) return(concurr.pointprevalence) } cd4.atARTinit <- function(datalist = datalist, agegroup = c(15, 30), timewindow = c(15, 30), cd4count=350, site="All"){ cd4.atARTinit <- age.group.time.window(datalist = datalist, agegroup = agegroup, timewindow = timewindow, site="All") cd4.atARTinit <- subset(cd4.atARTinit, TreatTime !=Inf) #HIV positive individuals raw.df <- data.frame(cd4.atARTinit) art.df <- subset(datalist$ttable, ID %in% cd4.atARTinit$ID & TStart > timewindow[1] & TStart < timewindow[2]) ##What if the person dropped out and come back again? art.df <- data.frame(dplyr::summarise(dplyr::group_by(art.df, ID, Gender), CD4atARTstart = min(CD4atARTstart))) #indicate those who started their treatment when their CD4 count was below a given threshold art.df <- art.df %>% dplyr::mutate(ART.start.CD4 = CD4atARTstart < cd4count) # Now we apply the left_join dplyr function to add the ART status to raw.df. 
raw.df <- dplyr::left_join(x = raw.df, y = art.df, by = c("ID", "Gender")) #provide a summary of those that are on treatment and those that started below a threshold cd4count.atARTInit <- data.frame(dplyr::summarise(dplyr::group_by(raw.df, Gender), TotalCases = n(), LessCD4initThreshold =sum(ART.start.CD4))) return(cd4count.atARTInit) } root.path <- dirname(getwd()) destDir <- paste0(root.path, "/temp") age.distr <- agedistr.creator(shape = 5, scale = 65) cfg.list <- input.params.creator(population.eyecap.fraction = 0.2, #0.21,#1, population.simtime = 15, #40, population.nummen = 2500, population.numwomen = 2500, hivseed.time = 10, hivseed.type = "amount", hivseed.amount = 25, #30, hivseed.age.min = 20, hivseed.age.max = 50, hivtransmission.param.a = -1, hivtransmission.param.b = -90, hivtransmission.param.c = 0.5, hivtransmission.param.f1 = log(inputvector[2]) , #log(2), hivtransmission.param.f2 = log(log(sqrt(inputvector[2])) / log(inputvector[2])) / 5, #log(log(1.4) / log(2)) / 5, formation.hazard.agegapry.gap_factor_man_age = -0.01, #-0.01472653928518528523251061, formation.hazard.agegapry.gap_factor_woman_age = -0.01, #-0.0726539285185285232510561, formation.hazard.agegapry.meanage = -0.025, formation.hazard.agegapry.gap_factor_man_const = 0, formation.hazard.agegapry.gap_factor_woman_const = 0, formation.hazard.agegapry.gap_factor_man_exp = -1, #-6,#-1.5, formation.hazard.agegapry.gap_factor_woman_exp = -1, #-6,#-1.5, formation.hazard.agegapry.gap_agescale_man = inputvector[3], # 0.25, formation.hazard.agegapry.gap_agescale_woman = inputvector[3], # 0.25,#-0.30000007,#-0.03, debut.debutage = 15, conception.alpha_base = inputvector[14]#-2.5#, #person.art.accept.threshold.dist.fixed.value = 0 ) # cfg.list["diagnosis.baseline"] <- 100 # cfg.list["diagnosis.HSV2factor"] <- 0 # cfg.list["diagnosis.agefactor"] <- 0 # cfg.list["diagnosis.beta"] <- 0 # cfg.list["diagnosis.diagpartnersfactor"] <- 0 # cfg.list["diagnosis.genderfactor"] <- 0 # cfg.list["diagnosis.isdiagnosedfactor"] <- 0 # cfg.list["diagnosis.t_max"] <- 200 cfg.list["formation.hazard.agegapry.baseline"] <- 2 cfg.list["mortality.aids.survtime.C"] <- 65 cfg.list["mortality.aids.survtime.k"] <- -0.2 cfg.list["monitoring.fraction.log_viralload"] <- 0.3 cfg.list["dropout.interval.dist.uniform.min"] <- 100 cfg.list["dropout.interval.dist.uniform.max"] <- 200 cfg.list["person.agegap.man.dist.type"] <- "normal" #fixed #cfg.list["person.agegap.man.dist.fixed.value"] <- -6 cfg.list["person.agegap.woman.dist.type"] <- "normal" #"fixed" #cfg.list["person.agegap.woman.dist.fixed.value"] <- -6 cfg.list["mortality.aids.survtime.C"] <- 65 cfg.list["mortality.aids.survtime.k"] <- -0.2 cfg.list["monitoring.cd4.threshold"] <- 0 cfg.list["person.agegap.man.dist.normal.mu"] <- inputvector[4] # 0 cfg.list["person.agegap.woman.dist.normal.mu"] <- inputvector[4] # 0 cfg.list["person.agegap.man.dist.normal.sigma"] <- inputvector[5] ######### 3 cfg.list["person.agegap.woman.dist.normal.sigma"] <- inputvector[5] ######### 3 cfg.list["person.eagerness.man.dist.gamma.a"] <- inputvector[6] ######### 0.23 cfg.list["person.eagerness.woman.dist.gamma.a"] <- inputvector[7] ######### 0.23 cfg.list["person.eagerness.man.dist.gamma.b"] <- inputvector[8] ######### 45 cfg.list["person.eagerness.woman.dist.gamma.b"] <- inputvector[9] ######### 45 cfg.list["person.survtime.logoffset.dist.type"] <- "normal" cfg.list["person.survtime.logoffset.dist.normal.mu"] <- 0 cfg.list["person.survtime.logoffset.dist.normal.sigma"] <- 0.1 cfg <- cfg.list cfg["population.maxevents"] 
<- as.numeric(cfg.list["population.simtime"][1]) * as.numeric(cfg.list["population.nummen"][1]) * 3 cfg["monitoring.fraction.log_viralload"] <- 0.3 cfg["person.vsp.toacute.x"] <- 5 # See Bellan PLoS Medicine seedid <- inputvector[1] #cfg["person.agegap.man.dist.fixed.value"] <- -2 # inputvector[2] #cfg["person.agegap.woman.dist.fixed.value"] <- -2 # inputvector[2] cfg["formation.hazard.agegapry.gap_factor_man_exp"] <- inputvector[10] ######### -0.5 cfg["formation.hazard.agegapry.gap_factor_woman_exp"] <- inputvector[10] ######### -0.5 cfg["formation.hazard.agegapry.baseline"] <- inputvector[11] cfg["formation.hazard.agegapry.numrel_man"] <- inputvector[12] cfg["formation.hazard.agegapry.numrel_woman"] <- inputvector[13] # inputvector[14] is conception.alpha.base (higher up) cfg["dissolution.alpha_0"] <- inputvector[15] cfg["dissolution.alpha_4"] <- inputvector[16] # Let's introduce ART, and evaluate whether the HIV prevalence drops less rapidly art.intro <- list() art.intro["time"] <- 25 art.intro["person.art.accept.threshold.dist.fixed.value"] <- 0.5 # inputvector[4] ######### 0.5 art.intro["diagnosis.baseline"] <- 0#100 art.intro["monitoring.cd4.threshold"] <- 100 # 1200 #art.intro["monitoring.interval.piecewise.cd4s"] <- "0,1300" # Gradual increase in CD4 threshold. in 2007:200. in 2010:350. in 2013:500 art.intro2 <- list() art.intro2["time"] <- 25 + 5 # inputvector[5] ######### 30 art.intro2["monitoring.cd4.threshold"] <- 200 art.intro3 <- list() art.intro3["time"] <- 33 # inputvector[4] + inputvector[5] + inputvector[6] ########### 33 art.intro3["monitoring.cd4.threshold"] <- 350 art.intro4 <- list() art.intro4["time"] <- 3 # inputvector[4] + inputvector[5] + inputvector[6] + inputvector[7] ########### 36 art.intro4["monitoring.cd4.threshold"] <- 500 art.intro5 <- list() art.intro5["time"] <- 38 art.intro5["monitoring.cd4.threshold"] <- 5000 # This is equivalent to immediate access art.intro5["person.art.accept.threshold.dist.fixed.value"] <- 0.5 # inputvector[8] ########### 0.75 # tasp.indicator <- inputvector[9] # 1 if the scenario is TasP, 0 if the scenario is current status interventionlist <- list(art.intro, art.intro2, art.intro3, art.intro4, art.intro5) intervention <- interventionlist # scenario(interventionlist, tasp.indicator) results <- tryCatch(simpact.run(configParams = cfg, destDir = destDir, agedist = age.distr, seed = seedid, #, Introducing ART has helped to keep the prevalence high intervention = intervention), error = simpact.errFunction) # results <- simpact.run(configParams = cfg, # destDir = destDir, # agedist = age.distr, # seed = seedid, #, Introducing ART has helped to keep the prevalence high # intervention = intervention) if (length(results) == 0){ outputvector <- rep(NA, 16) } else { if (as.numeric(results["eventsexecuted"]) >= (as.numeric(cfg["population.maxevents"]) - 1)) { outputvector <- rep(NA, 16) } else { datalist.agemix <- readthedata(results) agemix.df <- agemix.df.maker(datalist.agemix) agemix.model <- pattern.modeller(dataframe = agemix.df, agegroup = c(15, 50), timepoint = datalist.agemix$itable$population.simtime[1], timewindow = 1)#3) # men.lme <- tryCatch(agemixing.lme.fitter(data = dplyr::filter(agemix.model[[1]], Gender =="male")), # error = agemixing.lme.errFunction) # Returns an empty list if the lme model can't be fitted men.lmer <- tryCatch(ampmodel(data = dplyr::filter(agemix.model[[1]], Gender =="male")), error = agemixing.lme.errFunction) # Returns an empty list if the lme model can't be fitted bignumber <- NA # let's try if NA works 
(instead of 9999 for example) AAD.male <- ifelse(length(men.lmer) > 0, mean(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber) SDAD.male <- ifelse(length(men.lmer) > 0, sd(dplyr::filter(agemix.model[[1]], Gender =="male")$AgeGap), bignumber) #powerm <- ifelse(length(men.lme) > 0, as.numeric(attributes(men.lme$apVar)$Pars["varStruct.power"]), bignumber) slope.male <- ifelse(length(men.lmer) > 0, summary(men.lmer)$coefficients[2, 1], bignumber) #summary(men.lmer)$tTable[2, 1], bignumber) WSD.male <- ifelse(length(men.lmer) > 0, summary(men.lmer)$sigma, bignumber) #WVAD.base <- ifelse(length(men.lme) > 0, men.lme$sigma^2, bignumber) BSD.male <- ifelse(length(men.lmer) > 0, bvar(men.lmer), bignumber) # Bad name for the function because it actually extracts between subject standard deviation # BVAD <- ifelse(length(men.lmer) > 0, getVarCov(men.lme)[1,1], bignumber) intercept.male <- ifelse(length(men.lmer) > 0, summary(men.lmer)$coefficients[1,1] - 15, bignumber) #agegap.mean <- mean(datalist.agemix$rtable$AgeGap) #agegap.sd <- sd(datalist.agemix$rtable$AgeGap) ### # Now we fit the negative binomial distribution ### degrees.df <- degree.df.maker(dataframe.df = agemix.df, agegroup = c(18, 50), hivstatus = 2, survey.time = datalist.agemix$itable$population.simtime[1], window.width = 1, gender.degree = "male", only.new = TRUE) # If we want to know % of men who had 0 partners in the past 12 months, # We need to compare nrow(degree.df) with the number of 18-50 year old men #HIV negative women # that were alive at the time of the survey allsurveymen <- dplyr::filter(datalist.agemix$ptable, Gender == 1, # Male TOD > datalist.agemix$itable$population.simtime[1], # Still alive at the time of the survey TOB <= datalist.agemix$itable$population.simtime[1] - 18, # Not younger than 18 at the time of the survey TOB > datalist.agemix$itable$population.simtime[1] - 50)#, # Not older than 50 at the time of the survey #InfectTime > datalist.agemix$itable$population.simtime[1]) # HIV negative at the time of the survey # Creating the vector of degrees degree.vector <- c(rep(0, times = (nrow(allsurveymen) - nrow(degrees.df))), degrees.df$Degree) meandegree.male <- mean(degree.vector) # hist(degree.vector, 10) # degree.vector <- rnbinom(n = 100, size =1.28, mu = 0.66) # PLACEHOLDER FOR NOW # Fitting the negative binomial distribution to this vector fit.negbin <- tryCatch(fitdist(degree.vector, "nbinom"), error = agemixing.lme.errFunction) shape.nb.male <- ifelse(length(fit.negbin) > 0, as.numeric(fit.negbin$estimate[2]), bignumber) scale.nb.male <- ifelse(length(fit.negbin) > 0, as.numeric(fit.negbin$estimate[1]), bignumber) #(theta = p/(1-p)) # degrees.df <- data.frame(degree = degree.vector) # # Model for black men in the last year # nb.fit <- glm.nb(degree~1, # data = degrees.df, # control = glm.control(maxit = 100000, trace = 3), # init.theta = 1) # summary(nb.fit) # intercept exp(0.25), theta = 0.65 # shape.nb <- exp(as.numeric(nb.fit$coefficients)) #(r) # scale.nb <- as.numeric(nb.fit$theta) #(theta = p/(1-p)) # Concurrency point prevalence 6 months before a survey, among men pp.cp.6months.male <- concurr.pointprev.calculator(datalist = datalist.agemix, timepoint = datalist$itable$population.simtime[1] - 0.5) hiv.prev.lt25.women <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(15, 25), timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[2] hiv.prev.lt25.men <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(15, 25), timepoint = 
datalist.agemix$itable$population.simtime[1])$pointprevalence[1] hiv.prev.25.34.women <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(25, 35), timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[2] hiv.prev.25.34.men <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(25, 35), timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[1] hiv.prev.35.44.women <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(35, 45), timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[2] hiv.prev.35.44.men <- prevalence.calculator(datalist = datalist.agemix, agegroup = c(35, 45), timepoint = datalist.agemix$itable$population.simtime[1])$pointprevalence[1] # ART coverage vector # ART.cov.eval.timepoints <- seq(from = intervention[[1]]$time, # to = datalist.agemix$itable$population.simtime[1]) # ART.cov.vector <- rep(NA, length(ART.cov.eval.timepoints)) # for (art.cov.index in 1:length(ART.cov.vector)){ # ART.cov.vector[art.cov.index] <- sum(ART.coverage.calculator(datalist = datalist.agemix, # agegroup = c(15, 50), # timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.onART) / # sum(ART.coverage.calculator(datalist = datalist.agemix, # agegroup = c(15, 50), # timepoint = ART.cov.eval.timepoints[art.cov.index])$sum.cases) # } # Vector of the fraction of people who started their treatment when their CD4count was between specified values # cd4.atARTinit.fractions.timepoints <- seq(from = intervention[[1]]$time + 5, # to = datalist.agemix$itable$population.simtime[1], # by = 5) #30, 35, 40 # cd4.any.vector <- cd4.lt100.vector <- cd4.100.200.vector <- cd4.200.350.vector <- cd4.350.500.vector <- cd4.500plus.vector <- rep(NA, length(cd4.atARTinit.fractions.timepoints)) # for (cd4.index in 1:length(cd4.lt100.vector)){ # cd4.lt100.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, # TStart < cd4.atARTinit.fractions.timepoints[cd4.index], # TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), # CD4atARTstart < 100) %>% select(ID) %>% unique() %>% nrow() # cd4.100.200.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, # TStart < cd4.atARTinit.fractions.timepoints[cd4.index], # TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), # CD4atARTstart >= 100, # CD4atARTstart < 200) %>% select(ID) %>% unique() %>% nrow() # cd4.200.350.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, # TStart < cd4.atARTinit.fractions.timepoints[cd4.index], # TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), # CD4atARTstart >= 200, # CD4atARTstart < 350) %>% select(ID) %>% unique() %>% nrow() # cd4.350.500.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, # TStart < cd4.atARTinit.fractions.timepoints[cd4.index], # TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), # CD4atARTstart >= 350, # CD4atARTstart < 500) %>% select(ID) %>% unique() %>% nrow() # cd4.500plus.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, # TStart < cd4.atARTinit.fractions.timepoints[cd4.index], # TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5), # CD4atARTstart >= 500) %>% select(ID) %>% unique() %>% nrow() # cd4.any.vector[cd4.index] <- dplyr::filter(datalist.agemix$ttable, # TStart < cd4.atARTinit.fractions.timepoints[cd4.index], # TStart >= (cd4.atARTinit.fractions.timepoints[cd4.index] - 5)) %>% select(ID) %>% unique() %>% nrow() # } # incid.final <- incidence.calculator(datalist = datalist.agemix, # agegroup = c(15, 50), # timewindow = 
c((datalist.agemix$itable$population.simtime[1] - 2), datalist.agemix$itable$population.simtime[1]), # only.active = "No")$incidence[3] growthrate <- pop.growth.calculator(datalist = datalist.agemix, timewindow = c(0, datalist.agemix$itable$population.simtime[1])) outputvector <- c(AAD.male, SDAD.male, slope.male, WSD.male, BSD.male, intercept.male, shape.nb.male, scale.nb.male, #meandegree.male, pp.cp.6months.male, hiv.prev.lt25.women, hiv.prev.lt25.men, hiv.prev.25.34.women, hiv.prev.25.34.men, hiv.prev.35.44.women, hiv.prev.35.44.men, exp(growthrate)) } } return(outputvector) }
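Before plugging the wrapper into the calibration loop, it can be useful to run it once on its own. A minimal sketch (assuming RSimpactCyan and RSimpactHelper are installed and the temp output directory exists), using the master-model parameter values that are listed further below:

# A single test run: the first element is the random seed, followed by the 15 model parameters
one.run <- simpact.wrapper(inputvector = c(1,                      # seed
                                           1.1, 0.25, 0, 3,        # f1, gap_agescale, agegap mu, agegap sigma
                                           0.23, 0.23, 45, 45,     # eagerness gamma a (men, women), gamma b (men, women)
                                           -0.5, 2.8, -0.2, -0.2,  # gap_factor exp, formation baseline, numrel penalties
                                           -2.5, -0.52, -0.05))    # conception alpha_base, dissolution alpha_0, alpha_4
one.run # vector of 16 summary statistics, or 16 NAs if the Simpact run failed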
MICE_ABC <- function(targets = targets.empirical, n.experiments = 16, lls = c(1.01, 0.15, -1, 2, 0.05, 0.05, 10, 10, -1, 0, -1, -1, -5, -2, -0.2), #c(2, -1, 0.1, 2, 1, 1, 0.3), #c(0.1, -3, 0.1, 0, 0, 0, 0.2), uls = c(3, 0.35, 1, 5, 1, 1, 100, 100, -0.2, 5, -0.1, -0.1, -1, -0.2, 0), #c(4, -0.2, 0.6, 8, 6, 6, 0.95), #c(5, -0.1, 1.0, 10, 10, 10, 1.0), # probs = c(3, 7), # indicate in the lls and uls vectors, which elements represent probabilities strictly between 0 and 1 model = simpact.wrapper, maxit = 10, maxwaves = 2, # and we can also try 10 to do the same as the 10 waves of Lenormand #reps = 5, alpha = 0.25, saturation.crit = 0){ ptm <- proc.time() # Start the clock range.width <- uls - lls ll.mat <- matrix(rep(lls, n.experiments), nrow = n.experiments, byrow = TRUE) range.width.mat <- matrix(rep(range.width, n.experiments), nrow = n.experiments, byrow = TRUE) sobol.seq.0.1 <- sobol(n = n.experiments, dim = length(lls), init = TRUE, scrambling = 1, seed = 1, normal = FALSE) experiments <- ll.mat + sobol.seq.0.1 * range.width.mat calibration.list <- list() # initiating the list where all the output of MiceABC will be stored wave <- 1 # initiating the loop of waves of simulations (one iteration is one wave) rel.dist.cutoff <- Inf # initially it is infinitely large, but in later iterations it shrinks saturation <- 0 sim.results.with.design.df.selected <- NULL while (wave <= maxwaves && saturation == 0){ print(c("wave", wave), quote = FALSE) # only for local testing. To be deleted after testing is completed # These large and small enough tests could be commented out because the "true" parameter value may lie outside the initial parameter range. # They may be useful to prevent infeasible MICE-derived proposals from running (e.g. positive parameters values that don't make sense) large.enough <- t(t(experiments) >= lls) small.enough <- t(t(experiments) <= uls) fine <- large.enough & small.enough fine.flag <- as.logical(rowSums(fine) == ncol(fine)) experiments <- experiments[fine.flag, ] #try(if(nrow(experiments) == 0) stop("no experiments in the range")) if (nrow(experiments) < 2){ wave <- maxwaves } else { print(c(nrow(experiments), "experiments to be run"), quote = FALSE) # only for local testing. To be deleted after testing is completed calibration.list$experiments.executed[[wave]] <- nrow(experiments) # Running the experiments # nl.path <- "/Applications/NetLogo 6.0/app/" # NLStart(nl.path, gui=FALSE, nl.obj="agemix.nl1", nl.jarname = 'netlogo-6.0.0.jar') # model.path <- "/Users/delvaw/Google Drive/IBM course/tutorials/AgeMixing/Calibration/dummy.0.1.nlogo" # NLLoadModel(model.path, nl.obj="agemix.nl1") # source(file = "/Users/delvaw/Google Drive/IBM course/tutorials/AgeMixing/Calibration/agemix.simulate.R") # sim.results.simple <- t(apply(experiments, 1, model, no.repeated.sim=reps)) # Maybe this apply() can be replaced by a faster foreach loop? # Replacing NetLogo model with parallel Simpact model # 1. Initially: n.experiments from sobol sequences. 7. Later, (1-alpha) fraction of n.experiments from MICE proposals sim.results.simple <- simpact.parallel(model = model, actual.input.matrix = experiments, seed_count = 0, n_cluster = 8) #experiment.number <- 1:nrow(experiments) sim.results.with.design.df <- as.data.frame(cbind(#experiment.number, experiments, sim.results.simple)) x.names <- paste0("x.", seq(1:ncol(experiments))) y.names <- paste0("y.", seq(1:ncol(sim.results.simple))) names(sim.results.with.design.df) <- c(x.names, y.names) # 2. 
All n.experiments from sobol sequences get summarised measure of distance from target. 8. Distance measures for new proposals # NEEDS TO BE AUTOMATED # for(i in 1:6) { #-- Create objects 'r.1', 'r.2', ... 'r.6' -- # nam <- paste("r", i, sep = ".") # assign(nam, 1:i) # } #browser() sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) for(i in 1:length(targets)) { nam <- paste0("y.", i, ".sq.rel.dist") val <- ((sim.results.simple[,i] - targets[i]) / targets[i])^2 assign(nam, val) sum.sq.rel.dist <- sum.sq.rel.dist + get(nam) } sim.results.with.design.df$sum.sq.rel.dist <- sum.sq.rel.dist # sim.results.with.design.df$y.1.sq.rel.dist <- ((sim.results.with.design.df$y.1 - targets[1]) / targets[1])^2 # sim.results.with.design.df$y.2.sq.rel.dist <- ((sim.results.with.design.df$y.2 - targets[2]) / targets[2])^2 # sim.results.with.design.df$y.3.sq.rel.dist <- ((sim.results.with.design.df$y.3 - targets[3]) / targets[3])^2 # sim.results.with.design.df$y.4.sq.rel.dist <- ((sim.results.with.design.df$y.4 - targets[4]) / targets[4])^2 # sim.results.with.design.df$y.5.sq.rel.dist <- ((sim.results.with.design.df$y.5 - targets[5]) / targets[5])^2 # sim.results.with.design.df$y.6.sq.rel.dist <- ((sim.results.with.design.df$y.6 - targets[6]) / targets[6])^2 # sim.results.with.design.df$y.7.sq.rel.dist <- ((sim.results.with.design.df$y.7 - targets[7]) / targets[7])^2 # sim.results.with.design.df$y.8.sq.rel.dist <- ((sim.results.with.design.df$y.8 - targets[8]) / targets[8])^2 # sim.results.with.design.df$y.9.sq.rel.dist <- ((sim.results.with.design.df$y.9 - targets[9]) / targets[9])^2 # # # sim.results.with.design.df$sum.sq.rel.dist <- sim.results.with.design.df$y.1.sq.rel.dist + sim.results.with.design.df$y.2.sq.rel.dist + sim.results.with.design.df$y.3.sq.rel.dist + sim.results.with.design.df$y.4.sq.rel.dist + sim.results.with.design.df$y.5.sq.rel.dist + sim.results.with.design.df$y.6.sq.rel.dist + sim.results.with.design.df$y.7.sq.rel.dist + sim.results.with.design.df$y.8.sq.rel.dist + sim.results.with.design.df$y.9.sq.rel.dist # 2b. Writing to list all the input and output of the executed experiments, so that we can plot it later calibration.list$sim.results.with.design[[wave]] <- sim.results.with.design.df # 10. Calculate fraction of new (1-alpha frac *n.experiments) distances that are below "old" distance threshold. NOT CURRENTLY USED BECAUSE saturation.crit = 0. below.old.treshold <- sim.results.with.design.df$sum.sq.rel.dist < rel.dist.cutoff frac.below.old.threshold <- sum(below.old.treshold %in% TRUE) / round(n.experiments * (1-alpha)) if(frac.below.old.threshold < saturation.crit) saturation <- 1 # If less than the fraction saturation.crit of the new experiments a closer fit than the previous batch of retained experiments, the loop must be terminated. # 9. Merge with previously kept alpha fraction shortest distances sim.results.with.design.df <- rbind(sim.results.with.design.df.selected, sim.results.with.design.df) # Initially sim.results.with.design.df.selected = NULL # 3. Keeping alpha fraction shortest distances dist.order <- order(sim.results.with.design.df$sum.sq.rel.dist) # Ordering the squared distances from small to big. 
The last observation (targets) should be ranked first last.one.selected <- round(alpha * n.experiments) # OR IT COULD ALSO BE WRITTEN AS round(alpha * length(dist.order)) BUT IF RANGE CHECKS ARE ON, THE nrow IS NOT NECESSARILY = n.experiments selected.distances <- dist.order[1:last.one.selected] sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] %>% filter(complete.cases(.)) # Here we filter out any experiments that gave NA output # 4. Set distance threshold at alpha-percentile best distance. 11. Update distance threshold rel.dist.cutoff <- max(sim.results.with.design.df.selected$sum.sq.rel.dist) # We update the rel.dist.cutoff, based on the alpha fraction best parametersets # 5. Write alpha fraction of input-output-distances and distance threshold to list. 12. Write new best alpha fraction and updated threshold to list calibration.list$inputoutput[[wave]] <- sim.results.with.design.df.selected calibration.list$rel.dist.cutoff[[wave]] <- rel.dist.cutoff # 6. Feed MICE the best alpha fraction and ask for (1-alpha) fraction proposals based on targets targets.df <- as.data.frame(matrix(targets, ncol = length(targets))) # Putting target in dataframe format names(targets.df) <- y.names df.give.to.mice <- full_join(dplyr::select(sim.results.with.design.df.selected, -contains("sq.rel.dist")), # adding target to training dataset targets.df, by = names(targets.df)) # "by" statement added to avoid printing message of the variables were used for joining ### Before actually giving it to MICE, we need to logit transform the input params that represent probabilities, so that we can treat these variables as continuous variables # "probs" shows that it is the 3rd and 7th input variable (not counting the random seed variable) # df.give.to.mice[, probs] <- car::logit(df.give.to.mice[, probs]) # Commented out because none of the input parameters are probabilities for this age-mixing calibration # INSTEAD, we are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b. # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet strict.positive.params <- c(4:8) df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params]) # Let's think a bit more carefully about which variables should be allowed as input for which input parameters. # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS. predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix. # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model. # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. 
formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # And a reminder of the output vector # 1. AAD.male (4) # 2. SDAD.male (4) # 3. slope.male (0.7) # 4. WSD.male (3) # 5. BSD.male (1.5) # 6. intercept.male (-1) # 7. shape.nb.male (1.29) # 8. scale.nb.male (0.66) # THIS IS OUT 9. meandegree.male (1) # 10. pp.cp.6months.male (0.134) # 11. hiv.prev.lt25.women (0.2) # 12. hiv.prev.lt25.men (0.1) # 13. hiv.prev.25.34.women (0.42) # 14. hiv.prev.25.34.men (0.17) # 15. hiv.prev.35.44.women (0.37) # 16. hiv.prev.35.44.men (0.24) # 17. exp(growthrate)) (1.01) x.offset <- length(x.names) predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them. # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage. predictorMatrix[1, x.offset + 11:12] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD predictorMatrix[5, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[6, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate) predictorMatrix[7, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[8, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 15, 16, 17)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence, predictorMatrix[10, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence. predictorMatrix[11, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[12, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[13, x.offset + 17] <- 1 # conception.alpha_base is predicted by popgrowth predictorMatrix[14, x.offset + c(7, 8, 10, 17)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth predictorMatrix[15, x.offset + c(7, 8, 10, 17)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?) # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity. 
# Because there are some many interaction terms, let's first try without any #df.give.to.mice <- cbind(df.give.to.mice, data.frame(y.1.y.2 = df.give.to.mice$y.1 * df.give.to.mice$y.2)) # adding interaction term print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE) mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data m = round(n.experiments * (1-alpha)), # number of imputations predictorMatrix = predictorMatrix, method = "norm", defaultMethod = "norm", maxit = maxit, printFlag = FALSE, data.init = NULL) # experiments <- cbind(unlist(mice.test$imp$x.1), # these are the (1-alpha) fraction of n.experiments proposals # unlist(mice.test$imp$x.2)) experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names)) # Before we check the suitability of the new experimental input parameter values, we must backtransform the logits to probabilities # experiments[, probs] <- boot::inv.logit(experiments[, probs]) # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params]) } wave <- wave + 1 } calibration.list$secondspassed <- proc.time() - ptm # Stop the clock return(calibration.list) }
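A minimal usage sketch (illustrative settings only; the targets.empirical vector is defined further below, and a real calibration would typically use more experiments per wave):

calib.output <- MICE_ABC(targets = targets.empirical,
                         n.experiments = 16,
                         model = simpact.wrapper,
                         maxit = 10,
                         maxwaves = 2,
                         alpha = 0.25)
calib.output$rel.dist.cutoff  # distance threshold retained after each wave
calib.output$inputoutput      # best alpha fraction of parameter sets (with outputs) per wave
calib.output$secondspassed    # total run time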
# A function to run Simpact in parallel
simpact.parallel <- function(model = simpact.wrapper,
                             actual.input.matrix = matrix(rep(c(1:15), 16), nrow = 16),
                             #nb_simul = 16,
                             seed_count = 0,
                             n_cluster = 8){
  cl <- makeCluster(getOption("cl.cores", n_cluster))
  tab_simul_summarystat = NULL
  list_param <- list(NULL)
  tab_param <- NULL
  paramtemp <- NULL
  simultemp <- NULL
  nb_simul <- nrow(actual.input.matrix)
  for (i in 1:nb_simul) {
    l <- ncol(actual.input.matrix)
    param <- c((seed_count + i), actual.input.matrix[i, ])
    list_param[[i]] <- param
    tab_param <- rbind(tab_param, param[2:(l + 1)])
    paramtemp <- rbind(paramtemp, param[2:(l + 1)])
  }
  list_simul_summarystat = parLapplyLB(cl, list_param, model)
  tab_simul_summarystat <- do.call(rbind, list_simul_summarystat)
  stopCluster(cl)
  return(tab_simul_summarystat)
}
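For example (a sketch, assuming simpact.wrapper is defined as above and 8 cores are available), the wrapper can be run in parallel over a small design matrix; the returned matrix has one row of summary statistics per parameter set:

design.matrix <- matrix(rep(c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45,
                              -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05),
                            times = 16),
                        nrow = 16, byrow = TRUE) # 16 copies of the master parameter vector
summary.stats <- simpact.parallel(model = simpact.wrapper,
                                  actual.input.matrix = design.matrix,
                                  seed_count = 0,
                                  n_cluster = 8)
dim(summary.stats) # 16 runs x 16 summary statistics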
Let's create the "truth" with the master model.
reps <- 2 target.vector.master <- c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05) test.inputvectorplusseed <- c(1, target.vector.master) # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # simpact.wrapper(test.inputvectorplusseed) # 1. AAD.male (4) # 2. SDAD.male (4) # 3. slope.male (0.7) # 4. WSD.male (3) # 5. BSD.male (1.5) # 6. intercept.male (-1) # 7. shape.nb.male (1.29) # 8. scale.nb.male (0.66) # THIS IS OUT 9. meandegree.male (1) # 10. pp.cp.6months.male (0.134) # 11. hiv.prev.lt25.women (0.2) # 12. hiv.prev.lt25.men (0.1) # 13. hiv.prev.25.34.women (0.42) # 14. hiv.prev.25.34.men (0.17) # 15. hiv.prev.35.44.women (0.37) # 16. hiv.prev.35.44.men (0.24) # 17. exp(growthrate)) (1.01) targets.empirical <- c(4, 4, 0.7, 3, 1.5, -1, 1.29, 0.66, 0.134, 0.05, 0.03, 0.1, 0.05, 0.08, 0.09, 1.01) targets.master <- c(4.0651198, 3.5641622, 0.5463380, 1.8322277, 1.3357401, -0.6137708, 0.4686528, 2.1738464, 0.1203855, 0.3389044, 0.2751678, 0.3911846, 0.5899390, 0.3227513, 0.4395604, 1.0111390) targets.diff <- targets.empirical - targets.master # Filling default values, to facilitate interactive testing of incremental mice function RMSD.tol.max <- 1 min.givetomice <- 80 n.experiments <- 400 lls <- c(1, 0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16) uls <- c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3, -0.001) model <- simpact.wrapper maxit <- 50 maxwaves <- 8 # design.matrix.control <- matrix(rep(target.vector.master, reps), nrow = reps, byrow = TRUE) # # design.matrix.interv <- matrix(rep(c(3, -0.5, 0.5, 5, 3, 3, 0.75, 1), reps), nrow = reps, byrow = TRUE) # test.mat <- matrix(data = NA, nrow = 6, ncol = 17) # for (test.i in 1:6){ # test.mat[test.i, ] <- simpact.wrapper(c(test.i, experiments[test.i, ])) # } # # sumsmstats.control.df <- simpact.parallel(model = simpact.wrapper, # actual.input.matrix = experiments,#design.matrix.control, # seed_count = 0, # n_cluster = 8) # sumsmstats.interv.df <- simpact.parallel(model = simpact.wrapper.master, # actual.input.matrix = design.matrix.interv, # seed_count = 0, # n_cluster = 8)
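The commented-out test code above refers to an `experiments` matrix that has not been defined at this point. A sketch of how it can be generated (mirroring the design step inside MICE_ABC and MICE_ABC.incremental): a scrambled Sobol sequence on [0, 1] is rescaled to the [lls, uls] parameter ranges.

range.width <- uls - lls
ll.mat <- matrix(rep(lls, n.experiments), nrow = n.experiments, byrow = TRUE)
range.width.mat <- matrix(rep(range.width, n.experiments), nrow = n.experiments, byrow = TRUE)
sobol.seq.0.1 <- sobol(n = n.experiments, dim = length(lls), init = TRUE,
                       scrambling = 1, seed = 1, normal = FALSE) # randtoolbox::sobol
experiments <- ll.mat + sobol.seq.0.1 * range.width.mat # n.experiments rows x 15 parameters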
root.path <- dirname(getwd()) MICE_ABC.incremental <- function(targets.empirical = targets.empirical, RMSD.tol.max = 1, min.givetomice = 128, n.experiments = 1024, lls = c(1, 0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16), uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3, -0.001), model = simpact.wrapper, maxit = 50, maxwaves = 8){ # 0. Start the clock ptm <- proc.time() calibration.list <- list() # initiating the list where all the output of MiceABC will be stored wave <- 1 # initiating the loop of waves of simulations (one iteration is one wave) rel.dist.cutoff <- Inf # initially it is infinitely large, but in later iterations it shrinks x.names <- paste0("x.", seq(1:15)) y.names <- paste0("y.", seq(1:16)) sim.results.with.design.df <- data.frame(matrix(vector(), 0, 31, dimnames = list(c(), c(x.names, y.names))), stringsAsFactors = FALSE) # Will be growing with each wave (appending) # sim.results.with.design.df.selected I think this does not need to be initiated final.intermediate.features <- rep(NA, times = length(targets.empirical)) # 1. Start loop of waves, based on comparing intermediate features with targets.empirical while (wave <= maxwaves & !identical(final.intermediate.features, targets.empirical)){ if (wave == 1){ # 2. Initial, naive results, based on Sobol sequences range.width <- uls - lls ll.mat <- matrix(rep(lls, n.experiments), nrow = n.experiments, byrow = TRUE) range.width.mat <- matrix(rep(range.width, n.experiments), nrow = n.experiments, byrow = TRUE) sobol.seq.0.1 <- sobol(n = n.experiments, dim = length(lls), init = TRUE, scrambling = 1, seed = 1, normal = FALSE) experiments <- ll.mat + sobol.seq.0.1 * range.width.mat } sim.results.simple <- simpact.parallel(model = model, actual.input.matrix = experiments, seed_count = 0, n_cluster = 8) #save(sim.results.simple, file = paste0(root.path,"/MiceABC/sim.results.simple.RData")) #load(file = "sim.results.simple.RData") new.sim.results.with.design.df <- as.data.frame(cbind(experiments, sim.results.simple)) x.names <- paste0("x.", seq(1:ncol(experiments))) y.names <- paste0("y.", seq(1:ncol(sim.results.simple))) x.offset <- length(x.names) names(new.sim.results.with.design.df) <- c(x.names, y.names) new.sim.results.with.design.df <- new.sim.results.with.design.df %>% dplyr::filter(complete.cases(.)) if (wave == 1){ # TRUE for the first wave only sim.results.with.design.df <- rbind(sim.results.with.design.df, new.sim.results.with.design.df) } else { sim.results.with.design.df <- rbind(dplyr::select(sim.results.with.design.df, -contains("RMSD")), new.sim.results.with.design.df) } # save(sim.results.with.design.df, file = "/Users/wimdelva/Documents/MiceABC/sim.results.with.design.df.RData") # load(file = "/Users/delvaw/Documents/MiceABC/sim.results.with.design.df.RData") experim.median.features <- med(dplyr::select(sim.results.with.design.df, contains("y.")), mustdith = TRUE)$median # 3. Find intermediate features and RMSD.tol for which n.close.to.targets >= min.givetomice targets.diff <- targets.empirical - experim.median.features # First we determine how far the empirical targets are away from the median features of the executed experiments candidate.RMSD.tol <- Inf # Initially, we assume that the RMSD cut-off needs to be infinitely large to have sufficient observations to give to mice. 
n.steps.targets <- 100 n.steps.RMSD.tol <- 100 n.close.to.targets.mat <- matrix(NA, nrow = n.steps.targets+1, ncol = n.steps.RMSD.tol+1) final.RMSD.tol <- NA shift.fraction <- NA for (steps.intermediate.targets in 0:n.steps.targets){ # We make 10% steps from empirical targets to experimental median features candidate.intermediate.features <- targets.empirical - (targets.diff * steps.intermediate.targets) / n.steps.targets for (steps.RMSD.tol in 0:n.steps.RMSD.tol){ # We make 10% steps from RMSD.tol.max to RMSD.tol = 0 RMSD.tol <- RMSD.tol.max * (n.steps.RMSD.tol - steps.RMSD.tol) / n.steps.RMSD.tol sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) for (i in 1:length(candidate.intermediate.features)) { name.dist <- paste0("y.", i, ".sq.rel.dist") value.dist <- ((sim.results.with.design.df[,i + x.offset] - candidate.intermediate.features[i]) / candidate.intermediate.features[i])^2 assign(name.dist, value.dist) sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist) } RMSD <- sqrt(sum.sq.rel.dist / length(candidate.intermediate.features)) n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE) n.close.to.targets.mat[(1+steps.intermediate.targets), (1+steps.RMSD.tol)] <- n.close.to.targets large.enough.training.df <- n.close.to.targets >= min.givetomice if (large.enough.training.df == TRUE){ if (RMSD.tol < candidate.RMSD.tol){ candidate.RMSD.tol <- RMSD.tol # Updating the candidate.RMSD.tol final.RMSD.tol <- RMSD.tol final.intermediate.features <- candidate.intermediate.features shift.fraction <- steps.intermediate.targets/n.steps.targets # 0 means targets.empirical; 1 means experim.median.features #calibration.list$intermediate.features[[wave]] <- final.intermediate.features #calibration.list$steps.intermediate.targets[[wave]] <- steps.intermediate.targets #calibration.list$RMSD.tol[[wave]] <- RMSD.tol #calibration.list$n.close.to.targets[[wave]] <- n.close.to.targets } } } } n.close.to.targets.mat final.RMSD.tol final.intermediate.features shift.fraction # 4. Calculate RMSD values for these intermediate.features and the RMSD.tol found sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) for (i in 1:length(final.intermediate.features)) { name.dist <- paste0("y.", i, ".sq.rel.dist") value.dist <- ((sim.results.with.design.df[,i + x.offset] - final.intermediate.features[i]) / final.intermediate.features[i])^2 assign(name.dist, value.dist) sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist) } RMSD <- sqrt(sum.sq.rel.dist / length(final.intermediate.features)) sim.results.with.design.df$RMSD <- RMSD n.close.to.targets <- sum(RMSD < final.RMSD.tol, na.rm = TRUE) # 5. Select n.close.to.targets shortest distances dist.order <- order(RMSD) # Ordering the squared distances from small to big. selected.distances <- dist.order[1:n.close.to.targets] sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] # 5.b. Record highest RMSD value for that the selected experiments calibration.list$max.RMSD[[wave]] <- max(sim.results.with.design.df.selected$RMSD) # 6. Record selected experiments to give to mice for this wave calibration.list$selected.experiments[[wave]] <- sim.results.with.design.df.selected # 7. Put intermediate features in dataframe format final.intermediate.features.df <- as.data.frame(matrix(final.intermediate.features, ncol = length(final.intermediate.features))) names(final.intermediate.features.df) <- y.names # 8. 
Prepare dataframe to give to mice: selected experiments plus intermediate features df.give.to.mice <- dplyr::full_join(dplyr::select(sim.results.with.design.df.selected, -contains("RMSD")), # adding target to training dataset final.intermediate.features.df, by = names(final.intermediate.features.df)) # "by" statement added to avoid printing message of the variables were used for joining # We are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b. # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet strict.positive.params <- c(4:8) df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params]) # 9. Override default predictorMatrix with a sparser matrix # Let's think a bit more carefully about which variables should be allowed as input for which input parameters. # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS. predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix. # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model. # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # And a reminder of the output vector # 1. AAD.male (4) # 2. SDAD.male (4) # 3. slope.male (0.7) # 4. WSD.male (3) # 5. BSD.male (1.5) # 6. intercept.male (-1) # 7. shape.nb.male (1.29) # 8. scale.nb.male (0.66) # 9. pp.cp.6months.male (0.134) # 10. hiv.prev.lt25.women (0.2) # 11. hiv.prev.lt25.men (0.1) # 12. hiv.prev.25.34.women (0.42) # 13. hiv.prev.25.34.men (0.17) # 14. hiv.prev.35.44.women (0.37) # 15. hiv.prev.35.44.men (0.24) # 16. exp(growthrate)) (1.01) predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them. # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage. 
predictorMatrix[1, x.offset + 10:11] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD predictorMatrix[5, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[6, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate) predictorMatrix[7, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[8, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 14, 15, 16)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence, predictorMatrix[10, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence. predictorMatrix[11, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[12, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[13, x.offset + 16] <- 1 # conception.alpha_base is predicted by popgrowth predictorMatrix[14, x.offset + c(7, 8, 9, 16)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth predictorMatrix[15, x.offset + c(7, 8, 9, 16)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?) # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity. print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE) # 10. Let mice propose parameter values that are predicted by the intermediate features mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data m = n.experiments, # number of imputations predictorMatrix = predictorMatrix, method = "norm", defaultMethod = "norm", maxit = maxit, printFlag = FALSE) # 11. Turn mice proposals into a new matrix of experiments experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names)) # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params]) # 12. Update wave count wave <- wave + 1 } # 13. Check if intermediate features have converged to empirical targests (if not, then maxwaves is reached) if (final.intermediate.features == targets.empirical){ # 14. 
Do more "traditional mice optimisation" for the remaining waves while (wave <= maxwaves){ sim.results.simple <- simpact.parallel(model = model, actual.input.matrix = experiments, seed_count = 0, n_cluster = 8) # save(sim.results.simple, file = "/Users/delvaw/Documents/MiceABC/sim.results.simple.RData") # load(file = "sim.results.simple.RData") new.sim.results.with.design.df <- as.data.frame(cbind(experiments, sim.results.simple)) x.names <- paste0("x.", seq(1:ncol(experiments))) y.names <- paste0("y.", seq(1:ncol(sim.results.simple))) x.offset <- length(x.names) names(new.sim.results.with.design.df) <- c(x.names, y.names) new.sim.results.with.design.df <- new.sim.results.with.design.df %>% dplyr::filter(complete.cases(.)) if (nrow(sim.results.with.design.df)==0){ # TRUE for the first wave only sim.results.with.design.df <- rbind(sim.results.with.design.df, new.sim.results.with.design.df) } else { sim.results.with.design.df <- rbind(dplyr::select(sim.results.with.design.df, -contains("RMSD")), new.sim.results.with.design.df) } # save(sim.results.with.design.df, file = "/Users/delvaw/Documents/MiceABC/sim.results.with.design.df.RData") # load(file = "/Users/delvaw/Documents/MiceABC/sim.results.with.design.df.RData") #experim.median.features <- med(dplyr::select(sim.results.with.design.df, contains("y.")))$median # 3. Find intermediate features and RMSD.tol for which n.close.to.targets >= min.givetomice # targets.diff <- targets.empirical - experim.median.features # First we determine how far the empirical targets are away from the median features of the executed experiments # candidate.RMSD.tol <- Inf # Initially, we assume that the RMSD cut-off needs to be infinitely large to have sufficient observations to give to mice. # n.steps.targets <- 100 # n.steps.RMSD.tol <- 100 # n.close.to.targets.mat <- matrix(NA, nrow = n.steps.targets+1, ncol = n.steps.RMSD.tol+1) # final.RMSD.tol <- NA # shift.fraction <- NA # for (steps.intermediate.targets in 0:n.steps.targets){ # We make 10% steps from empirical targets to experimental median features # candidate.intermediate.features <- targets.empirical - (targets.diff * steps.intermediate.targets) / n.steps.targets # for (steps.RMSD.tol in 0:n.steps.RMSD.tol){ # We make 10% steps from RMSD.tol.max to RMSD.tol = 0 # RMSD.tol <- RMSD.tol.max * (n.steps.RMSD.tol - steps.RMSD.tol) / n.steps.RMSD.tol # # sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) # for (i in 1:length(candidate.intermediate.features)) { # name.dist <- paste0("y.", i, ".sq.rel.dist") # value.dist <- ((sim.results.with.design.df[,i + x.offset] - candidate.intermediate.features[i]) / candidate.intermediate.features[i])^2 # assign(name.dist, value.dist) # sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist) # } # RMSD <- sqrt(sum.sq.rel.dist / length(candidate.intermediate.features)) # n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE) # n.close.to.targets.mat[(1+steps.intermediate.targets), (1+steps.RMSD.tol)] <- n.close.to.targets # large.enough.training.df <- n.close.to.targets >= min.givetomice # if (large.enough.training.df == TRUE){ # if (RMSD.tol < candidate.RMSD.tol){ # candidate.RMSD.tol <- RMSD.tol # Updating the candidate.RMSD.tol # final.RMSD.tol <- RMSD.tol # final.intermediate.features <- candidate.intermediate.features # shift.fraction <- steps.intermediate.targets/n.steps.targets # 0 means targets.empirical; 1 means experim.median.features # #calibration.list$intermediate.features[[wave]] <- final.intermediate.features # 
#calibration.list$steps.intermediate.targets[[wave]] <- steps.intermediate.targets # #calibration.list$RMSD.tol[[wave]] <- RMSD.tol # #calibration.list$n.close.to.targets[[wave]] <- n.close.to.targets # } # } # } # } # n.close.to.targets.mat # final.RMSD.tol # final.intermediate.features # shift.fraction # We can calculate the alpha fraction of close-enough data, based on the final.RMSD.tol from the previous wave, and use that as a starting point? # 4. Calculate RMSD values for these intermediate.features and the RMSD.tol found sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) for (i in 1:length(final.intermediate.features)) { # These are now equal to targets.empirical name.dist <- paste0("y.", i, ".sq.rel.dist") value.dist <- ((sim.results.with.design.df[,i + x.offset] - final.intermediate.features[i]) / final.intermediate.features[i])^2 assign(name.dist, value.dist) sum.sq.rel.dist <- sum.sq.rel.dist + get(name.dist) } RMSD <- sqrt(sum.sq.rel.dist / length(final.intermediate.features)) sim.results.with.design.df$RMSD <- RMSD n.close.to.targets <- sum(RMSD < final.RMSD.tol, na.rm = TRUE) # From hereon, we will keep using this same n.close.to.targets # alpha.fraction <- n.close.to.targets / nrow(sim.results.with.design.df) # Here we automatically set the alpha level. # 2b. Writing to list all the input and output of the executed experiments, so that we can plot it later # calibration.list$sim.results.with.design[[wave]] <- sim.results.with.design.df # 10. Calculate fraction of new (1-alpha frac *n.experiments) distances that are below "old" distance threshold. NOT CURRENTLY USED BECAUSE saturation.crit = 0. # below.old.treshold <- sim.results.with.design.df$sum.sq.rel.dist < rel.dist.cutoff # frac.below.old.threshold <- sum(below.old.treshold %in% TRUE) / round(n.experiments * (1-alpha)) # if(frac.below.old.threshold < saturation.crit) saturation <- 1 # If less than the fraction saturation.crit of the new experiments a closer fit than the previous batch of retained experiments, the loop must be terminated. # 9. Merge with previously kept alpha fraction shortest distances # sim.results.with.design.df <- rbind(sim.results.with.design.df.selected, sim.results.with.design.df) # Initially sim.results.with.design.df.selected = NULL # 3. Keeping alpha fraction shortest distances # 5. Select n.close.to.targets shortest distances dist.order <- order(RMSD) # Ordering the squared distances from small to big. selected.distances <- dist.order[1:n.close.to.targets] sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] # 5.b. Record highest RMSD value for that the selected experiments calibration.list$max.RMSD[[wave]] <- max(sim.results.with.design.df.selected$RMSD) # 6. Record selected experiments to give to mice for this wave calibration.list$selected.experiments[[wave]] <- sim.results.with.design.df.selected # 7. Put intermediate features in dataframe format final.intermediate.features.df <- as.data.frame(matrix(final.intermediate.features, ncol = length(final.intermediate.features))) names(final.intermediate.features.df) <- y.names # 8. 
Prepare dataframe to give to mice: selected experiments plus intermediate features df.give.to.mice <- dplyr::full_join(dplyr::select(sim.results.with.design.df.selected, -contains("RMSD")), # adding target to training dataset final.intermediate.features.df, by = names(final.intermediate.features.df)) # "by" statement added to avoid printing message of the variables were used for joining # We are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b. # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet strict.positive.params <- c(4:8) df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params]) # 9. Override default predictorMatrix with a sparser matrix # Let's think a bit more carefully about which variables should be allowed as input for which input parameters. # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS. predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix. # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model. # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # And a reminder of the output vector # 1. AAD.male (4) # 2. SDAD.male (4) # 3. slope.male (0.7) # 4. WSD.male (3) # 5. BSD.male (1.5) # 6. intercept.male (-1) # 7. shape.nb.male (1.29) # 8. scale.nb.male (0.66) # 9. pp.cp.6months.male (0.134) # 10. hiv.prev.lt25.women (0.2) # 11. hiv.prev.lt25.men (0.1) # 12. hiv.prev.25.34.women (0.42) # 13. hiv.prev.25.34.men (0.17) # 14. hiv.prev.35.44.women (0.37) # 15. hiv.prev.35.44.men (0.24) # 16. exp(growthrate)) (1.01) predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them. # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage. 
predictorMatrix[1, x.offset + 10:11] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD predictorMatrix[5, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[6, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate) predictorMatrix[7, x.offset + c(7, 8, 9, 13, 16)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[8, x.offset + c(7, 8, 9, 12, 16)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 14, 15, 16)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence, predictorMatrix[10, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence. predictorMatrix[11, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[12, x.offset + c(7, 8, 9, 12, 13, 16)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[13, x.offset + 16] <- 1 # conception.alpha_base is predicted by popgrowth predictorMatrix[14, x.offset + c(7, 8, 9, 16)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth predictorMatrix[15, x.offset + c(7, 8, 9, 16)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?) # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity. print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE) # 10. Let mice propose parameter values that are predicted by the intermediate features mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data m = n.experiments, # number of imputations predictorMatrix = predictorMatrix, method = "norm", defaultMethod = "norm", maxit = maxit, printFlag = FALSE) # 11. Turn mice proposals into a new matrix of experiments experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names)) # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params]) wave <- wave + 1 } } # 15. Stop clock and return calibration list calibration.list$secondspassed <- proc.time() - ptm # Stop the clock return(calibration.list) } while (wave <= maxwaves){ #print(c("wave", wave), quote = FALSE) # only for local testing. To be deleted after testing is completed #print(c(nrow(experiments), "experiments to be run"), quote = FALSE) # only for local testing. To be deleted after testing is completed # 1. Initially: n.experiments from sobol sequences. 7. 
Later, (1-alpha) fraction of n.experiments from MICE proposals # We should consider calculating an empirical median of the multivariate distribution of simulation outputs, and use this # instead of what we now refer to as the targets.master. sim.results.simple.complete <- as.data.frame(sim.results.simple) %>% dplyr::filter(complete.cases(.)) experim.median.features <- med(sim.results.simple.complete)$median targets.diff <- targets.empirical - experim.median.features #### ## hack to avoid time-consuming initial naive simulations ## previous.data.lists = c(MICE_ABC_testoutput, MICE_ABC_testoutput.2) data.elements.location <- which(names(previous.data.lists) %in% "sim.results.with.design") previous.data <- do.call(rbind, unlist(previous.data.lists[data.elements.location], recursive = FALSE)) # The input and output of ALL the simulations run until now previous.data["y.9"] <- NULL # Taking out the mean degree feature previous.data["sum.sq.rel.dist"] <- NULL # Taking out the mean degree feature ## ## #### x.names <- paste0("x.", seq(1:ncol(experiments))) y.names <- paste0("y.", seq(1:ncol(sim.results.simple))) # y.names <- paste0("y.", seq(1:16)) x.offset <- length(x.names) new.sim.results.with.design.df <- previous.data #################################### Only while developing, using this hack names(new.sim.results.with.design.df) <- c(x.names, y.names) new.sim.results.with.design.df$RMSD <- NA sim.results.with.design.df <- new.sim.results.with.design.df # rbind(sim.results.with.design.df, new.sim.results.with.design.df) #################################### Only while developing, using this hack # 2. Set intermediate targets as close to the empirical targets, but still with enough simulation data close to these intermediate targets. # The best candidate.targets would be the empirical targets, but if they don't have enough support, we gradually lower the bar candidate.targets <- targets.empirical sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) for(i in 1:length(candidate.targets)) { nam <- paste0("y.", i, ".sq.rel.dist") val <- ((sim.results.with.design.df[,i + x.offset] - candidate.targets[i]) / candidate.targets[i])^2 assign(nam, val) sum.sq.rel.dist <- sum.sq.rel.dist + get(nam) } RMSD <- sqrt(sum.sq.rel.dist / length(candidate.targets)) sim.results.with.design.df$RMSD <- RMSD n.close.to.targets <- 0 RMSD.tol <- 0 while (n.close.to.targets < min.givetomice){ RMSD.tol <- RMSD.tol + 0.01 n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE) } if (n.close.to.targets < min.givetomice){ n.close.to.targets <- 0 lower.step <- 0.01 iter <- 0 frac.lower.bar <- iter * lower.step while (n.close.to.targets < min.givetomice & frac.lower.bar <= 1){ candidate.targets <- candidate.targets - lower.step * targets.diff sum.sq.rel.dist <- rep(0, nrow(sim.results.with.design.df)) for(i in 1:length(candidate.targets)) { nam <- paste0("y.", i, ".sq.rel.dist") val <- ((sim.results.with.design.df[,i + x.offset] - candidate.targets[i]) / candidate.targets[i])^2 assign(nam, val) sum.sq.rel.dist <- sum.sq.rel.dist + get(nam) } RMSD <- sqrt(sum.sq.rel.dist / length(candidate.targets)) n.close.to.targets <- sum(RMSD < RMSD.tol, na.rm = TRUE) iter <- iter + 1 frac.lower.bar <- iter * lower.step } sim.results.with.design.df$RMSD <- RMSD } print(frac.lower.bar) print(n.close.to.targets) # ?9. 
Merge with previously kept alpha fraction shortest distances # ?sim.results.with.design.df <- rbind(sim.results.with.design.df.selected, sim.results.with.design.df) # Initially sim.results.with.design.df.selected = NULL # ?Not sure if/how we are going to use this in the new algorithm # 3. Keeping n.close.to.targets shortest distances dist.order <- order(RMSD) # Ordering the squared distances from small to big. selected.distances <- dist.order[1:n.close.to.targets] sim.results.with.design.df.selected <- sim.results.with.design.df[selected.distances, ] %>% dplyr::filter(complete.cases(.)) # Here we filter out any experiments that gave NA output. Probably not necessary because the RMSD would have been NA # 4. Set distance threshold at alpha-percentile best distance. 11. Update distance threshold rel.dist.cutoff <- max(sim.results.with.design.df.selected$RMSD) # We update the rel.dist.cutoff, based on the alpha fraction best parametersets # 5. Write alpha fraction of input-output-distances and distance threshold to list. # Write the # Write new best alpha fraction and updated threshold to list calibration.list$inputoutput[[wave]] <- sim.results.with.design.df.selected calibration.list$candidate.targets[[wave]] <- candidate.targets calibration.list$rel.dist.cutoff[[wave]] <- rel.dist.cutoff # 6. Feed MICE the best alpha fraction and ask for (1-alpha) fraction proposals based on targets candidate.targets.df <- as.data.frame(matrix(candidate.targets, ncol = length(candidate.targets))) # Putting target in dataframe format names(candidate.targets.df) <- y.names df.give.to.mice <- dplyr::full_join(dplyr::select(sim.results.with.design.df.selected, -contains("RMSD")), # adding target to training dataset candidate.targets.df, by = names(candidate.targets.df)) # "by" statement added to avoid printing message of the variables were used for joining # We are transforming parameters that are necessarily strictly positive: sigma, gamma.a, gamma.b. # We could also consider a similar transformation for input parameters that we think should be negative (e.g. formation.hazard.agegapry.gap_factor_man_exp) but for now not yet strict.positive.params <- c(4:8) df.give.to.mice[, strict.positive.params] <- log(df.give.to.mice[, strict.positive.params]) # Let's think a bit more carefully about which variables should be allowed as input for which input parameters. # IN THE FUTURE THIS COULD BE AUTOMATED WITH VARIABLE SELECTION ALGORITHMS. predictorMatrix <- (1 - diag(1, ncol(df.give.to.mice))) # This is the default matrix. # Let's now modify the first 15 rows of this matrix, corresponding to the indicators of predictor variables for the input variables. In brackets the values for the master model. # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. 
dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # And a reminder of the output vector # 1. AAD.male (4) # 2. SDAD.male (4) # 3. slope.male (0.7) # 4. WSD.male (3) # 5. BSD.male (1.5) # 6. intercept.male (-1) # 7. shape.nb.male (1.29) # 8. scale.nb.male (0.66) # 9. meandegree.male (1) # 10. pp.cp.6months.male (0.134) # 11. hiv.prev.lt25.women (0.2) # 12. hiv.prev.lt25.men (0.1) # 13. hiv.prev.25.34.women (0.42) # 14. hiv.prev.25.34.men (0.17) # 15. hiv.prev.35.44.women (0.37) # 16. hiv.prev.35.44.men (0.24) # 17. exp(growthrate)) (1.01) predictorMatrix[1:length(x.names), ] <- 0 # First we "empty" the relevant rows, then we refill them. # We are currently not allowing input variables to be predicted by other predictor variables. Only via output variables. We could change this at a later stage. predictorMatrix[1, x.offset + 11:12] <- 1 # relative susceptibility in young women is predicted by HIV prevalence in young men and women predictorMatrix[2, x.offset + 3] <- 1 # agescale predicted by slope predictorMatrix[3, x.offset + c(1, 3, 6)] <- 1 # mean of the person-specific age gap preferences is predicted by slope, intercept and AAD predictorMatrix[4, x.offset + c(2, 4, 5)] <- 1 # sd of the person-specific age gap preferences is predicted by SD, WSD, BSD predictorMatrix[5, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[6, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma a predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.women, exp(growthrate) predictorMatrix[7, x.offset + c(7, 8, 10, 14, 17)] <- 1 # man gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[8, x.offset + c(7, 8, 10, 13, 17)] <- 1 # woman gamma b predicted by gamma shape.male, scale.male, pp.cp, hiv.prev.25.34.men, exp(growthrate) predictorMatrix[9, x.offset + c(2, 4, 5, 7, 8, 15, 16, 17)] <- 1 # formation.hazard.agegapry.gap_factor_x_exp is predicted by population growth, age gap variance, hiv prevalence, predictorMatrix[10, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # baseline formation hazard predicted by HIV prevalence, cp, degree distrib. HIV prevalence. predictorMatrix[11, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # numrel man penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[12, x.offset + c(7, 8, 10, 13, 14, 17)] <- 1 # # numrel woman penalty is predicted by degree distrib, cp, prev, popgrowth predictorMatrix[13, x.offset + 17] <- 1 # conception.alpha_base is predicted by popgrowth predictorMatrix[14, x.offset + c(7, 8, 10, 17)] <- 1 # baseline dissolution hazard predicted by degree distrib, cp, popgrowth predictorMatrix[15, x.offset + c(7, 8, 10, 17)] <- 1 # age effect on dissolution hazard predicted by degree distrib, cp, popgrowth, HIV prev in older people (maybe?) # NOTE: As it stands, each output statistic is predicted by ALL input and ALL other output statistics. That may not be a great idea, or even possible, if there is collinearity. 
print(c(nrow(df.give.to.mice), "nrows to give to mice"), quote = FALSE) mice.test <- mice(data = df.give.to.mice, # the dataframe with missing data m = n.experiments, #round(n.experiments * (1-alpha)), # number of imputations # Let's ask for 128 imputations predictorMatrix = predictorMatrix, method = "norm", defaultMethod = "norm", maxit = maxit, printFlag = FALSE, data.init = NULL) # experiments <- cbind(unlist(mice.test$imp$x.1), # these are the (1-alpha) fraction of n.experiments proposals # unlist(mice.test$imp$x.2)) experiments <- unlist(mice.test$imp) %>% matrix(., byrow = FALSE, ncol = length(x.names)) # Before we check the suitability of the new experimental input parameter values, we must backtransform the logits to probabilities # experiments[, probs] <- boot::inv.logit(experiments[, probs]) # Before we check the suitability of the new experimental input parameter values, we must backtransform the log values to natural values experiments[, strict.positive.params] <- exp(experiments[, strict.positive.params]) wave <- wave + 1 } calibration.list$secondspassed <- proc.time() - ptm # Stop the clock return(calibration.list) } #data.elements.location <- which(names(previous.data.lists) %in% "sim.results.with.design") #previous.data <- do.call(rbind, unlist(previous.data.lists[data.elements.location], recursive = FALSE)) # The input and output of ALL the simulations run until now. # previous.calib.lls <- as.numeric(apply(experiments, 2, min)) # previous.calib.uls <- as.numeric(apply(experiments, 2, max)) # # To see how the previous calibration shifted the input ranges, we should also recall the lls and uls of the calibration before the previous (which may be the initial naive range boundaries) # pre.experiments <- MICE_ABC_testoutput$inputoutput[[length(MICE_ABC_testoutput$inputoutput)]][, 1:length(x.names)] # pre.previous.calib.lls <- as.numeric(apply(pre.experiments, 2, min)) # pre.previous.calib.uls <- as.numeric(apply(pre.experiments, 2, max))
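The predictorMatrix logic inside the function above hinges on how mice interprets that matrix: each row corresponds to the variable being imputed, and each column indicates whether the corresponding variable is used as a predictor (1) or ignored (0). Below is a small, self-contained toy illustration of that convention; the data frame and its values are made up for this example only and are not part of the calibration pipeline.
library(mice)
set.seed(1)
toy <- data.frame(x.1 = c(NA, rnorm(19)), # "input parameter" whose missing value mice will propose
                  y.1 = rnorm(20),        # "output" statistics acting as predictors
                  y.2 = rnorm(20))
pm <- 1 - diag(1, ncol(toy)) # default: every variable predicts every other variable
dimnames(pm) <- list(names(toy), names(toy))
pm["x.1", ] <- 0
pm["x.1", c("y.1", "y.2")] <- 1 # x.1 is imputed from the output statistics only
toy.imp <- mice(data = toy, m = 2, predictorMatrix = pm,
                method = "norm", maxit = 5, printFlag = FALSE)
unlist(toy.imp$imp$x.1) # the two proposed values for the missing x.1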
test.MICE_ABC.incremental.results <- MICE_ABC.incremental(targets.empirical = targets.empirical,
                                                          targets.master = targets.master,
                                                          RMSD.tol = 0.2,
                                                          min.givetomice = 128,
                                                          n.experiments = 512,
                                                          lls = c(1, 0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16),
                                                          uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3, -0.001),
                                                          model = simpact.wrapper,
                                                          maxit = 50,
                                                          maxwaves = 10)
MICE_ABC.incremental <- function(targets.empirical = targets.empirical,
                                 targets.master = targets.master, # 0.3 * targets.diff,
                                 #previous.data.lists = c(MICE_ABC_testoutput, MICE_ABC_testoutput.2, MICE_ABC_testoutput.3),
                                 RMSD.tol = 0.2,
                                 min.givetomice = 128,
                                 n.experiments = 512,
                                 lls = c(1, 0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16),
                                 uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3, -0.001),
                                 model = simpact.wrapper,
                                 maxit = 50,
                                 maxwaves = 2)
ptm <- proc.time() # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # inputs: c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05) MICE_ABC_testoutput <- MICE_ABC(targets = targets.master + 0.1 * targets.diff, #targets.empirical, # 17 output statistics c(4, 4, 0.7, 3, 1.5, -1, 1.29, 0.66, 1, 0.134, 0.2, 0.1, 0.42, 0.17, 0.37, 0.24, 1.01) n.experiments = 128, lls = c(1, 0.12, -0.3, 2.5, 0.1, 0.1, 20, 20, -0.8, 2, -0.35, -0.35, -3.6, -0.8, -0.16), #c(2, -1, 0.1, 2, 1, 1, 0.3), #c(0.1, -3, 0.1, 0, 0, 0, 0.2), uls = c(1.2, 0.37, 0.3, 3.5, 0.4, 0.4, 66, 66, -0.25, 3.9, -0.1, -0.1, -1.4, -0.3, -0.001), #c(4, -0.2, 0.6, 8, 6, 6, 0.95), #c(5, -0.1, 1.0, 10, 10, 10, 1.0), model = simpact.wrapper, maxit = 20, maxwaves = 5, # and we can also try 10 to do the same as the 10 waves of Lenormand #reps = 5, alpha = 0.25, saturation.crit = 0) mice.timeused <- proc.time() - ptm mice.timeused # xxxx seconds
root.path <- dirname(getwd())
save(MICE_ABC_testoutput, file = paste0(root.path, "/MiceABC/mice.age-mixing.RData"))
Now we can use the calibrated models from this first step as the starting point for the second step (targets.master + 0.2 * targets.diff). The shift in parameter values from the base model to this new starting point can hint at how to choose the new region of parameter space to explore. In the exercise below we start again with LHS sampling, but we should modify the miceabc code so that the input from inputoutput[[lastwave]] gets used as the initial experiments matrix in wave 1.
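A minimal sketch of how those new ranges could be derived, assuming (as in the commented-out code above) that MICE_ABC_testoutput$inputoutput[[last wave]] holds the retained experiments with the 15 input parameters in its first columns; the widening factor is an arbitrary illustration, not a tuned value.
prev.experiments <- MICE_ABC_testoutput$inputoutput[[length(MICE_ABC_testoutput$inputoutput)]][, 1:15]
prev.lls <- as.numeric(apply(prev.experiments, 2, min))
prev.uls <- as.numeric(apply(prev.experiments, 2, max))
widen <- 0.25 # widen the retained range a little, so the next step can still move outwards
new.lls <- prev.lls - widen * (prev.uls - prev.lls)
new.uls <- prev.uls + widen * (prev.uls - prev.lls)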
ptm <- proc.time() # 1. hivtransmission.param.f1 (2) # 2. formation.hazard.agegapry.gap_agescale_man = formation.hazard.agegapry.gap_agescale_woman (0.25) # 3. person.agegap.man.dist.normal.mu = person.agegap.woman.dist.normal.mu (0) # 4. person.agegap.man.dist.normal.sigma = person.agegap.woman.dist.normal.sigma (3) # 5. person.person.eagerness.man.dist.gamma.a (0.23) # 6. person.person.eagerness.woman.dist.gamma.a (0.23) # 7. person.person.eagerness.man.dist.gamma.b (45) # 8. person.person.eagerness.woman.dist.gamma.b (45) # 9. formation.hazard.agegapry.gap_factor_man_exp = formation.hazard.agegapry.gap_factor_woman_exp (-0.5) # 10. formation.hazard.agegapry.baseline (0) # 11. formation.hazard.agegapry.numrel_man (-0.5) # 12. formation.hazard.agegapry.numrel_woman (-0.5) # 13. conception.alpha_base (-2.5) # 14. dissolution.alpha_0 (-0.52) # 15. dissolution.alpha_4 (-0.05)) # inputs: c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05) MICE_ABC_testoutput.2 <- MICE_ABC(targets = targets.master + 0.2 * targets.diff, #targets.empirical, # 17 output statistics c(4, 4, 0.7, 3, 1.5, -1, 1.29, 0.66, 1, 0.134, 0.2, 0.1, 0.42, 0.17, 0.37, 0.24, 1.01) n.experiments = 256, lls = c(1, 0.12, -0.5, 2.8, 0.2, 0.1, 30, 30, -0.7, 2, -0.35, -0.35, -3.8, -0.6, -0.08), #c(2, -1, 0.1, 2, 1, 1, 0.3), #c(0.1, -3, 0.1, 0, 0, 0, 0.2), uls = c(1.2, 0.3, 0.4, 3.5, 0.4, 0.4, 66, 66, -0.35, 3.9, -0.1, -0.1, -2, -0.25, -0.004), #c(4, -0.2, 0.6, 8, 6, 6, 0.95), #c(5, -0.1, 1.0, 10, 10, 10, 1.0), model = simpact.wrapper, maxit = 20, maxwaves = 8, # and we can also try 10 to do the same as the 10 waves of Lenormand #reps = 5, alpha = 0.25, saturation.crit = 0) mice.timeused <- proc.time() - ptm mice.timeused # xxxx seconds
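The two calls above perform the 0.1 and 0.2 shifts by hand. Purely as an illustration of the incremental schedule they imply, the snippet below computes the full sequence of target vectors in 10% steps from targets.master towards targets.empirical; each vector would then be passed to MICE_ABC in turn, with parameter ranges informed by the previous step.
shift.steps <- seq(0.1, 1, by = 0.1)
incremental.targets <- lapply(shift.steps, function(s) targets.master + s * targets.diff)
names(incremental.targets) <- paste0("shift.", shift.steps)
str(incremental.targets, max.level = 1)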
MICE_ABC_testoutput.2$rel.dist.cutoff
str(MICE_ABC_testoutput.2$sim.results.with.design)
str(MICE_ABC_testoutput.2$inputoutput)
# A reminder of the output vector:
# 1. AAD.male (4)
# 2. SDAD.male (4)
# 3. slope.male (0.7)
# 4. WSD.male (3)
# 5. BSD.male (1.5)
# 6. intercept.male (-1)
# 7. shape.nb.male (1.29)
# 8. scale.nb.male (0.66)
# 9. meandegree.male (1)
# 10. pp.cp.6months.male (0.134)
# 11. hiv.prev.lt25.women (0.2)
# 12. hiv.prev.lt25.men (0.1)
# 13. hiv.prev.25.34.women (0.42)
# 14. hiv.prev.25.34.men (0.17)
# 15. hiv.prev.35.44.women (0.37)
# 16. hiv.prev.35.44.men (0.24)
# 17. exp(growthrate) (1.01)
targets.master + 0.2 * targets.diff
MICE_ABC_testoutput.2$inputoutput[[8]][1:5, 16:32]
# initial naive input: c(1.1, 0.25, 0, 3, 0.23, 0.23, 45, 45, -0.5, 2.8, -0.2, -0.2, -2.5, -0.52, -0.05)
MICE_ABC_testoutput.2$inputoutput[[8]][1:5, 1:15]
root.path <- dirname(getwd())
save(MICE_ABC_testoutput.2, file = paste0(root.path, "/MiceABC/mice.age-mixing.0.2.RData"))
root.path <- dirname(getwd())
load(file = paste0(root.path, "/MiceABC/incidence.rel.reduc.RData"))
Now let's subtract the incidence estimate under the intervention from the estimate under the control scenario, to obtain the relative reduction in incidence.
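A minimal sketch of that computation, assuming sumsmstats.control.df and sumsmstats.interv.df each hold one row per simulation run with the incidence estimate in the last column (the same calculation appears, commented out, in the chunk below):
lastcolumn <- ncol(sumsmstats.control.df)
incidence.rel.reduc <- (sumsmstats.control.df[, lastcolumn] - sumsmstats.interv.df[, lastcolumn]) /
  sumsmstats.control.df[, lastcolumn]
hist(incidence.rel.reduc, 20)
quantile(incidence.rel.reduc, probs = c(0.025, 0.975), na.rm = TRUE) # 95% prediction interval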
summary.interv <- summary(sumsmstats.interv.df) #colMeans(sumsmstats.interv.df, na.rm = TRUE)
#lastcolumn <- ncol(sumsmstats.control.df)
#incidence.rel.reduc <- (sumsmstats.control.df[,lastcolumn] - sumsmstats.interv.df[,lastcolumn]) / sumsmstats.control.df[,lastcolumn]
#hist(incidence.rel.reduc, 20)
#summary(incidence.rel.reduc)
#predict.95.percent.interval.incidence.rel.reduc <- quantile(incidence.rel.reduc, probs = c(0.025, 0.975), na.rm = TRUE)
summary.interv.master <- summary.interv # summary(sumsmstats.interv.df)
target.vector.master <- colMeans(sumsmstats.interv.df, na.rm = TRUE)
target.vector.trainer <- head(target.vector.master, -1)
# We're turning target zeros into target ones by exponentiating the statistics that are on the log scale
target.vector.trainer[9:length(target.vector.trainer)] <- exp(target.vector.trainer[9:length(target.vector.trainer)])
Now let's explore and visualise the results
root.path <- dirname(getwd()) load(file = paste0(root.path, "/MiceABC/reject.tasp.RData")) load(file = paste0(root.path, "/MiceABC/mice.tasp.RData")) # 1. For rejectabc # Calculating squared relative distances and sum thereof for the ABC rejection method rej.sum.sq.rel.dist <- rep(0, nrow(rejectabc.output$stats)) for(i in 1:length(target.vector.trainer)) { nam <- paste0("y.", i, ".sq.rel.dist") val <- ((rejectabc.output$stats[,i] - targets[i]) / targets[i])^2 assign(nam, val) rej.sum.sq.rel.dist <- rej.sum.sq.rel.dist + get(nam) } #rejectabc.output$y.1.sq.rel.dist <- ((rejectabc.output$stats[,1] - targets[1]) / targets[1])^2 #rejectabc.output$y.2.sq.rel.dist <- ((rejectabc.output$stats[,2] - targets[2]) / targets[2])^2 #rejectabc.output$sum.sq.rel.dist <- rejectabc.output$y.1.sq.rel.dist + rejectabc.output$y.2.sq.rel.dist dist.order <- order(rej.sum.sq.rel.dist) # Ordering the squared distances from small to big. last.one.selected <- 5 selected.distances <- dist.order[1:last.one.selected] best5.rejection <- rep(FALSE, length(dist.order)) best5.rejection[selected.distances] <- TRUE abcreject.df <- as.data.frame(cbind(rejectabc.output$param, rejectabc.output$stats)) abcreject.df$best5 <- best5.rejection abcreject.df$method <- "rejection" xvars.names <- paste0("x.", 1:7) # This will also be used by the other methods yvars.names <- paste0("y.", 1:39) # This will also be used by the other methods names(abcreject.df) <- c(xvars.names, yvars.names, "best5", "method") # 2. For Lenormandabc # 3. For miceabc # First for the 5 waves xplusyvars.length <- ncol(MICE_ABC_testoutput$sim.results.with.design[[1]]) - 1 # The last variable is the sum of squared errors miceabc_inout_all <- do.call(rbind, MICE_ABC_testoutput$sim.results.with.design)[,1:xplusyvars.length] best5.mice <- miceabc_inout_all$y.1 %in% MICE_ABC_testoutput$inputoutput[[5]]$y.1[1:5] abcmice.df <- as.data.frame(cbind(miceabc_inout_all, best5.mice)) abcmice.df$method <- "mice" names(abcmice.df) <- c(names(miceabc_inout_all), "best5", "method") ### The dataset abcmice.df will be merged with other datasets generated by the comparison methods
Now that we have calibrated the model with Rejection ABC, let's run the 5 best models many times under the intervention and control scenarios, to estimate the impact of TasP.
reps <- 6 # Change to 200 reps eventually
calib.rej.df <- abcreject.df %>% dplyr::filter(., best5 == TRUE) %>% dplyr::select(., x.1:x.7)
design.matrix.calib.rej <- matrix(rep(as.numeric(unlist(calib.rej.df)), each = reps),
                                  nrow = reps * nrow(calib.rej.df),
                                  byrow = FALSE)
design.matrix.calib.rej.control <- cbind(design.matrix.calib.rej, 0)
design.matrix.calib.rej.interv <- cbind(design.matrix.calib.rej, 1)
summstats.calib.rej.control.df <- simpact.parallel(model = simpact.wrapper.master,
                                                   actual.input.matrix = design.matrix.calib.rej.control,
                                                   seed_count = 0,
                                                   n_cluster = 8)
summstats.calib.rej.interv.df <- simpact.parallel(model = simpact.wrapper.master,
                                                  actual.input.matrix = design.matrix.calib.rej.interv,
                                                  seed_count = 0,
                                                  n_cluster = 8)
lastcolumn.calib.rej <- ncol(summstats.calib.rej.control.df)
incidence.rel.reduc.calib.rej <- (summstats.calib.rej.control.df[, lastcolumn.calib.rej] -
                                  summstats.calib.rej.interv.df[, lastcolumn.calib.rej]) /
                                 summstats.calib.rej.control.df[, lastcolumn.calib.rej]
hist.data.incidence.rel.reduc.calib.rej <- hist(incidence.rel.reduc.calib.rej, 20)
summary(incidence.rel.reduc.calib.rej)
predict.95.percent.interval.incidence.rel.reduc.calib.rej <- quantile(incidence.rel.reduc.calib.rej,
                                                                      probs = c(0.025, 0.975),
                                                                      na.rm = TRUE)
Now that we have calibrated the model with MICE-Cali, let's run the 5 best models many times under the intervention and control scenarios, to estimate the impact of TasP.
reps <- 6 # Change to 200 reps eventually
calib.mice.df <- abcmice.df %>% dplyr::filter(., best5 == TRUE) %>% dplyr::select(., x.1:x.7)
design.matrix.calib.mice <- matrix(rep(as.numeric(unlist(calib.mice.df)), each = reps),
                                   nrow = reps * nrow(calib.mice.df),
                                   byrow = FALSE)
design.matrix.calib.mice.control <- cbind(design.matrix.calib.mice, 0)
design.matrix.calib.mice.interv <- cbind(design.matrix.calib.mice, 1)
summstats.calib.mice.control.df <- simpact.parallel(model = simpact.wrapper.master,
                                                    actual.input.matrix = design.matrix.calib.mice.control,
                                                    seed_count = 0,
                                                    n_cluster = 8)
summstats.calib.mice.interv.df <- simpact.parallel(model = simpact.wrapper.master,
                                                   actual.input.matrix = design.matrix.calib.mice.interv,
                                                   seed_count = 0,
                                                   n_cluster = 8)
lastcolumn.calib.mice <- ncol(summstats.calib.mice.control.df)
incidence.rel.reduc.calib.mice <- (summstats.calib.mice.control.df[, lastcolumn.calib.mice] -
                                   summstats.calib.mice.interv.df[, lastcolumn.calib.mice]) /
                                  summstats.calib.mice.control.df[, lastcolumn.calib.mice]
hist.data.incidence.rel.reduc.calib.mice <- hist(incidence.rel.reduc.calib.mice, 20)
summary(incidence.rel.reduc.calib.mice)
predict.95.percent.interval.incidence.rel.reduc.calib.mice <- quantile(incidence.rel.reduc.calib.mice,
                                                                       probs = c(0.025, 0.975),
                                                                       na.rm = TRUE)
# Adding the target statistics and their inputs
target.df <- data.frame(matrix(c(3, -0.5, 0.5, 5, 3, 3, 0.75, target.vector.trainer), nrow = 1), # The inputs and targets
                        best5 = TRUE,
                        method = "target")
names(target.df) <- c(names(miceabc_inout_all), "best5", "method")
# Now we can rbind these datasets (rejection, mice and target; Lenormand is commented out for now)
# into one dataset that we can plot with ggplot2
abc.comparison.tasp.df <- rbind(abcreject.df,
                                #abcLenorm.df,
                                abcmice.df,
                                target.df)
abc.comparison.tasp.df$method <- factor(abc.comparison.tasp.df$method,
                                        labels = c("MICE", "Rejection", "Target")) # c("Lenormand", "MICE", "Rejection", "Target"))
abc.comparison.tasp.df$method <- factor(abc.comparison.tasp.df$method,
                                        levels(abc.comparison.tasp.df$method)[c(2, 1, 3)]) # c(3, 1, 2, 4) We want the levels in a particular order, for plotting and colour purposes
#levels(abc.comparison.V8.df$method)
Let's plot how the calibrated models compare against the target statistics and against each other.
# columns 9:24 capture the ART coverage at times 25:40 outputcomparison.data <- as.data.frame(rbind(summstats.calib.rej.interv.df, summstats.calib.mice.interv.df, target.vector.master)) names(outputcomparison.data) <- c(yvars.names, "incid.rel.reduc") outputcomparison.data$method <- c(rep(c("Rejection", "MICE"), each = nrow(summstats.calib.rej.interv.df)), "Target") outputcomparison.data$method <- factor(outputcomparison.data$method, labels = c("MICE", "Rejection", "Target")) # c("Lenormand", "MICE", "Rejection", "Target")) outputcomparison.data$method <- factor(outputcomparison.data$method,levels(outputcomparison.data$method)[c(1, 2, 3)]) # c(3, 1, 2, 4) We want the levels in a particular order, for plotting and colour purposes outputcomparison.data$is.target <- c(rep(0, nrow(outputcomparison.data)-1), 1) outputcomparison.data$unique.ID <- as.character(paste0(outputcomparison.data$y.1, outputcomparison.data$y.2, outputcomparison.data$y.3)) ### # Comparing various output statistics library(GGally) outputcomparison.data$is.target <- factor(outputcomparison.data$is.target) ggpairs(outputcomparison.data[, c(1,2,5,6,7, 41:42)], aes(colour = method, shape = is.target), columns = 1:5, columnLabels = c("AAD", "SDAD", "WVAD.base", "BVAD", "HIV+ 15-50 yo"), upper = list(continuous = "blank")) ggpairs(outputcomparison.data[, c(25:30, 33, 36,39, 41:42)], aes(colour = method, shape = is.target), columns = 1:9, columnLabels = c("<100 25-30", "<100 30-35", "<100 35-40", "100-200 25-30", "100-200 30-35", "100-200 35-40", "200-350 35-40", "350-500 35-40", ">500 35-40"), upper = list(continuous = "blank")) # To plot ART coverage over time, we need to turn wide into long format names(outputcomparison.data)[9:24] <- 25:40 outputcomparison.ARTcoverage.data <- tidyr::gather(data = outputcomparison.data, key = Time, convert = TRUE, value = ART.coverage, 9:24) ARTcoverage.comparison <- ggplot(data = outputcomparison.ARTcoverage.data, aes(x = Time, y = ART.coverage, alpha = factor(is.target), group = unique.ID, col = method)) + geom_line() + scale_colour_manual(values = cb.Palette) + scale_alpha_discrete(breaks = NULL, range = c(0.2, 1)) + #scale_shape_manual(breaks = NULL, # values = c(20, 10)) + #scale_size_manual(breaks = NULL, # values = 4) + xlab("Simulation time (years)") + ylab("ART coverage") + theme_bw() + guides(colour = guide_legend(title = "Method"))#, # override.aes = list(shape = c(20, 20, 20, 10), # size = c(4, 4, 4, 4)))) plot(ARTcoverage.comparison) ### Now let's plot the TasP impact estimate median.impact <- c(median(incidence.rel.reduc.calib.rej), median(incidence.rel.reduc.calib.mice), median(incidence.rel.reduc, na.rm = TRUE)) mean.impact <- c(mean(incidence.rel.reduc.calib.rej), mean(incidence.rel.reduc.calib.mice), mean(incidence.rel.reduc, na.rm = TRUE)) p25.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.25, na.rm = TRUE), quantile(incidence.rel.reduc.calib.mice, probs = 0.25, na.rm = TRUE), quantile(incidence.rel.reduc, probs = 0.25, na.rm = TRUE)) p75.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.75, na.rm = TRUE), quantile(incidence.rel.reduc.calib.mice, probs = 0.75, na.rm = TRUE), quantile(incidence.rel.reduc, probs = 0.75, na.rm = TRUE)) p025.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.025, na.rm = TRUE), quantile(incidence.rel.reduc.calib.mice, probs = 0.025, na.rm = TRUE), quantile(incidence.rel.reduc, probs = 0.025, na.rm = TRUE)) p975.impact <- c(quantile(incidence.rel.reduc.calib.rej, probs = 0.975, na.rm = TRUE), 
quantile(incidence.rel.reduc.calib.mice, probs = 0.975, na.rm = TRUE), quantile(incidence.rel.reduc, probs = 0.975, na.rm = TRUE)) method <- c("Rejection", "MICE", "Master") impact.comparison.data <- data.frame(median.impact = median.impact, mean.impact = mean.impact, p25.impact = p25.impact, p75.impact = p75.impact, p025.impact = p025.impact, p975.impact = p975.impact, method = method) impact.comparison <- ggplot(data = impact.comparison.data, aes(x = method, y = median.impact, col = method)) + geom_crossbar(aes(ymin = p25.impact, ymax = p75.impact), width = 0.2) + geom_errorbar(aes(ymin = p025.impact, ymax = p975.impact), width = 0.2) + scale_colour_manual(values = c(cb.Palette[3], cb.Palette[1], cb.Palette[2])) + xlab("") + ylim(0, 1) + ylab("Relative Reduction in HIV incidence") + # TasP effect on HIV incidence in the last 2 years of the simulation theme_bw() + guides(colour = guide_legend(title = "Method"))#, # override.aes = list(shape = c(20, 20, 20, 10), # size = c(4, 4, 4, 4)))) plot(impact.comparison)