Nothing
#' Calculate cluster model estimates using beta binomial model
#'
#' This function calculate smoothed direct estimates at given admin level.
#'
#' @param data dataframe that contains the indicator of interests, output of getDHSindicator function
#' @param cluster.info dataframe that contains admin 1 and admin 2 information and coordinates for each cluster.
#' @param admin.info dataframe that contains population and urban/rural proportion at specific admin level
#' @param admin admin level for the model
#' @param CI Credible interval to be used. Default to 0.95.
#' @param model smoothing model used in the random effect. Options are independent ("iid") or spatial ("bym2").
#' @param stratification whether or not to include urban/rural stratum.
#' @param aggregation whether or not report aggregation results.
#' @param overdisp.mean prior mean for logit(d), where d is the intracluster correlation.
#' @param overdisp.prec prior precision for logit(d), where d is the intracluster correlation.
#'
#' @return This function returns the dataset that contain district name and population for given tiff files and polygons of admin level,
#' @import dplyr
#' @importFrom survey svydesign svyby
#' @importFrom data.table data.table
#' @importFrom utils tail
#' @importFrom stats sd quantile rnorm
#' @importFrom matrixStats colMedians
#' @importFrom stats median na.omit
#' @importFrom SUMMER smoothSurvey
#' @author Qianyu Dong
#' @examples
#' \dontrun{
#' geo <- getDHSgeo(country = "Zambia", year = 2018)
#' data(ZambiaAdm1)
#' data(ZambiaAdm2)
#' data(ZambiaPopWomen)
#' cluster.info <- clusterInfo(geo = geo,
#' poly.adm1 = ZambiaAdm1,
#' poly.adm2 = ZambiaAdm2)
#'
#' dhsData <- getDHSdata(country = "Zambia",
#' indicator = "ancvisit4+",
#' year = 2018)
#'
#' data <- getDHSindicator(dhsData, indicator = "ancvisit4")
#' admin.info1 <- adminInfo(poly.adm = ZambiaAdm1,
#' admin = 1,
#' agg.pop =ZambiaPopWomen$admin1_pop,
#' proportion = ZambiaPopWomen$admin1_urban)
#' cl_res_ad1 <- clusterModel(data=data,
#' cluster.info = cluster.info,
#' admin.info = admin.info1,
#' stratification = FALSE,
#' model = "bym2",
#' admin = 1,
#' aggregation = TRUE,
#' CI = 0.95)
#' cl_res_ad1$res.admin1
#'
#' # compare with the DHS direct estimates
#' dhs_table <- get_api_table(country = "ZM",
#' survey = "ZM2018DHS",
#' indicator = "RH_ANCN_W_N4P",
#' simplify = TRUE)
#' subset(dhs_table, ByVariableLabel == "Five years preceding the survey")
#'
#' }
#'
#' @export
clusterModel<-function(data,cluster.info, admin.info, admin, CI = 0.95, model = c("bym2", "iid"), stratification = FALSE, aggregation = FALSE, overdisp.mean=0, overdisp.prec=0.4 ){
if (sum(is.na(data$value)) > 0) {
data <- data[rowSums(is.na(data)) == 0, ]
message("Removing NAs in indicator response")
}
# if(is.null(admin.info) && model != "iid"){
# message("No admin.info supplied. Using IID random effects.")
# model <- "iid"
# }
if( unique(is.na(admin.info$data$urban)) && stratification==T){
message("No urban/rural proportion found. Unstratified model is fitted instead")
stratification = F
}
if( unique(is.na(admin.info$data$population)) && aggregation==T){
message("No population found")
aggregation = F
}
if(model == "bym2"){
Amat <- admin.info$mat
}else{
Amat <- NULL
}
admin.mat <- Amat# Amat is sorted by admin1.name then admin2.name alphabetically for admin2
admin.info <- admin.info$data
modt<- left_join(data,cluster.info$data,by="cluster")
modt<- modt[!(is.na(modt$LONGNUM)), ]
# modt$strata.full <- paste(modt$admin1.name, modt$strata)
c.dat.tmp <- modt %>%
group_by(cluster) %>%
mutate(n = length(cluster)) %>%
mutate(value = sum(value,na.rm = T)) %>%
ungroup()%>%
distinct( cluster, .keep_all = TRUE)
admin_name_table<-admin.info
nregion <- dim(admin_name_table)[1]
admin_name_table$sID <- 1: dim(admin_name_table)[1] #sID is sorted by admin1.name then admin2.name alphabetically.
# add new rows corresponding to each region
if(admin==1){
c.dat.tmp[(dim(c.dat.tmp)[1]+1):(dim(c.dat.tmp)[1]+nregion),paste0("admin",admin,".name")] <- admin_name_table[,which(colnames(admin_name_table)== paste0("admin",admin,".name"))]
c.dat.tmp$ID <- 1:dim(c.dat.tmp)[1]
c.dat.tmp$sID <-admin_name_table$sID[match(as.data.frame(c.dat.tmp)[,which(colnames(c.dat.tmp)== paste0("admin",admin,".name"))],
admin_name_table[,which(colnames(admin_name_table)== paste0("admin",admin,".name"))])]
}else{
c.dat.tmp[(dim(c.dat.tmp)[1]+1):(dim(c.dat.tmp)[1]+nregion),"admin2.name.full"] <- admin_name_table[,which(colnames(admin_name_table)=="admin2.name.full")]
c.dat.tmp$ID <- 1:dim(c.dat.tmp)[1]
c.dat.tmp$sID <-admin_name_table$sID[match(as.data.frame(c.dat.tmp)[,which(colnames(c.dat.tmp)== "admin2.name.full")],
admin_name_table[,which(colnames(admin_name_table)== "admin2.name.full")])]
}
## MODEL setup
if(stratification==F){
if(model=="iid"){
pc.u = 1
pc.alpha = 0.01
formula <- value ~ 1 + f(sID, model = model,graph = admin.mat,
hyper = list(prec = list(prior = "pc.prec",
param = c(pc.u , pc.alpha))))
}else if(model=="bym2"){
pc.u = 1
pc.alpha = 0.01
pc.u.phi <- 0.5
pc.alpha.phi <- 2/3
formula <- value ~ 1+
f(sID, model = model, graph = admin.mat,
hyper = list(
prec = list(prior = "pc.prec", param = c(pc.u , pc.alpha)),
phi = list(prior = 'pc', param = c(pc.u.phi , pc.alpha.phi))))
}
}else if(stratification){ # + strata
if(model=="iid"){
pc.u = 1
pc.alpha = 0.01
formula <- value ~ 1 + strata+ f(sID, model = model,graph = admin.mat,
hyper = list(prec = list(prior = "pc.prec",
param = c(pc.u , pc.alpha))))
}else if(model=="bym2"){
pc.u = 1
pc.alpha = 0.01
pc.u.phi <- 0.5
pc.alpha.phi <- 2/3
formula <- value ~ 1 + strata +
f(sID, model = model, graph = admin.mat,
hyper = list(
prec = list(prior = "pc.prec", param = c(pc.u , pc.alpha)),
phi = list(prior = 'pc', param = c(pc.u.phi , pc.alpha.phi))))
}
}
## MODELING:
overdisp.mean=overdisp.mean
overdisp.prec=overdisp.prec
control.family <- list(hyper = list(rho = list(param = c(overdisp.mean, overdisp.prec), initial = overdisp.mean)))
imod<- INLA::inla(formula,
family="betabinomial",
data=c.dat.tmp,
Ntrials=n,
control.predictor = list(compute=TRUE, link = 1),
control.compute = list(config = TRUE),
control.family = control.family)
nsamp <- 1000
samp <- INLA::inla.posterior.sample(n = nsamp, result = imod, intern = TRUE)
#draw posterior samples
if(stratification==F){
draw.all <- matrix(NA, nsamp, nregion)
for(i in 1:length(samp)){
tmp <- samp[[i]]$latent
s.effect <- tmp[paste0("sID:", 1:nregion), 1]
intercept <- tmp["(Intercept):1", 1]
draw.all[i, ] <- SUMMER::expit(s.effect + intercept)
}
}else if(stratification){
draw.u <- matrix(NA, nsamp, nregion)
draw.r <- matrix(NA, nsamp, nregion)
draw.all <- matrix(NA, nsamp, nregion)
for(i in 1:length(samp)){
tmp <- samp[[i]]$latent
s.effect <- tmp[paste0("sID:", 1:nregion), 1]
intercept <- tmp["(Intercept):1", 1]
str.effect <- tmp["stratarural:1", 1]
draw.u[i, ] <- expit(s.effect + intercept)
draw.r[i, ] <- expit(s.effect + intercept + str.effect)
draw.all[i, ] <- draw.u[i, ] * admin.info$urban +
draw.r[i, ] * (1 - admin.info$urban)
}
}
#aggregation
if(admin==1){
if(stratification==F){
admin1.bb.res<- data.frame(cbind(
admin1.name=admin.info$admin1.name[tail(c.dat.tmp$sID,n=nregion)],
mean= tail(imod$summary.fitted.values,n=nregion)[,1],
median=colMedians(draw.all),
sd= tail(imod$summary.fitted.values,n=nregion)[,2],
var = tail(imod$summary.fitted.values,n=nregion)[,2]^2,
lower= tail(imod$summary.fitted.values,n=nregion)[,3],
upper= tail(imod$summary.fitted.values,n=nregion)[,5]))
admin1.bb.res$mean<-as.numeric (admin1.bb.res$mean)
admin1.bb.res$median<-as.numeric (admin1.bb.res$median)
admin1.bb.res$sd<-as.numeric (admin1.bb.res$sd)
admin1.bb.res$var<-admin1.bb.res$sd^2
admin1.bb.res$lower<-as.numeric (admin1.bb.res$lower)
admin1.bb.res$upper<-as.numeric (admin1.bb.res$upper)
admin1.bb.res$cv=admin1.bb.res$sd/admin1.bb.res$mean
}else if(stratification){
post.u <- apply(draw.u, 2, mean)
post.r <- apply(draw.r, 2, mean)
post.all <- apply(draw.all, 2, mean)
post.u.sd <- apply(draw.u, 2, sd)
post.r.sd <- apply(draw.r, 2, sd)
post.all.sd <- apply(draw.all, 2, sd)
post.u.median <- apply(draw.u, 2, median)
post.r.median<- apply(draw.r, 2, median)
post.all.median <- apply(draw.all, 2, median)
post.u.ci <- apply(draw.u, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))
post.r.ci <- apply(draw.r, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))
post.all.ci <- apply(draw.all, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))
admin1.bb.res <- data.frame(mean = c(post.u, post.r, post.all),
median = c(post.u.median, post.r.median, post.all.median),
sd= c(post.u.sd, post.r.sd, post.all.sd),
var = c(post.u.sd^2, post.r.sd^2, post.all.sd^2),
lower=c(post.u.ci[1,], post.r.ci[1,], post.all.ci[1,]),
upper=c(post.u.ci[2,], post.r.ci[2,], post.all.ci[2,]),
type = c(rep("urban", nregion), rep("rural", nregion),
rep("full", nregion)))
admin1.bb.res$admin1.name <- rep(admin.info$admin1.name, 3)
admin1.bb.res$cv=admin1.bb.res$sd/admin1.bb.res$mean
}
#admin.bb.res
#agg.nationl
if(aggregation==T){
post.all <- draw.all%*% admin.info$population/sum(admin.info$population)
agg.natl <- data.frame(mean = mean(post.all),
median = median(post.all),
sd= sd(post.all),
var = var(post.all),
lower=quantile(post.all, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))[1],
upper=quantile(post.all, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))[2])
}
if(aggregation==F){
return(list(res.admin1=admin1.bb.res,inla=imod ))
}else{
return(list(res.admin1=admin1.bb.res,agg.natl=agg.natl,inla=imod,
admin1_post=draw.all,nation_post=post.all))
}
}else if(admin==2){
if(stratification==F){
admin2.bb.res<- data.frame(cbind(
admin2.name.full=admin.info$admin2.name.full[tail(c.dat.tmp$sID,n=nregion)],
mean= tail(imod$summary.fitted.values,n=nregion)[,1],
median=colMedians(draw.all),
sd= tail(imod$summary.fitted.values,n=nregion)[,2],
var = tail(imod$summary.fitted.values,n=nregion)[,2]^2,
lower= tail(imod$summary.fitted.values,n=nregion)[,3],
upper= tail(imod$summary.fitted.values,n=nregion)[,5]))
admin2.bb.res$mean<-as.numeric (admin2.bb.res$mean)
admin2.bb.res$median<-as.numeric (admin2.bb.res$median)
admin2.bb.res$sd<-as.numeric (admin2.bb.res$sd)
admin2.bb.res$var <- admin2.bb.res$sd^2
admin2.bb.res$lower<-as.numeric (admin2.bb.res$lower)
admin2.bb.res$upper<-as.numeric (admin2.bb.res$upper)
admin2.bb.res$cv=admin2.bb.res$sd/admin2.bb.res$mean
admin2.bb.res<-left_join(admin2.bb.res,distinct(admin.info),by="admin2.name.full")
}else if(stratification){
post.u <- apply(draw.u, 2, mean)
post.r <- apply(draw.r, 2, mean)
post.all <- apply(draw.all, 2, mean)
post.u.sd <- apply(draw.u, 2, sd)
post.r.sd <- apply(draw.r, 2, sd)
post.all.sd <- apply(draw.all, 2, sd)
post.u.median <- apply(draw.u, 2, median)
post.r.median<- apply(draw.r, 2, median)
post.all.median <- apply(draw.all, 2, median)
post.u.ci <- apply(draw.u, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))
post.r.ci <- apply(draw.r, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))
post.all.ci <- apply(draw.all, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))
admin2.bb.res <- data.frame(mean = c(post.u, post.r, post.all),
median = c(post.u.median, post.r.median, post.all.median),
sd= c(post.u.sd, post.r.sd, post.all.sd),
var = c(post.u.sd^2, post.r.sd^2, post.all.sd^2),
lower=c(post.u.ci[1,], post.r.ci[1,], post.all.ci[1,]),
upper=c(post.u.ci[2,], post.r.ci[2,], post.all.ci[2,]),
type = c(rep("urban", nregion), rep("rural", nregion),
rep("full", nregion)))
admin2.bb.res$admin2.name.full <- rep(admin.info$admin2.name.full, 3)
admin2.bb.res$cv=admin2.bb.res$sd/admin2.bb.res$mean
admin2.bb.res<-left_join(admin2.bb.res,distinct(admin.info),by="admin2.name.full")
}
if(aggregation==T){
#agg admin1
weight=admin.info$population/admin.info$population.admin1
# post.all <- data.table(weight*draw.all)
post.all <- data.table::data.table(t(weight*t(draw.all)))#nrow=legth(weight)
colnames(post.all) <- admin.info$admin1.name
subgroups<-split.default(post.all, names(post.all))
sums_list <- lapply(subgroups, function(subgroup) {
rowSums(subgroup)
})
admin1.samp <- do.call(cbind, sums_list)
agg.admin1 <- data.frame(mean = colMeans(admin1.samp),
median=colMedians(admin1.samp),
sd= apply(admin1.samp, 2, sd),
var = apply(admin1.samp, 2, var),
lower= apply(admin1.samp, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))[1,],
upper= apply(admin1.samp, 2, quantile, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))[2,]
)
agg.admin1$admin1.name=rownames(agg.admin1)
#agg national
unique( admin.info$population.admin1)/sum(unique( admin.info$population.admin1))
post.all <- admin1.samp%*% unique( admin.info$population.admin1)/sum(unique( admin.info$population.admin1))
agg.natl <- data.frame(mean = mean(post.all),
median=median(post.all),
sd= sd(post.all),
var = var(post.all),
lower=quantile(post.all, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))[1],
upper=quantile(post.all, probs = c((1 - CI) / 2, 1 - (1 - CI) / 2))[2])
}
# colnames(admin2.bb.res)[colnames(admin2.bb.res) == 'admin2.name.full'] <- 'admin2.name.full'
if(aggregation==F){
return(list(res.admin2=admin2.bb.res, inla=imod,
admin2_post=draw.all))
}else{
return(list(res.admin2=admin2.bb.res,agg.admin1=agg.admin1,agg.natl=agg.natl,inla=imod,
admin2_post=draw.all,admin1_post=admin1.samp,nation_post=post.all))
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.