#' Download individual level data for fertility analysis
#' @description Download individual level DHS data for fertility analysis
#' @param clusters Dataframe of geolocated survey clusters
#' @param surveys Dataframe of DHS surveys
#' @return List of individual-level data with clusters assigned to area IDs
#' @importFrom dplyr %>%
#' @importFrom rlang .data
#' @export
get_fertility_surveys <- function(surveys, clusters) {
ird <- rdhs::dhs_datasets(fileType = "IR", fileFormat = "flat", surveyIds = surveys$SurveyId)
ird <- ird %>%
dplyr::mutate(path = unlist(rdhs::get_datasets(., clear_cache = TRUE))) %>%
dplyr::bind_rows()
iso3_c <- countrycode::countrycode(unique(surveys$CountryName), "country.name", "iso3c")
ir <- lapply(ird$path, readRDS) %>%
lapply(function(x) {class(x) <- "data.frame"
return(x)}) %>%
Map(function(ir, surveys) {
if(!is.null(ir$aidsex))
ir <- filter(ir, aidsex == 2)
dplyr::mutate(ir,
survey_id = surveys$survey_id,
iso3 = iso3_c,
survyear = surveys$SurveyYear,
survtype = surveys$SurveyType) %>%
select(
all_of(c("survey_id", "survtype", "survyear", "v001", "v021", "caseid", "v011", "v008", "v005")),
any_of(paste0("b3_", c(paste0(0, 1:9), 10:20))))
}, ., dplyr::group_split(surveys, SurveyId))
if(iso3_c == 'ETH') {
adjust_eth_months <- function(ir) {
col_positions <- grep("v011|v008|^b3\\_[0-9]*", colnames(ir))
ir <- ir %>%
dplyr::mutate(dplyr::across(col_positions, ~{.x+92}))
}
ir <- lapply(ir, adjust_eth_months)
}
ir <- lapply(ir, function(x) left_join(x, clusters, by = c("v001" = "cluster_id", "survey_id")))
ir
}
assign_tips <- function(ir, single_tips) {
survey_type <- ir %>%
lapply("[", "survtype") %>%
lapply(unique) %>%
dplyr::bind_rows()
if(!single_tips)
# tips_surv <- list("DHS" = c(0,15), "MIS" = c(0,5), "AIS" = c(0,5))[survey_type$survtype]
tips_surv <- lapply(survey_type$survtype, function(x) {
if(x=="DHS")
c(0,15)
else
c(0,5)
})
else
# tips_surv <- list("DHS" = c(0:15), "MIS" = c(0:5), "AIS" = c(0:5))[survey_type$survtype]
tips_surv <- lapply(survey_type$survtype, function(x) {
if(x=="DHS")
c(0:15)
else
c(0:5)
})
tips_surv
}
map_ir_to_areas <- function(ir, cluster_list, single_tips = TRUE) {
mc.cores <- if(.Platform$OS.type == "windows") 1 else parallel::detectCores()
ir <- Map(ir_by_area, ir, cluster_list[names(ir)], n = 1:length(ir), total = length(ir)) %>%
unlist(recursive = FALSE)
tips_surv <- assign_tips(ir, single_tips)
dat <- list()
dat$ir <- ir
dat$tips_surv <- tips_surv
dat
}
#' Aggregate district level cluster dataframe
#' @description Aggregate a dataframe of survey clusters geolocated to an administrative to any higher administrative level
#' @param clusters Dataframe of geolocated survey clusters
#' @param areas_wide Area hierarchy in wide format
#' @param area_level Integer referring to administrative level, where 0 is national, 1 is provincial etc.
#' @return Dataframe list of clusters by administrative area, one list item per survey
#' @seealso \code{\link{get_areas}}
#' @importFrom dplyr %>%
#' @importFrom rlang .data
#' @export
assign_cluster_area <- function(clusters, areas_wide, area_level) {
areas <- clusters %>%
dplyr::rename(area_id = geoloc_area_id) %>%
dplyr::left_join(areas_wide %>% dplyr::select(area_id, paste0("area_id", area_level))) %>%
dplyr::select(-area_id) %>%
dplyr::rename(area_id = paste0("area_id", area_level))
area_list <- areas %>%
dplyr::group_by(survey_id) %>%
dplyr::group_split(keep = TRUE)
names(area_list) <- area_list %>%
lapply("[", "survey_id") %>%
lapply(unique) %>%
dplyr::bind_rows %>%
.$survey_id
return(area_list)
}
#' Join individual recode survey datasets by area
#' @param ir Individual recode survey dataset
#' @param area_list List of areas
ir_by_area <- function(ir, area_list, n, total) {
print(paste(n, "of", total))
ir_int <- ir %>%
dplyr::left_join(area_list, by=c("v001" = "cluster_id")) %>%
dplyr::mutate(survey_id = factor(survey_id),
survtype = factor(survtype),
survyear = factor(survyear),
area_id = factor(area_id)) %>%
dplyr::filter(!is.na(area_id)) %>%
dplyr::group_split(area_id)
return(ir_int)
}
# #' Join individual recode survey datasets by cluster to area IDs
# #' @param surveys Survey dataframe as created by \code{rdhs::dhs_surveys()}
# #' @param cluster_areas Dataframe of clusters assigned to areas. Output of \code{\link{cluster_areas}}.
# #' @param single_tips desc this
# #' @return Nested list: \code{df$ir} and \code{df$tips_surv}. \code{df$ir} has one list item per survey/area combination and \code{df$tips_surv} has the corroborating TIPS value.
# #' @export
# #' @seealso \code{\link{cluster_areas}}
#
# clusters_to_surveys <- function(iso3, surveys, cluster_areas, level, single_tips = TRUE) {
#
# ird <- dhs_datasets(fileType = "IR", fileFormat = "flat", surveyIds = surveys$SurveyId)
#
# ird <- ird %>%
# mutate(path = unlist(get_datasets(.))) %>%
# bind_rows()
#
# ir <- lapply(ird$path, readRDS) %>%
# lapply(function(x) {class(x) <- "data.frame"
# return(x)}) %>%
# Map(function(ir, surveys) {
# mutate(ir,
# surveyid = surveys$survey_id,
# country = surveys$CountryName,
# survyear = surveys$SurveyYear,
# survtype = surveys$SurveyType)
# }, ., group_split(surveys, SurveyId))
#
#
# ## I think this mess was necessary because the order of cluster_areas was not always the same as the order for ir.. Double check. Otherwise the simple names(ir) <- names(cluster_areas) is sufficient
# # names(ir) <- ir %>%
# # lapply("[", "surveyid") %>%
# # lapply(unique) %>%
# # bind_rows %>%
# # left_join(clusters %>%
# # select(survey_id, DHS_survey_id) %>%
# # unique,
# # by=c("surveyid" = "DHS_survey_id")) %>%
# # select(-surveyid) %>%
# # .$survey_id
#
# names(ir) <- names(cluster_areas)
#
# if(iso3 == "ETH") {
#
# cols_edit <- c("v008", "v011", "b3_01", "b3_02", "b3_03", "b3_04", "b3_05", "b3_06", "b3_07", "b3_08", "b3_09", "b3_10", "b3_11", "b3_12", "b3_13", "b3_14", "b3_15", "b3_16", "b3_17", "b3_18", "b3_19", "b3_20")
#
# ir <- ir %>%
# lapply(function(x) x %>% mutate_at(.vars = cols_edit, .funs = ~(.+92)))
#
# }
#
#
# if(level > 0) {
#
# ir <- Map(ir_by_area, ir, cluster_areas[names(ir)], n=1:length(ir), total=length(ir)) %>%
# unlist(recursive = FALSE)
#
# survey_type <- ir %>%
# lapply("[", "survtype") %>%
# lapply(unique) %>%
# bind_rows
#
# if(!single_tips)
# tips_surv <- list("DHS" = c(0,15), "MIS" = c(0,5), "AIS" = c(0,5))[survey_type$survtype]
# else
# tips_surv <- list("DHS" = c(0:15), "MIS" = c(0:5), "AIS" = c(0:5))[survey_type$survtype]
#
# } else {
#
# ir <- lapply(ir, function(x) {
# mutate(x, area_id = unique(surveys$iso3))
# })
#
# if(!single_tips)
# tips_surv <- list("DHS" = c(0,15), "MIS" = c(0,5), "AIS" = c(0,5))[surveys$SurveyType]
# else
# tips_surv <- list("DHS" = c(0:15), "MIS" = c(0:5), "AIS" = c(0:5))[surveys$SurveyType]
#
# }
#
# dat <- list()
# dat$ir <- ir
# dat$tips_surv <- tips_surv
#
# return(dat)
#
# }
#' Calculate fertility rates from MICS data
#' @export
calculate_mics_fertility <- function(iso3_c, mics_wm, mics_births_to_women, mapping, areas) {
mc.cores <- if(.Platform$OS.type == "windows") 1 else parallel::detectCores()
levels <- c(0,
mapping$admin1_level[mapping$iso3 == iso3_c],
mapping$fertility_fit_level[mapping$iso3 == iso3_c])
areas_wide <- spread_areas(areas)
mics_wm_asfr <- mics_wm %>%
utils::type.convert(as.is = T) %>%
dplyr::mutate(survey_id = factor(survey_id),
area_id = factor(area_id)) %>%
left_join(areas %>% select(area_id, area_level) %>% st_drop_geometry())
mics_high_admin <- mics_wm_asfr %>%
filter(area_level > 1) %>%
group_by(area_level) %>%
group_split()
mics_high_admin <- mics_high_admin %>%
lapply(function(x) {
level <- unique(x$area_level)
col_name <- paste0("area_id", level)
x %>%
rename_with(~paste(col_name), "area_id") %>%
left_join(areas_wide %>% select(all_of(col_name), area_id1) %>% st_drop_geometry() %>% distinct()) %>%
rename_with(~"area_id", paste(col_name))
}) %>%
bind_rows()
mics_wm_asfr <- mics_wm_asfr %>%
filter(area_level %in% c(0,1)) %>%
mutate(area_id1 = area_id) %>%
bind_rows(mics_high_admin)
mics_births_asfr <- mics_births_to_women %>%
dplyr::left_join(mics_wm) %>%
dplyr::mutate(survey_id = factor(survey_id),
area_id = factor(area_id)) %>%
left_join(mics_wm_asfr %>% select(area_id, area_id1) %>% distinct())
calculate_mics_fr <- function(curr_level, levels, mics_wm_asfr, mics_births_asfr) {
if(curr_level == 0) {
mics_wm_asfr$area_id <- iso3_c
mics_births_asfr$area_id <- iso3_c
} else if(curr_level == levels[2]) {
mics_births_asfr <- mics_births_asfr %>%
select(-area_id) %>%
rename(area_id = area_id1)
mics_wm_asfr <- mics_wm_asfr %>%
select(-area_id) %>%
rename(area_id = area_id1)
}
mics_births_asfr <- mics_births_asfr %>%
dplyr::arrange(survey_id, area_id) %>%
dplyr::group_by(survey_id, area_id) %>%
dplyr::group_split()
mics_wm_asfr <- mics_wm_asfr %>%
dplyr::arrange(survey_id, area_id) %>%
dplyr::group_by(survey_id, area_id) %>%
dplyr::group_split()
mics_asfr <- parallel::mcMap(calc_asfr, mics_wm_asfr,
by = list(~area_id + survey_id),
tips = list(c(0:15)),
agegr= list(3:10*5),
period = list(1995:2019),
clusters = list(~cluster),
strata = list(NULL),
id = list("unique_id"),
dob = list("wdob"),
intv = list("doi"),
weight = list("weight"),
varmethod = list("none"),
bhdata = mics_births_asfr,
bvars = list("cdob"),
counts = TRUE,
mc.cores = mc.cores) %>%
dplyr::bind_rows() %>%
utils::type.convert(as.is = T) %>%
tidyr::separate(col = survey_id, into = c(NA, "survyear", NA), sep = c(3,7), remove = FALSE, convert = TRUE) %>%
dplyr::filter(period <= survyear) %>%
# rename(age_group = agegr) %>%
dplyr::mutate(survtype = "MICS",
iso3 = iso3_c
) %>%
dplyr::left_join(naomi::get_age_groups() %>% dplyr::select(age_group, age_group_label), by = c("agegr" = "age_group_label")) %>%
dplyr::select(-agegr)
# For plotting:
mics_asfr_plot <- Map(calc_asfr, mics_wm_asfr,
by = list(~area_id + survey_id),
tips = list(NULL),
agegr= list(3:10*5),
period = list(1995:2019),
clusters = list(~cluster),
strata = list(NULL),
id = list("unique_id"),
dob = list("wdob"),
intv = list("doi"),
weight = list("weight"),
varmethod = list("none"),
bhdata = mics_births_asfr,
bvars = list("cdob"),
counts = TRUE) %>%
dplyr::bind_rows() %>%
utils::type.convert(as.is = T) %>%
tidyr::separate(col = survey_id, into = c(NA, "survyear", NA), sep = c(3,7), remove = FALSE, convert = TRUE) %>%
dplyr::filter(period <= survyear) %>%
dplyr::rename(value = asfr) %>%
# rename(age_group = agegr) %>%
dplyr::mutate(survtype = "MICS",
iso3 = iso3_c,
variable = "asfr"
) %>%
dplyr::left_join(naomi::get_age_groups() %>% dplyr::select(age_group, age_group_label), by = c("agegr" = "age_group_label")) %>%
dplyr::select(-agegr)
mics_tfr_plot <- mics_asfr_plot %>%
dplyr::group_by(iso3, survey_id, period, area_id) %>%
dplyr::summarise(value = 5 * sum(value),
pys = sum(pys)) %>%
dplyr::mutate(variable = "tfr") %>%
dplyr::filter(value > 0.5)
mics_plot <- dplyr::bind_rows(mics_tfr_plot, mics_asfr_plot)
out <- list()
out$asfr <- mics_asfr
out$plot <- mics_plot
out
}
mics_fr <- lapply(levels, calculate_mics_fr, levels, mics_wm_asfr, mics_births_asfr)
names(mics_fr) <- c("national", "provincial", "district")
mics_fr
# missing_data <- mics_asfr %>%
# dplyr::group_by(survey_id, tips) %>%
# dplyr::summarise(births = sum(births)) %>%
# dplyr::filter(births < 5) %>%
# dplyr::group_by(survey_id) %>%
# dplyr::group_split()
#
# i <- 0
#
# while(i < length(missing_data)) {
# i <- i+1
#
# mics_asfr <- mics_asfr %>%
# dplyr::filter(!(survey_id == unique(missing_data[[i]]$survey_id) & tips %in% unique(missing_data[[i]]$tips)))
#
# years <- unique(dplyr::filter(mics_asfr, survey_id == unique(missing_data[[i]]$survey_id))$period)
#
# mics_plot <- mics_plot %>%
# dplyr::filter(!(survey_id == unique(missing_data[[i]]$survey_id) & !period %in% years))
#
# }
# out <- list()
# out$mics_asfr <- mics_asfr
# out$mics_plot <- mics_plot
#
# out
}
#' Calculate fertility rates from DHS, MIS, AIS data
#' @export
calculate_dhs_fertility <- function(iso3_c, dat, mapping) {
mc.cores <- if(.Platform$OS.type == "windows") 1 else parallel::detectCores()
levels <- c(0,
mapping$admin1_level[mapping$iso3 == iso3_c],
mapping$fertility_fit_level[mapping$iso3 == iso3_c])
calculate_fr <- function(level, dat) {
area <- paste0("area_id", level)
message(area)
# tst <- lapply(dat, function(x) {
# x %>%
# select(c("survey_id", "survtype", "survyear", "v021", "caseid", "v011", "v008", "v005"),
# paste0("b3_", c(paste0(0, 1:9), 10:20)),
# starts_with("area_id"))
# })
#
dat <- lapply(dat, function(x) {
if(level == 0) {
x %>%
mutate(curr_area_id = iso3_c)
} else {
x <- x %>%
rename_with(~"curr_area_id", paste(area)) %>%
mutate(curr_area_id = ifelse(is.na(curr_area_id), area_id, curr_area_id)) %>%
group_by(curr_area_id) %>%
group_split()
}
})
if(level != 0) {
dat <- unlist(dat, recursive = F)
}
asfr <- parallel::mcMap(calc_asfr,
# dat$ir,
dat,
by = list(~survey_id + survtype + survyear + curr_area_id),
tips = list(c(0:15)),
agegr= list(3:10*5),
period = list(1995:2022),
strata = list(NULL),
varmethod = list("none"),
counts = TRUE,
mc.cores = mc.cores) %>%
dplyr::bind_rows() %>%
utils::type.convert(as.is = T) %>%
dplyr::filter(period <= survyear) %>%
# rename(age_group = agegr) %>%
dplyr::mutate(iso3 = iso3_c) %>%
dplyr::left_join(naomi::get_age_groups() %>% dplyr::select(age_group, age_group_label), by = c("agegr" = "age_group_label")) %>%
dplyr::select(-agegr, area_id = curr_area_id)
if(length(grep(paste(c(0, levels[[2]]), collapse = "|"), level))) {
asfr_plot <- parallel::mcMap(calc_asfr,
dat,
by = list(~survey_id + survtype + survyear + curr_area_id),
tips = list(NULL),
agegr= list(3:10*5),
period = list(1995:2022),
strata = list(NULL),
varmethod = list("none"),
counts = TRUE,
mc.cores = mc.cores) %>%
dplyr::bind_rows() %>%
utils::type.convert(as.is = T) %>%
dplyr::filter(period <= survyear) %>%
# rename(age_group = agegr) %>%
dplyr::mutate(iso3 = iso3_c,
variable = "asfr") %>%
dplyr::left_join(naomi::get_age_groups() %>% dplyr::select(age_group, age_group_label), by = c("agegr" = "age_group_label")) %>%
dplyr::select(-agegr, value = asfr, area_id = curr_area_id)
tfr_plot <- asfr_plot %>%
dplyr::group_by(iso3, survey_id, period, area_id) %>%
dplyr::summarise(value = 5 * sum(value),
pys = sum(pys)) %>%
dplyr::mutate(variable = "tfr") %>%
dplyr::filter(value > 0.5)
plot <- bind_rows(asfr_plot, tfr_plot)
} else {
plot <- data.frame()
}
out <- list()
out$asfr <- asfr
out$plot <- plot
out
}
fr <- lapply(levels, calculate_fr, dat)
names(fr) <- c("national", "provincial", "district")
fr
# message("Calculating aggregate fertility rates")
#
#
# asfr_plot_nat <- Map(calc_asfr, ir$ir,
# # by = list(~survey_id + survtype + survyear + area_id),
# tips = list(c(0,15)),
# agegr= list(3:10*5),
# period = list(1995:2020),
# strata = list(NULL),
# varmethod = list("none"),
# counts = TRUE) %>%
# dplyr::bind_rows(.id = "survey_id") %>%
# tidyr::separate(survey_id, into = c(NA, "survyear", "survtype"), sep = c(3, 7), remove = FALSE, convert = TRUE) %>%
# utils::type.convert() %>%
# dplyr::filter(period<=survyear) %>%
# dplyr::mutate(iso3 = iso3,
# area_id = iso3,
# variable = "asfr") %>%
# dplyr::rename(value = asfr) %>%
# dplyr::left_join(naomi::get_age_groups() %>% dplyr::select(age_group, age_group_label), by = c("agegr" = "age_group_label")) %>%
# dplyr::select(-agegr)
#
# tfr_plot <- asfr_plot %>%
# dplyr::group_by(survey_id, period, area_id) %>%
# dplyr::summarise(value = 5 * sum(value)) %>%
# dplyr::mutate(variable = "tfr") %>%
# dplyr::filter(value > 0.5)
#
# tfr_plot_nat <- asfr_plot_nat %>%
# dplyr::group_by(survey_id, period, area_id) %>%
# dplyr::summarise(value = 5 * sum(value)) %>%
# dplyr::mutate(variable = "tfr") %>%
# dplyr::filter(value > 0.5)
# missing_data <- fr$national$asfr %>%
# dplyr::group_by(survey_id, tips) %>%
# dplyr::summarise(births = sum(births)) %>%
# dplyr::filter(births < 5) %>%
# dplyr::group_by(survey_id) %>%
# dplyr::group_split()
#
# # plot <- dplyr::bind_rows(tfr_plot, tfr_plot_nat, asfr_plot, asfr_plot_nat)
#
# i <- 0
#
# while(i < length(missing_data)) {
# i <- i+1
#
# foo <- lapply(fr, "[[", "asfr") %>%
# lapply(function(x) {
# filter(x, !(survey_id == unique(missing_data[[i]]$survey_id) & tips %in% unique(missing_data[[i]]$tips)))
# })
#
# # asfr <- asfr %>%
# # dplyr::filter(!(survey_id == unique(missing_data[[i]]$survey_id) & tips %in% unique(missing_data[[i]]$tips)))
#
# # years <- unique(dplyr::filter(asfr, survey_id == unique(missing_data[[i]]$survey_id))$period)
# #
# # plot <- plot %>%
# # dplyr::filter(!(survey_id == unique(missing_data[[i]]$survey_id) & !period %in% years))
#
#
# }
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.