This vignette illustrates use of demogsurv
and rdhs
to calculate fertility and mortality indicators for lots of DHS surveys in sub-Saharan Africa, and compare estimates to those produced for the DHS StatCompiler. It is currently a hastily developed analysis script, though may be further developed in the future.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
## install.packages("devtools") ## install.packages("rdhs") ## devtools::install_github("mrc-ide/demogsurv") library(rdhs) library(demogsurv) library(ggplot2) library(haven)
Identify all DHS surveys conducted in sub-Saharan Africa in 2015
countries <- dhs_countries() cc <- countries[countries$RegionName == "Sub-Saharan Africa", ]$DHS_CountryCode surveys <- dhs_surveys(countryIds = cc, surveyYear = 2015, surveyType = "DHS")
Identify individual recode (IR) and births recode (BR) datasets corresponding to these surveys.
ird <- dhs_datasets(fileType = "IR", fileFormat = "flat") ird <- ird[ird$SurveyId %in% surveys$SurveyId, ] brd <- dhs_datasets(fileType = "BR", fileFormat = "flat") brd <- brd[brd$SurveyId %in% surveys$SurveyId, ]
Use rdhs
to retrieve datasets, downloading them from DHS website if not already in the rdhs
cache.
get_datasets(ird$FileName) get_datasets(brd$FileName)
Identify survey variables to extract from the datasets that will be required for fertility and mortality indicators
ird_vars <- c("caseid", "v005", "v008", "v011", "v021", "v024", "v025") ird_questions <- search_variables(ird$FileName, variables = ird_vars) # add child dob variables ird_br_questions <- search_variable_labels(ird$FileName, regex = "CMC") ird_br_questions <- ird_br_questions[grepl("^b3", ird_br_questions$variable), ] # add sibling variables ird_sib_questions <- search_variable_labels(ird$FileName, regex = "sib") ird_questions <- rbind(ird_questions, ird_br_questions, ird_sib_questions) brd_vars <- c("caseid", "bidx", "b3", "b5", "b7", "v005", "v008", "v021", "v024", "v025") brd_questions <- search_variables(brd$FileName, variables = brd_vars)
Extract relevant survey variables from the downloaded datasets into a data.frame
ir <- extract_dhs(ird_questions, add_geo = FALSE) br <- extract_dhs(brd_questions, add_geo = FALSE) ## Convert to factors (a bit inefficient) ir <- lapply(ir, haven::as_factor) br <- lapply(br, haven::as_factor) ## Add survey-level variables ir <- Map(data.frame, SurveyId = haven::as_factor(surveys$SurveyId), CountryName = haven::as_factor(surveys$CountryName), SurveyYear = haven::as_factor(surveys$SurveyYear), ir) br <- Map(data.frame, SurveyId = haven::as_factor(surveys$SurveyId), CountryName = haven::as_factor(surveys$CountryName), SurveyYear = haven::as_factor(surveys$SurveyYear), br)
demogsurv
to analyse demographic rate indicatorsCalculate TFR and 15-19 ASFR for 3 year period preceding survey (default argument tips=c(0, 3)
).
tfr <- lapply(ir, calc_tfr, by=~SurveyId+CountryName+SurveyYear, strata=NULL) tfr <- do.call(rbind, tfr) asfr15to19 <- lapply(ir, calc_asfr, by=~SurveyId + CountryName + SurveyYear, agegr = c(15, 20), strata=NULL) asfr15to19 <- do.call(rbind, asfr15to19)
Identify surveys that include sibling history model via querying the DHS API survery with "Maternal mortality" characteristic.
survchar <- dhs_survey_characteristics() survchar[grepl("Maternal", survchar$SurveyCharacteristicName), ] mm_surv <- dhs_surveys(surveyCharacteristicIds = 1) has_mm <- surveys$SurveyId %in% mm_surv$SurveyId
Reshape IR datasets to one row per sibling episode, create a binary variable indicating sibling death, and calculate ~35~q~15~ estimates by sexx.
sib <- lapply(ir[has_mm], reshape_sib_data, widevars = c("SurveyId", "CountryName", "SurveyYear", "v005", "v008", "v021")) sib <- lapply(sib, function(x){x$death <- factor(x$mm2, c("dead", "alive")) == "dead"; x}) q3515 <- lapply(sib, calc_nqx, by=~SurveyId+CountryName+SurveyYear + mm1, strata = NULL, agegr=seq(15, 50, 5), tips=c(0, 7), dob="mm4", dod="mm8") q3515 <- do.call(rbind, q3515)
demogsurv
does not yet implement the exact child mortality calculation produced in DHS
reports and DHS StatCompiler (see Rutstein and Rojas 2006.
This is planned for future implementation.
The function calc_nqx()
calculates piecewise constant mortality rates within
age groups 0, 1-2, 3-4, 5-11, 12-24 months, and 2, 3, and 4-5 years (parameter
agegr = c(0, 1, 3, 5, 12, 24, 36, 48, 60)/12
). These are aggregated to a cumulative
hazards over the age group 0-4 years and converted to probabilities to estimate ~5~q~0~.
Add a binary indicator whether a death occurred and a date of death variable, placed 0.5 months in the month the death occurred.
br <- lapply(br, function(x){x$death <- x$b5 == "no"; x}) br <- lapply(br, function(x){x$dod <- x$b3 + x$b7 + 0.5; x})
Calculate ~5~q~0~ for period 0-4, 5-9, and 10-14 years preceding the survey.
u5mr <- lapply(br, calc_nqx, by=~SurveyId+CountryName+SurveyYear, strata=NULL) u5mr <- do.call(rbind, u5mr)
Identify the indicator IDs associated with TFR, ASFR 15-19, ~35~q~15~, and ~5~q~0~.
indic <- dhs_indicators() indic[grepl("TFR 15-49", indic$ShortName), c("IndicatorId", "ShortName", "Label")] indic[grepl("ASFR 15-19", indic$ShortName), c("IndicatorId", "ShortName", "Label")] indic[grepl("Probability of dying", indic$ShortName), c("IndicatorId", "Definition")] indic[grepl("Under-five mortality", indic$ShortName), c("IndicatorId", "Label")]
Query estimates from DHS API and merge with calculated estimates.
tfr_dhs <- dhs_data(indicatorIds = "FE_FRTR_W_TFR", surveyId = tfr$SurveyId) tfr <- merge(tfr, tfr_dhs[ , c("SurveyId", "Value")]) asfr15to19_dhs <- dhs_data(indicatorIds = "FE_FRTR_W_A15", surveyId = asfr15to19$SurveyId) asfr15to19 <- merge(asfr15to19, asfr15to19_dhs[ , c("SurveyId", "Value")]) q3515_dhs <- dhs_data(indicatorIds = c("MM_AMPB_W_AMP", "MM_AMPB_M_AMP"), surveyId = q3515$SurveyId) q3515_dhs$mm1 <- c(MM_AMPB_M_AMP = "male", MM_AMPB_W_AMP = "female")[q3515_dhs$IndicatorId] q3515 <- merge(q3515, q3515_dhs[ , c("SurveyId", "Value")]) u5mr_dhs <- dhs_data(indicatorIds = "CM_ECMT_C_U5M", surveyYearStart = 2005, breakdown = "all") u5mr_dhs <- u5mr_dhs[u5mr_dhs$SurveyId %in% u5mr$SurveyId, ] u5mr_dhs$tips <- u5mr_dhs$CharacteristicLabel u5mr <- merge(u5mr, u5mr_dhs[, c("SurveyId", "tips", "Value")])
knitr::kable(head(tfr), digits=c(rep(0, 4), 1, 3, 2), caption = "TFR") knitr::kable(head(asfr15to19), digits=c(rep(0, 5), 3, 4, 0), caption = "ASFR 15-19") knitr::kable(head(q3515), digits=c(rep(0, 5), rep(3, 4), 0), caption = "35q15") hu5mr <- head(u5mr) hu5mr$tips <- factor(hu5mr$tips, c("0-4", "5-9", "10-14")) hu5mr <- hu5mr[order(hu5mr$SurveyId, hu5mr$tips),] knitr::kable(hu5mr, digits=c(rep(0, 4), rep(3, 4), 0), caption = "5q0")
Estimates for fertility rates and adult mortality rates should exactly match those produced as standard DHS indicators.
## TFR matches exactly with(tfr, table(round(tfr, 1) == Value)) ## ASFR 15-19 matches exactly with(asfr15to19, table(round(1000*asfr) == Value)) ## 35q15 matches exactly for >80% with(q3515, table(round(1000*est) == Value)) with(q3515, table(round(1000*est) - Value)) subset(q3515, abs((round(1000*est) - Value)) > 1)
u5mr$tips <- factor(u5mr$tips, c("0-4", "5-9", "10-14")) ggplot(u5mr, aes(1000*est, Value, color=tips)) + geom_abline(slope=1, color="grey") + geom_point() + coord_fixed() + xlab("demogsurv::calc_nqx()") + ylab("DHS StatCompiler") + ggtitle("5q0 comparison")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.