R/dhs.R
In childfree: Access and Harmonize Childfree Demographic Data

Documented in dhs

#' Read and recode Demographic and Health Surveys (DHS) individual data
#'
#' @param files vector: a character vector containing the paths for one or more Individual Recode DHS data files (see details)
#' @param extra.vars vector: a character vector containing the names of variables to be retained from the raw data
#' @param progress boolean: display a progress bar
#'
#' @details
#' The \href{https://www.dhsprogram.com/}{Demographic and Health Surveys} (DHS) program regularly collects
#'    health data from population-representative samples in many countries using standardized surveys since 1984. The
#'    "individual recode" data files contain women's responses, while the "men recode" files contain men's responses. These
#'    files are available in SPSS, SAS, and Stata formats from \href{https://www.dhsprogram.com/}{https://www.dhsprogram.com/},
#'    however access requires a \href{https://dhsprogram.com/data/Access-Instructions.cfm}{free application}. The `dhs()` function
#'    reads one or more of these files, extracts and recodes selected variables useful for studying childfree adults and other
#'    family statuses, then returns an unweighted data frame.
#'
#' Although access to DHS data requires an application, the DHS program provides a \href{https://dhsprogram.com/data/Download-Model-Datasets.cfm}{model dataset}
#'    for practice. The example provided below uses the model data file "ZZIR62FL.SAV", which contains fictitious women's data,
#'    but has the same structure as a real DHS data file. The example can be run without prior application for data access.
#'
#' **Sampling weights**
#'
#' The DHS is collected using a complex survey design. The \code{survey} package can be used to perform analyses that take these
#'    design features into account, and make it possible to obtain population-representative estimates. In most cases, a \link[survey]{svydesign}
#'    object for a single country and wave can be created using \code{survey::svydesign(data = data, ids = ~cluster, strata = ~strata, weights = ~weight, nest = TRUE)}.
#'    Additional information about analyzing DHS data using weights is available \href{https://dhsprogram.com/data/Guide-to-DHS-Statistics/Analyzing_DHS_Data.htm}{here}
#'    and in the documentation provided with the downloaded data files.
#'
#' **Non-biological children**
#'
#' Information about non-biological children (e.g., adopted children, foster children, etc.) is not available in the DHS, which means
#'    that a respondent with only non-biological children would be classified as a non-parent. This is not exactly match the approach
#'    described by the ABC Framework (Neal & Neal, 2024), and may lead to discrepancies when comparing DHS estimates to estimates derived
#'    from other data where information about non-biological children is available.
#'
#' **Additional notes**
#'   * The SPSS-formatted files containing data from Gabon Recode 4 (GAIR41FL.SAV, GAMR41FL.SAV) and Turkey Recode 4 (TRIR41FL.SAV, TRMR41FL.SAV)
#'     contain encoding errors. Use the SAS-formatted files (GAIR41FL.SAS7BDAT, GAMR41FL.SAS7BDAT, TRIR41FL.SAS7BDAT, TRMR41FL.SAS7BDAT) instead.
#'   * In some cases, DHS makes available individual recode data files for specific regions. For example, women's data from individual states
#'     in India from 1999 are contained in files named XXIR42FL.SAV, where the "XX" is a two-letter state code. The `dhs()` function has only
#'     been tested using whole-country files, and may not perform as expected for regional files.
#'   * Variables containing women's responses in the individual recode files begin with `v`, while variables containing men's responses in the
#'     men recode files begin with `mv`. When applying `dhs()` to both female and male data, these are automatically harmonized. However, if
#'     extra variables are requested using the `extra.vars` option, be sure to specify both names (e.g. `extra.vars = c("v201", "mv201")`).
#'
#' @return A data frame containing variables described in the codebook available using \code{vignette("codebooks")}
#' If you are offline, or if the requested data are otherwise unavailable, NULL is returned.
#'
#' @references ABC Framework: {Neal, Z. P. and Neal, J. W. (2024). A framework for studying adults who neither have nor want children. *The Family Journal, 32*, 121-130. \doi{10.1177/10664807231198869}}
#' @export
#'
#' @examples
#' \donttest{
#' dat <- dhs(files = c("ZZIR62FL.SAV"), extra.vars = c("v201"))  #Request data for fictitous country
#' if (!is.null(dat)) {  #If data was available...
#' table(dat$famstat)/nrow(dat)  #Fraction of respondents with each family status
#' }
#' }
dhs <- function(files, extra.vars = NULL, progress = TRUE) {

  if (length(files) > 1 & "ZZIR62FL.SAV" %in% files) {stop("Model data (file ZZIR62FL.SAV) should not be combined with files containing real data.")}

  if (!is.null(extra.vars)) {extra.vars <- tolower(extra.vars)}  #Make requested extra variables lowercase

  if (progress) {message("Processing DHS data files -")}
  if (progress) {pb <- utils::txtProgressBar(min = 0, max = length(files), initial = 0, style = 3)} #Initialize progress bar

  #Loop over each supplied data file
  for (file in 1:length(files)) {

    #Import raw data
    if (files[file]=="ZZIR62FL.SAV") {  #Model file from https://dhsprogram.com/data/Download-Model-Datasets.cfm
      if (!RCurl::url.exists("https://osf.io/download/hk23e")) {message("You are offline or sample DHS data is not available now. Try again later"); data <- NULL; return(data)}
      temp <- tempfile()
      utils::download.file(url = "https://osf.io/download/hk23e", destfile = temp)
      dat <- rio::import(temp, format = "sav")
    } else {
      if (progress) {utils::setTxtProgressBar(pb,file)}
      dat <- rio::import(files[file])
      }
    colnames(dat) <- tolower(colnames(dat))  #Make all variables lowercase

    #Check type of file
    female <- NULL
    if (colnames(dat)[1]=="caseid" | colnames(dat)[1]=="case$id") {female <- TRUE}
    if (colnames(dat)[1]=="mcaseid" | colnames(dat)[1]=="mcase$id") {female <- FALSE}
    if (is.null(female)) {stop(paste0(files[file], " does not appear to be an individual or men DHS recode file."))}

    #### Family Status ####
    #Number of children
    if (female) {dat$numkid <- dat$v201} else {dat$numkid <- dat$mv201}

    #Want children
    if (female) {dat$want <- dat$v602} else {dat$want <- dat$mv602}
    dat$want[which(dat$want==0)] <- NA  #Zero doesn't seem to be a valid value
    dat$want[which(dat$want==4)] <- 5  #Combine 4-Sterilized with 5-Infecund
    dat$want[which(dat$want==7)] <- 5  #Combine 7-Man Infecund with 5-Infecund
    dat$want[which(dat$want>=6)] <- NA  #Various labels, none about wants
    dat$want <- factor(dat$want, levels = c(1,2,3,5), labels = c("Have (another)", "Undecided", "No (more)", "Infecund"))

    #Ideal number of children
    if (female) {dat$ideal <- dat$v613} else {dat$ideal <- dat$mv613}
    dat$ideal[which(dat$ideal==98)] <- -1  #Special code for "Don't Know"
    dat$ideal[which(dat$ideal>=90)] <- NA  #Treat all 90s values as missing (undocumented country-specific special codes)

    #Childfree (want)
    dat$cf_want <- NA
    dat$cf_want[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$want) & dat$want=="No (more)")] <- 1  #Childfree if (a) have no children and (b) want no children
    dat$cf_want[which(!is.na(dat$numkid) & dat$numkid>0)] <- 0  #Not childfree if have children
    dat$cf_want[which(!is.na(dat$want) & (dat$want=="Have (another)" | dat$want=="Undecided"))] <- 0  #Not childfree if want or may want children

    #Childfree (ideal)
    dat$cf_ideal <- NA
    dat$cf_ideal[which(!is.na(dat$numkid) & dat$numkid==0 &
                       !is.na(dat$ideal) & dat$ideal==0)] <- 1  #Childfree if (a) have no children and (b) zero children is ideal
    dat$cf_ideal[which(!is.na(dat$numkid) & dat$numkid>0)] <- 0  #Not childfree if have children
    dat$cf_ideal[which(!is.na(dat$ideal) & (dat$ideal==-1 | dat$ideal>0))] <- 0  #Not childfree if it is ideal to have some number of children, or the ideal number of children is unknown

    #Family status
    dat$famstat <- NA
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid>0)] <- 1  #Parent - Unclassified (known number of children greater than zero)
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid>0 &
                      !is.na(dat$ideal) & dat$numkid==dat$ideal)] <- 2  #Parent - Fulfilled (has ideal number of children)
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid>0 &
                      !is.na(dat$ideal) & dat$numkid<dat$ideal)] <- 3  #Parent - Unfulfilled (has less than ideal number of children)
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid>0 &
                      !is.na(dat$ideal) & dat$numkid>dat$ideal & dat$ideal!=-1)] <- 4  #Parent - Reluctant (has more than ideal number of children)
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid>0 &
                      !is.na(dat$ideal) & dat$ideal==-1)] <- 5  #Parent - Ambivalent (unsure how many children is ideal)

    #This may include a small number of "childless - unclassified/social" (respondent wants children, but does not intend to have them for non-medical/social reasons)
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$want) & dat$want=="Have (another)")] <- 6  #Not yet parent (wants child(ren), regardless of how many is ideal)

    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$want) & dat$want=="Infecund" &
                      !is.na(dat$ideal) & dat$ideal>0)] <- 9  #Childless - Biological (cannot have children, but a specific number would have been ideal)

    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$want) & dat$want=="Infecund" &
                      !is.na(dat$ideal) & dat$ideal==-1)] <- 10  #Ambivalent non-parent (cannot have children, ideal number is unknown)

    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                        !is.na(dat$want) & dat$want=="Undecided")] <- 11  #Undecided (unsure if want children)

    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$want) & dat$want=="No (more)")] <- 12  #Childfree (do not want children)
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$ideal) & dat$ideal==0)] <- 12  #Childfree (zero children is ideal)

    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$ideal) & dat$ideal==0 &
                      !is.na(dat$want) & dat$want!="No (more)")] <- 11  #Undecided, ideal and want responses are inconsistent
    dat$famstat[which(!is.na(dat$numkid) & dat$numkid==0 &
                      !is.na(dat$ideal) & dat$ideal>0 &
                      !is.na(dat$want) & dat$want=="No (more)")] <- 11  #Undecided, ideal and want responses are inconsistent

    dat$famstat <- factor(dat$famstat, levels = c(1:12),
                          labels = c("Parent - Unclassified", "Parent - Fulfilled", "Parent - Unfulfilled", "Parent - Reluctant", "Parent - Ambivalent",
                                     "Not yet parent", "Childless - Unclassified", "Childless - Social", "Childless - Biological", "Ambivalent non-parent", "Undecided", "Childfree"))

    #### Demographics ####
    #Sex
    if (female) {dat$sex <- 1} else {dat$sex <- 2}
    dat$sex <- factor(dat$sex, levels = c(1,2,3), labels = c("Female", "Male", "Other"))

    #Age in years
    if (female) {dat$age <- dat$v012} else {dat$age <- dat$mv012}

    #Education in years
    if (female) {dat$education <- dat$v133} else {dat$education <- dat$mv133}
    dat$education[dat$education>=40] <- NA

    #Partnership status
    if (female) {dat$partnered <- dat$v502 + 1} else {dat$partnered <- dat$mv502 + 1}
    dat$partnered <- factor(dat$partnered, levels = c(1,2,3), labels = c("Never", "Currently", "Formerly"))

    #Residence
    if (female) {
      dat$residence <- dat$v102
      if (dat$v000[1]=="MX" & dat$v007[1]==87) {  #Mexico Wave 1 used a different coding
        dat$residence[which(dat$residence<4)] <- 2  #Code as rural (1) Less than 2500, (2) 2500-19999, and (3) 20000+
        dat$residence[which(dat$residence==4)] <- 1  #Code as urban (4) Areas Metropolitanas
      }
    }
    if (!female) {
      dat$residence <- dat$mv102
      if (dat$mv000[1]=="MX" & dat$mv007[1]==87) {  #Mexico Wave 1 used a different coding
        dat$residence[which(dat$residence<4)] <- 2  #Code as rural (1) Less than 2500, (2) 2500-19999, and (3) 20000+
        dat$residence[which(dat$residence==4)] <- 1  #Code as urban (4) Areas Metropolitanas
      }
    }
    dat$residence <- factor(dat$residence, levels = c(2,98,99,1), labels = c("Rural", "Town", "Suburb", "Urban"), ordered = TRUE)  #98 and 99 are dummy values; "Town" and "Suburb" categories are not used by DHS

    #Employed
    if (female) {dat$employed <- dat$v714} else {dat$employed <- dat$mv714}

    #### Attitude ####
    #Religion
    dat$religion <- NA

    if (female) {x <- as.data.frame(attr(dat$v130, "labels"))} else {x <- as.data.frame(attr(dat$mv130, "labels"))}  #Get file-specific dictionary (o = old value, l = label, n = new value)
    if (nrow(x) > 0) {  #If there are labeled values for religions...
      x$label <- rownames(x)
      colnames(x) <- c("o", "l")
      x$n <- NA

      for (i in 1:nrow(x)) {  #For each old label, identify new value
        if (x$l[i] %in% c("Agnostic", "Atheist", "DK", "Don t know", "Don't know", "No religion", "No Religion", "No religion (Sem religiao)",
                          "No religion/atheists", "No religion/none", "None", "NONE", "Not religion", "Not Religious", "Not religious",
                          "Sans", "Sem religio")) {x$n[i] <- 1}  #None

        if (x$l[i] %in% c("Catholic", "Catholic (Cat\U00A2lica)", "Catholic/greek cath.", "Catholicism", "Catholique", "Catolica romana",
                          "Christian Catholic", "Christian Orthodox", "Orthodox", "Roman Catholic", "Roman catholic",
                          "Roman Catholic church")) {x$n[i] <- 2}  #Catholic/Orthodox

        if (x$l[i] %in% c("Bektashi", "Islam", "Islamic", "Islamic (Mu\U2021ulman)", "Moslem", "Mulsim", "Muslem", "Muslim",
                          "muslim", "Muslim/Islam", "Muslin", "Muslman", "Muslum", "Musulman", "Musulmane")) {x$n[i] <- 3}  #Muslim

        if (x$l[i] %in% c("Jew or Isreaeli", "Jewish", "Judaica ou israelita", "Judaism", "Zion", "Zionist")) {x$n[i] <- 4}  #Jewish

        if (x$l[i] %in% c("\"Celestes\"", "7th Day adventist", "Adventist", "Adventist/Jehova", "Adventiste", "Adventiste/Jehova",
                          "African instituted churches", "Aglipay", "Anglican", "Anglican Church", "Apostolic sect", "Apostolic Sect",
                          "Arm,e du Salut", "Assembly of god", "Assembly of God", "Aventist", "Baptist", "Born Again Christian (other recode)",
                          "Born-again/Jehovah's Witness/SDA", "Budu", "CCAP", "Celestes", "Celestes (Celestial Church of Christ)", "Charismatic",
                          "Chistiane", "Christan", "Christian", "christian", "Christian Protestant", "Christian/protestant", "Christianity",
                          "Christrian", "Eglise de r\U00E9veil", "Eglise du 7e jour", "Elcin", "Evangelic", "Evangelica (Crente)", "Evangelical",
                          "Evangelical / Protestant", "Evangelical Alliance", "Evangelical churches", "Evangelical Lutheran", "Evangelical presbyterian",
                          "Evangelical/pentecostal", "Evangelist", "FJKM/FLM/Anglikana", "Iglesia ni Cristo", "Iglesia ni kristo", "Iglesia Ni Kristo",
                          "Jahovai", "JEHOVAH witness", "Jehovah witness", "Jehovah Witness", "Jehovah's Witness", "Jehovah's Witness (other recode)",
                          "Jehovah's witnesses", "Jeova witness", "Kibanguist", "Kimbanguist", "Kimbanguiste", "Lesotho Evangelical church",
                          "Method., Advent., ..", "Methodist", "Methodist/Baptist", "New apostolic", "Other Christian", "Other christian",
                          "other Christian", "Other Christian (not otherwise categorisable)", "Other Christian Church", "Other Christian religion",
                          "Other christians", "Other Christians", "Other chritians", "Other protestant", "Other Protestant", "Other Protestants",
                          "Pentecostal", "Pentecostal/Born Again/Evangelical", "Pentecostal/Charismatic", "Pentecostal/charismatic", "Pentecotist",
                          "Presbyterian", "Prostestant", "Protest /Oth Cristian", "Protestant", "Protestant (ex. evangelical, baptist, jehovah witness)",
                          "Protestant (Protestante)", "Protestant / Evangelic", "Protestant /Christian", "Protestant methodist", "Protestant Methodist",
                          "Protestant presbyterian, methodist", "Protestant, methodist, adventist, witness of Jesus", "Protestant/ methodist/adventist/Jehova witness",
                          "Protestant/ other Christian", "Protestant/ Other Christian", "Protestant/Anglican", "Protestant/FLM", "Protestant/other Christian",
                          "Protestant/other christian", "Protestanta", "Protestante", "Protestantism", "Protestants", "Rastafarian", "Salvation Army",
                          "Salvation army", "SDA", "Seventh Day Advent", "Seventh day advent.", "Seventh Day Advent./Baptist", "Seventh Day Advent/ Baptist",
                          "Seventh Day Adventist", "Seventh Day Adventist (other recode)", "Seventh Day Adventist / Baptist", "Seventh Day Adventist/Baptist",
                          "Seventh-day adventist", "Trad. prosestant", "Tradit. protestant", "United Church", "Universal")) {x$n[i] <- 5}  #Protestant

        if (x$l[i] %in% c("Aucune", "Autre", "Autres", "Baha'i", "Bahai", "Confucian", "Espirita Kardecista", "Espiritista kardecis",
                          "Jain", "Mammon", "Mana", "New Religions (Eglises Rebeillees)", "Non-Christian", "Only god", "Oriental religions",
                          "Other", "other", "Other (Outra)", "Other non-Christian", "Other religion", "Other religions", "Others", "Outras",
                          "Parsi / Zoroastrian", "Parsi/Zoroastrian", "Religioes orientais", "Revival church", "Sect", "Sikh", "Spiritual",
                          "Spiritual kardecista", "Spiritualist", "Zephirin/Matsouaniste/Ngunza", "Zephirrin/Matsouanist/Ngunza",
                          "Zoroastian/Parsi", "Animalist", "Animist", "Animiste", "Cao Dai", "Doni-Polo", "Donyi polo", "Espirita Afro-Bras.",
                          "Espiritista afro-bra", "Indigenous spirituality", "Kirat", "Mayan", "Nature worship", "Other traditional",
                          "Religion traditionelle", "Sanamahi", "Taditional", "Tradition/animist", "Traditional", "Traditional (Vodoun)",
                          "Traditional / animist", "Traditional Mayan", "Traditional religion", "Traditional Religion", "Traditional/animist",
                          "Traditional/Animist", "Traditional/spiritualist", "Traditionalist", "Traditionelle", "Traditionists", "Traditionnal",
                          "Traditionnal/animist", "Umbanda /Candomble", "Vaudou", "Vaudousant", "Vodoun")) {x$n[i] <- 6}  #Other

        if(x$l[i] %in% c("Buddhism", "Buddhist", "Buddhist / Neo-Buddhist", "Buddhist/Neo Buddhist", "Buddhist/Neo-Buddhist",
                         "Budhist", "Hoa Hao")) {x$n[i] <- 7}  #Buddhist

        if (x$l[i] %in% c("Hindu", "Hinduism")) {x$n[i] <- 8}  #Hindu

        if (female) {dat$religion[which(dat$v130==x$o[i])] <- x$n[i]} else {dat$religion[which(dat$mv130==x$o[i])] <- x$n[i]}  #Insert new value into recoded religion variable
      }
    }

    dat$religion <- factor(dat$religion, levels = c(1:8), labels = c("None", "Catholic / Orthodox", "Muslim", "Jewish",
                                                                     "Protestant / Christian", "Other", "Buddhist", "Hindu"))

    #### Design ####
    #Identifier (non-standard variable name in Egypt 1988-89)
    if (female) {if (dat$v000[1]=="EG" & (dat$v007[1]==88 | dat$v007[1]==89)) {dat$id <- dat$`case$id`} else {dat$id <- dat$caseid}}
    if (!female) {if (dat$mv000[1]=="EG" & (dat$mv007[1]==88 | dat$mv007[1]==89)) {dat$id <- dat$`mcase$id`} else {dat$id <- dat$mcaseid}}
    dat$id <- as.character(dat$id)

    #Country
    country.codes <- c("AF", "AL", "AO", "AM", "AZ", "BD", "BJ", "BO", "BT", "BR", "BF", "BU", "KH", "CM", "CV", "CF", "TD", "CO", "KM", "CG",
                       "CD", "CI", "DR", "EC", "EG", "ES", "EK", "ER", "ET", "GA", "GM", "GH", "GU", "GN", "GY", "HT", "HN", "IA", "ID", "JO",
                       "KK", "KE", "KY", "LA", "LS", "LB", "MD", "MW", "MV", "ML", "MR", "MX", "MB", "MA", "MZ", "MM", "NM", "NP", "NC", "NI",
                       "NG", "OS", "PK", "PY", "PE", "PH", "RW", "WS", "ST", "SN", "SL", "ZA", "LK", "SD", "SZ", "TJ", "TZ", "TH", "TL", "TG",
                       "TT", "TN", "TR", "TM", "UG", "UA", "UZ", "VN", "YE", "ZM", "ZW", "PG", "ZZ")
    country.names <- c("Afghanistan", "Albania", "Angola", "Armenia", "Azerbaijan", "Bangladesh", "Benin", "Bolivia", "Botswana", "Brazil", "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Cape Verde", "Central African Republic", "Chad", "Colombia", "Comoros", "Congo",
                       "Congo Democratic Republic", "Cote d'Ivoire", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Ethiopia", "Gabon", "Gambia", "Ghana", "Guatamala", "Guinea", "Guyana", "Haiti", "Honduras", "India", "Indonesia", "Jordan",
                       "Kazakhstan", "Kenya", "Kyrgyz Republic", "Lao People's Democratic Republic", "Lesotho", "Liberia", "Madagascar", "Malawi", "Maldives", "Mali", "Mauritania", "Mexico", "Moldova", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nepal", "Nicaragua", "Niger",
                       "Nigeria", "Nigeria (Ondo State)", "Pakistan", "Paraguay", "Peru", "Philippines", "Rwanda", "Samoa", "Sao Tome and Principe", "Senegal", "Sierra Leone", "South Africa", "Sri Lanka", "Sudan", "Swaziland", "Tajikstan", "Tanzania", "Thailand", "Timor-Leste", "Togo",
                       "Trinidad and Tobago", "Tunisia", "Turkey", "Turkministan", "Uganda", "Ukraine", "Uzbekistan", "Vietnam", "Yemen", "Zambia", "Zimbabwe", "Papua New Guinea", "Fake Country")
    if (female) {dat$country <- country.names[match(substr(dat$v000,1,2), country.codes)]} else {dat$country <- country.names[match(substr(dat$mv000,1,2), country.codes)]}

    #Weighting variables
    if (female) {dat$weight <- dat$v005/1000000} else {dat$weight <- dat$mv005/1000000}
    if (female) {dat$cluster <- dat$v021} else {dat$cluster <- dat$mv021}
    if (female) {dat$strata <- dat$v023} else {dat$strata <- dat$mv023}

    #Wave (called "Recode" in the DHS)
    if (female) {
      dat$wave <- as.numeric(substr(dat$v000,3,3))
      dat$wave[which(dat$country=="Vietnam" & dat$v007==97)] <- 3  #Recode was labeled as "T" for Vietnam 1997
      dat$wave[which(dat$country=="Vietnam" & dat$v007==2)] <- 4  #Recode was labeled as "T" for Vietnam 2002
    }
    if (!female) {
      dat$wave <- as.numeric(substr(dat$mv000,3,3))
      dat$wave[which(dat$country=="Vietnam" & dat$mv007==97)] <- 3  #Recode was labeled as "T" for Vietnam 1997
      dat$wave[which(dat$country=="Vietnam" & dat$mv007==2)] <- 4  #Recode was labeled as "T" for Vietnam 2002
    }
    dat$wave[which(is.na(dat$wave))] <- 1  #In recode 1, v000 only contained the country code

    #Year and month of data collection
    if (female) {dat$cmc <- dat$v008} else {dat$cmc <- dat$mv008}  #Century month code
    if (dat$country[1]=="Ethiopia") {dat$cmc <- dat$cmc + 92}  #Correction for Ethiopian calendar (https://dhsprogram.com/data/Guide-to-DHS-Statistics/Organization_of_DHS_Data.htm)
    if (dat$country[1]=="Nepal" & dat$wave[1]!=3) {dat$cmc <- dat$cmc - 681}  #Correction for Nepali calendar (https://dhsprogram.com/data/Guide-to-DHS-Statistics/Organization_of_DHS_Data.htm)
    if (dat$country[1]=="Nepal" & dat$wave[1]==3) {dat$cmc <- dat$cmc + 519}
    if (dat$country[1]=="Afghanistan") {dat$cmc <- dat$cmc + 255}  #Correction for Afghani calendar (https://dhsprogram.com/data/Guide-to-DHS-Statistics/Organization_of_DHS_Data.htm)
    dat$year <- 1900+floor((dat$cmc-1)/12)
    dat$month <- dat$cmc - (12 * (dat$year - 1900))
    dat$month <- factor(dat$month, levels = c(1:12), labels = c("January", "February", "March", "April", "May", "June",
                                                                "July", "August", "September", "October", "November", "December"),
                        ordered = TRUE)

    #Source file
    dat$file <- files[file]

    #Source survey
    dat$survey <- "DHS"

    #### Clean up ####
    #Check for extra variables; if not present, add them
    for (var in extra.vars) {if (!(var %in% colnames(dat))) {  #For each extra variable that is not present:
      dat$v8675309 <- NA   #(1) Add a temporary missing variable called `v8675309`
      colnames(dat)[length(colnames(dat))] <- var  #(2) Rename this new missing variable with the correct name
      }
    }

    #Reduce data
    if (!is.null(extra.vars)) {
      dat <- dat[,c("cf_want", "cf_ideal", "famstat",  #Family status
                    "sex", "age", "education", "partnered", "residence", "employed",  #Demographics
                    "religion",  #Attitude
                    "id", "country", "weight", "cluster", "strata", "file", "survey", "wave", "year", "month",  #Design
                    extra.vars)]
    } else {
      dat <- dat[,c("cf_want", "cf_ideal", "famstat",  #Family status
                    "sex", "age", "education", "partnered", "residence", "employed",  #Demographics
                    "religion",  #Attitude
                    "id", "country", "weight", "cluster", "strata", "file", "survey", "wave", "year", "month")]  #Design
    }

    #Start data file, or append to existing data file
    if (file==1) {data <- dat} else {data <- rbind(data, dat)}

  }

  #Finalize
  if (progress) {close(pb)}  #Close progress bar
  class(data) <- c("data.frame", "childfree")
  return(data)

}