R/mort.R

Defines functions mort_read

Documented in mort_read

# * download ------------------------------------------------------------


#' Download mortality data
#'
#' @return mortality data
#' @export
#'
mort_download <- \(){
    html <- xml2::read_html('https://ftp.cdc.gov/pub/Health_Statistics/NCHS/datalinkage/linked_mortality/')
    mortdf <- html |>
        rvest::html_elements(xpath = '//pre') |>
        as.character() |>
        strsplit('<br>|<pre>') |>
        do::list1() |>
        set::grep_and('NHANES') |>
        set::grep_not_and('NHANES_III') |>
        do::Trim() |>
        do::Replace0('</a>','href=\\"') |>
        do::Replace(" {0,}<a {0,}",'<a') |>
        do::Replace(' {0,}\\"> {0,}','\\">') |>
        do::col_split(c('<a','">'),
                      colnames = c('update','href','filename'))
    mortdf$href <- paste0('https://ftp.cdc.gov',mortdf$href)
    mortdir <- paste0(get_config_path(),'/mort')
    mortdf$filename <- paste0(mortdir,'/',tolower(mortdf$filename))
    if (!dir.exists(mortdir)) dir.create(mortdir,recursive = TRUE)
    for (i in 1:nrow(mortdf)) {
        if (i==1){
            cat(crayon::red('Download mortality data: ',nrow(mortdf)),'\n')
        }
        cat(do::file.name(mortdf$filename[i]),'\n')
        nullcon <- file(nullfile(), open = "wb")
        sink(nullcon, type = "message")
        wait <- TRUE
        while (wait) {
            download <- tryCatch(download.file(mortdf$href[i],
                                               destfile = mortdf$filename[i],
                                               quiet = FALSE,mode='wb'),
                                 error=\(e) 'e',
                                 warning=\(w) 'w')
            wait <- ifelse(download=='e' | download=='w',TRUE,FALSE)
        }
        sink(type = "message")
        close(nullcon)
        if (!wait){
            dsn <- readr::read_fwf(file=mortdf$filename[i],
                            col_types = "ciiiiiiiddii",
                            readr::fwf_cols(publicid = c(1,14),
                                     eligstat = c(15,15),
                                     mortstat = c(16,16),
                                     ucod_leading = c(17,19),
                                     diabetes = c(20,20),
                                     hyperten = c(21,21),
                                     dodqtr = c(22,22),
                                     dodyear = c(23,26),
                                     wgt_new = c(27,34),
                                     sa_wgt_new = c(35,42),
                                     permth_int = c(43,45),
                                     permth_exm = c(46,48)
                            ),
                            na = ".",
                            progress = FALSE) |> as.data.frame()
            # create the ID (SEQN) for the NHANES surveys
            dsn$seqn <- do::left(dsn$publicid,5)
            #Drop NHIS variables
            df <- dsn[,set::not(colnames(dsn),c('publicid','dodqtr','dodyear','wgt_new','sa_wgt_new'))]
            file <- do::Replace(mortdf$filename[i],'\\.dat.*','.tsv')
            write.table(df,file,sep = '\t',row.names = FALSE)
        }
    }
    cat('\n')
    mort_varLabel()
    mort_codebook()
    cat('create varLabel file\n')
    cat('create codebook file\n')
}


mort_varLabel <- \(){
    variable <- c("eligstat", "mortstat", "ucod_leading", "diabetes", "hyperten",
      "permth_int", "permth_exm")
    label <- c("Eligibility Status for Mortality Follow-up", "Final Mortality Status",
               "Underlying Cause of Death: Recode", "Diabetes Flag from Multiple Cause of Death (MCOD)",
               "Hypertension Flag from Multiple Cause of Death (MCOD)", "Number of Person Months of Follow-up from NHANES interview date",
               "Number of Person Months of Follow-up from NHANES Mobile Examination Center (MEC) date"
    )
    df <- data.frame(variable,label)
    file = paste0(get_mort_path(),'mortality.varLabel')
    write.table(df,file,sep = '\t',row.names = FALSE)
}


mort_codebook <- \(){
    variable <- c("eligstat", "eligstat", "eligstat", "mortstat", "mortstat",
                  "ucod_leading", "ucod_leading", "ucod_leading", "ucod_leading",
                  "ucod_leading", "ucod_leading", "ucod_leading", "ucod_leading",
                  "ucod_leading", "ucod_leading", "diabetes", "diabetes", "hyperten",
                  "hyperten")
    id <-c(1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
           4L, 4L, 5L, 5L)
    code <- c("1", "2", "3", "0", "1", "1", "2", "3", "4", "5", "6", "7",
              "8", "9", "10", "0", "1", "0", "1")
    label <- c("Eligible", "Under age 18, not available for public release",
               "Ineligible", "Assumed alive", "Assumed deceased", "Diseases of heart (I00-I09, I11, I13, I20-I51)",
               "Malignant neoplasms (C00-C97)", "Chronic lower respiratory diseases (J40-J47)",
               "Accidents (unintentional injuries) (V01-X59, Y85-Y86)", "Cerebrovascular diseases (I60-I69)",
               "Alzheimer's disease (G30)", "Diabetes mellitus (E10-E14)", "Influenza and pneumonia (J09-J18)",
               "Nephritis, nephrotic syndrome and nephrosis (N00-N07, N17-N19, N25-N27)",
               "All other causes (residual)", "No - Condition not listed as a multiple cause of death",
               "Yes - Condition listed as a multiple cause of death", "No - Condition not listed as a multiple cause of death",
               "Yes - Condition listed as a multiple cause of death")
    df <- data.frame(id,variable,code,label)
    file = paste0(get_mort_path(),'mortality.codebook')
    write.table(df,file,sep = '\t',row.names = FALSE)
}

# * read ------------------------------------------------------------
#' mortality data
#'
#' @param years one or more years, missing for all
#' @param varLabel logical, default is TRUE.
#' @param codebook logical, default is TRUE.
#'
#' @return
#' @export
#'
mort_read <- function(years,varLabel=FALSE,codebook=TRUE){
  (years <- prepare_years(years) |> do::Replace('-','_'))
  tsv <- get_mort_path() |> list.files(pattern = 'tsv',full.names = TRUE) |>
    set::grep_or(years)
  if (length(tsv)==0){
    cat('Invalid years:',paste0(years,collapse = ', '),'\n')
    return()
  }
  ck <- !sapply(years, function(i) any(grepl(i,tsv)))
  if (any(ck)){
    cat('Invalid years:',paste0(years[ck],collapse = ', '),'\n')
  }
  df <- lapply(tsv,function(i){
    data.table::fread(i,showProgress = FALSE,data.table = FALSE)
  }) |> do.call(what = plyr::rbind.fill)
  if (codebook){
    cd <- read.delim(paste0(get_mort_path(),'mortality.codebook'))
    for (i in 1:ncol(df)) {
      if (colnames(df)[i] %in% cd$variable){
        ck <- cd[,'variable'] == colnames(df)[i]
        head(cd)
        df[,i] <- recode(df[,i],paste0(cd[ck,"code"],'::',cd[ck,"label"]))
      }
    }
  }
  if (varLabel){
    vl <- read.delim(paste0(get_mort_path(),'mortality.varLabel'))
    df <- sprintf('"%s" = "%s"',vl$variable,vl$label) |>
      paste0(collapse = ', ') |>
      sprintf(fmt = 'expss::apply_labels(df,%s)') |>
      parse(file='',n=NULL) |>
      eval()
  }
  df
}



# * attach ------------------------------------------------------------

# tsv <- nhs_tsv(years = 1999:2012,'demo')
# dl <- nhs_read(tsv,varLabel = FALSE,ignore.case = TRUE)
# class(dl)
yikeshu0611/nhanesR documentation built on Jan. 29, 2022, 6:08 a.m.