# * download ------------------------------------------------------------
#' Download mortality data
#'
#' @return mortality data
#' @export
#'
mort_download <- \(){
html <- xml2::read_html('https://ftp.cdc.gov/pub/Health_Statistics/NCHS/datalinkage/linked_mortality/')
mortdf <- html |>
rvest::html_elements(xpath = '//pre') |>
as.character() |>
strsplit('<br>|<pre>') |>
do::list1() |>
set::grep_and('NHANES') |>
set::grep_not_and('NHANES_III') |>
do::Trim() |>
do::Replace0('</a>','href=\\"') |>
do::Replace(" {0,}<a {0,}",'<a') |>
do::Replace(' {0,}\\"> {0,}','\\">') |>
do::col_split(c('<a','">'),
colnames = c('update','href','filename'))
mortdf$href <- paste0('https://ftp.cdc.gov',mortdf$href)
mortdir <- paste0(get_config_path(),'/mort')
mortdf$filename <- paste0(mortdir,'/',tolower(mortdf$filename))
if (!dir.exists(mortdir)) dir.create(mortdir,recursive = TRUE)
for (i in 1:nrow(mortdf)) {
if (i==1){
cat(crayon::red('Download mortality data: ',nrow(mortdf)),'\n')
}
cat(do::file.name(mortdf$filename[i]),'\n')
nullcon <- file(nullfile(), open = "wb")
sink(nullcon, type = "message")
wait <- TRUE
while (wait) {
download <- tryCatch(download.file(mortdf$href[i],
destfile = mortdf$filename[i],
quiet = FALSE,mode='wb'),
error=\(e) 'e',
warning=\(w) 'w')
wait <- ifelse(download=='e' | download=='w',TRUE,FALSE)
}
sink(type = "message")
close(nullcon)
if (!wait){
dsn <- readr::read_fwf(file=mortdf$filename[i],
col_types = "ciiiiiiiddii",
readr::fwf_cols(publicid = c(1,14),
eligstat = c(15,15),
mortstat = c(16,16),
ucod_leading = c(17,19),
diabetes = c(20,20),
hyperten = c(21,21),
dodqtr = c(22,22),
dodyear = c(23,26),
wgt_new = c(27,34),
sa_wgt_new = c(35,42),
permth_int = c(43,45),
permth_exm = c(46,48)
),
na = ".",
progress = FALSE) |> as.data.frame()
# create the ID (SEQN) for the NHANES surveys
dsn$seqn <- do::left(dsn$publicid,5)
#Drop NHIS variables
df <- dsn[,set::not(colnames(dsn),c('publicid','dodqtr','dodyear','wgt_new','sa_wgt_new'))]
file <- do::Replace(mortdf$filename[i],'\\.dat.*','.tsv')
write.table(df,file,sep = '\t',row.names = FALSE)
}
}
cat('\n')
mort_varLabel()
mort_codebook()
cat('create varLabel file\n')
cat('create codebook file\n')
}
mort_varLabel <- \(){
variable <- c("eligstat", "mortstat", "ucod_leading", "diabetes", "hyperten",
"permth_int", "permth_exm")
label <- c("Eligibility Status for Mortality Follow-up", "Final Mortality Status",
"Underlying Cause of Death: Recode", "Diabetes Flag from Multiple Cause of Death (MCOD)",
"Hypertension Flag from Multiple Cause of Death (MCOD)", "Number of Person Months of Follow-up from NHANES interview date",
"Number of Person Months of Follow-up from NHANES Mobile Examination Center (MEC) date"
)
df <- data.frame(variable,label)
file = paste0(get_mort_path(),'mortality.varLabel')
write.table(df,file,sep = '\t',row.names = FALSE)
}
mort_codebook <- \(){
variable <- c("eligstat", "eligstat", "eligstat", "mortstat", "mortstat",
"ucod_leading", "ucod_leading", "ucod_leading", "ucod_leading",
"ucod_leading", "ucod_leading", "ucod_leading", "ucod_leading",
"ucod_leading", "ucod_leading", "diabetes", "diabetes", "hyperten",
"hyperten")
id <-c(1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 5L, 5L)
code <- c("1", "2", "3", "0", "1", "1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "0", "1", "0", "1")
label <- c("Eligible", "Under age 18, not available for public release",
"Ineligible", "Assumed alive", "Assumed deceased", "Diseases of heart (I00-I09, I11, I13, I20-I51)",
"Malignant neoplasms (C00-C97)", "Chronic lower respiratory diseases (J40-J47)",
"Accidents (unintentional injuries) (V01-X59, Y85-Y86)", "Cerebrovascular diseases (I60-I69)",
"Alzheimer's disease (G30)", "Diabetes mellitus (E10-E14)", "Influenza and pneumonia (J09-J18)",
"Nephritis, nephrotic syndrome and nephrosis (N00-N07, N17-N19, N25-N27)",
"All other causes (residual)", "No - Condition not listed as a multiple cause of death",
"Yes - Condition listed as a multiple cause of death", "No - Condition not listed as a multiple cause of death",
"Yes - Condition listed as a multiple cause of death")
df <- data.frame(id,variable,code,label)
file = paste0(get_mort_path(),'mortality.codebook')
write.table(df,file,sep = '\t',row.names = FALSE)
}
# * read ------------------------------------------------------------
#' mortality data
#'
#' @param years one or more years, missing for all
#' @param varLabel logical, default is TRUE.
#' @param codebook logical, default is TRUE.
#'
#' @return
#' @export
#'
mort_read <- function(years,varLabel=FALSE,codebook=TRUE){
(years <- prepare_years(years) |> do::Replace('-','_'))
tsv <- get_mort_path() |> list.files(pattern = 'tsv',full.names = TRUE) |>
set::grep_or(years)
if (length(tsv)==0){
cat('Invalid years:',paste0(years,collapse = ', '),'\n')
return()
}
ck <- !sapply(years, function(i) any(grepl(i,tsv)))
if (any(ck)){
cat('Invalid years:',paste0(years[ck],collapse = ', '),'\n')
}
df <- lapply(tsv,function(i){
data.table::fread(i,showProgress = FALSE,data.table = FALSE)
}) |> do.call(what = plyr::rbind.fill)
if (codebook){
cd <- read.delim(paste0(get_mort_path(),'mortality.codebook'))
for (i in 1:ncol(df)) {
if (colnames(df)[i] %in% cd$variable){
ck <- cd[,'variable'] == colnames(df)[i]
head(cd)
df[,i] <- recode(df[,i],paste0(cd[ck,"code"],'::',cd[ck,"label"]))
}
}
}
if (varLabel){
vl <- read.delim(paste0(get_mort_path(),'mortality.varLabel'))
df <- sprintf('"%s" = "%s"',vl$variable,vl$label) |>
paste0(collapse = ', ') |>
sprintf(fmt = 'expss::apply_labels(df,%s)') |>
parse(file='',n=NULL) |>
eval()
}
df
}
# * attach ------------------------------------------------------------
# tsv <- nhs_tsv(years = 1999:2012,'demo')
# dl <- nhs_read(tsv,varLabel = FALSE,ignore.case = TRUE)
# class(dl)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.