#' available data in NHANES database
#'
#' @param years one or more years
#' @param cat logical, wheter to show the process
#' @return available data
#' @name nhs_files
#' @export
#'
#' @examples
#' # nhs_files_web(years=1999,items=c('d','e','l','q'))
#' # nhs_files_web(years=2019,items=c('d','e','l','q'))
#' # nhs_files_web(nhs_year(range = F),'demo')
nhs_files_web <- function(years,items,cat=TRUE){
if (do::cnOS()){
retrieve <-tmcn::toUTF8("\u63D0\u53D6\u6570\u636E(\u5E74):")
items0 <- tmcn::toUTF8("items\u8D4B\u503C\u4E0D\u5BF9,\u5E94\u8BE5\u662F\u4E0B\u5217\u503C: ")
}else{
retrieve <-'retrieve items (year):'
items0 <- 'items is not right, which should be: '
}
(years <- prepare_years(years))
(items <- prepare_items(items))
(dt <- rep(items,each=length(years)))
(ys <- rep(do::Replace0(years,'-.*'),length(items)))
cycle2018 <- 'CycleBeginYear'
cycle2019 <- 'Cycle'
cycle <- sapply(ys, function(i) if (as.numeric(do::Replace0(i,'-.*'))==2019) cycle2019 else if(as.numeric(i)<2019) cycle2018)
ys[ys=='2019'] <- '2017-2020'
urls <- sprintf('https://wwwn.cdc.gov/nchs/nhanes/search/datapage.aspx?Component=%s&%s=%s',
dt,cycle,ys)
(urls <- urls[order(ys)])
(dt <- dt[order(ys)])
(ys <- ys[order(ys)])
for (i in 1:length(urls)) {
if (i==1){
res <- list()
if (cat) cat('\n',years[i])
}else{
if (ys[i] != ys[i-1]) if (cat) cat('\n',years[i])
}
if (cat) cat(' ',dt[i])
wait <- TRUE
while (wait) {
html <- tryCatch(xml2::read_html(urls[i]),error=function(e) 'e')
wait <- ifelse(is.character(html),TRUE,FALSE)
}
tbl <- html |>
rvest::html_table()
if (length(tbl)==0){
res <- c(res,list(data.frame(cbind(year=ys[i],items=dt[i]))))
}else{
df <- tbl |>
listn(1) |>
as.data.frame() |>
df.tolower()
th <- html |>
rvest::html_elements(xpath = '//table/thead/tr//th') |> rvest::html_text()
urlp <- which(tolower(th) |> do::Replace0(' ')=='docfile')
xpath <- sprintf('//table/tbody//tr/td[%s]',urlp)
url1 <- html |>
rvest::html_elements(xpath = xpath) |>
sapply(function(i) i |> rvest::html_elements(xpath = 'a') |> rvest::html_attr('href'))
url1[sapply(url1, length) == 0] <- NA
url1 <- unlist(url1)
ck <- do::left(url1,2) == '..' & !is.na(url1)
url1[ck] <- do::Replace(url1[ck],'\\.\\.','/nchs/nhanes')
xpath <- sprintf('//table/tbody//tr/td[%s]',urlp+1)
url2 <- html |>
rvest::html_elements(xpath = xpath) |>
sapply(function(i) i |> rvest::html_elements(xpath = 'a') |> rvest::html_attr('href'))
url2[sapply(url2, length) == 0] <- NA
url2 <- unlist(url2)
ck <- do::left(url2,2) == '..' & !is.na(url2)
url2[ck] <- do::Replace(url2[ck],'\\.\\.','/nchs/nhanes')
colnames(df)[tolower(colnames(df))=='years'] <- 'year'
dfi <- cbind(year=prepare_years(ys[i]),
items=dt[i],
df[,set::not(colnames(df),'year')],
'DOC url'=paste0('https://wwwn.cdc.gov',url1),
'Data url'=paste0('https://wwwn.cdc.gov',url2))
ck <- do::file.name(url2) |> tolower() == "dxa.aspx"
ck[is.na(ck)] <- FALSE
if (any(ck)){
dx <- dxa.aspx(url = dfi$`DOC url`[ck],
years = ys[i],
items=dt[i])
dfi[ck,] <- dx
}
ck <- paste0(df$`Doc File`,dfi$`Data url`) |> do::duplicated_last()
if (any(ck)){
dfi <- dfi[!ck,]
}
row.names(dfi) <- NULL
res <- c(res,list(dfi))
}
}
x <- do.call(plyr::rbind.fill,res)
class(x) <- c('nhs_file_web','data.frame')
rownames(x) <- NULL
x
}
dxa.aspx <- function(url,years,items){
wait <- TRUE
while (wait) {
html <- tryCatch(xml2::read_html(url), error=function(e) 'e')
wait <- ifelse(is.character(html),TRUE,FALSE)
}
# xpturl
xpturl <- html |>
rvest::html_elements(xpath = '//table[@id="GridView1"]/tbody/tr') |>
set::grep_and(years) |>
rvest::html_elements(xpath = 'td[4]/a') |>
do::attr_href() |>
sprintf(fmt = 'https://wwwn.cdc.gov%s')
# docurl
docurl <- html |>
rvest::html_elements(xpath = '//table[@id="GridView1"]/tbody/tr') |>
set::grep_and(years) |>
rvest::html_elements(xpath = 'td[3]/a') |>
do::attr_href() |>
sprintf(fmt = 'https://wwwn.cdc.gov%s')
# ftablej
ftablej <- html |>
rvest::html_elements(xpath = '//table[@id="GridView1"]') |>
rvest::html_table() |>
do::select(1,drop=TRUE) |>
as.data.frame()
ftablej <- ftablej[grepl(years,ftablej$Years),]
cbind(years=ftablej[,1],items,ftablej[,-1],docurl,xpturl)
}
listn <- function(x,n=1){
x[[n]]
}
df.tolower <- function(x){
for (i in 1:ncol(x)) {
x[,i] <- tolower(x[,i])
}
x
}
#' @rdname nhs_files
#' @export
#' @param pattern for nhs_files_pc()
#' @param file_ext file extensions for nhs_files_pc(), default is NULL to list all files
#'
nhs_files_pc <- function(pattern=NULL,items,years,exclude=NULL,file_ext=NULL,cat=TRUE){
if (missing(years)) years <- nhs_year_pc()
years <- prepare_years(years)
items <- prepare_items(items)
d1 <- get_config_path() %+% '/' %+% years %+% '/'
d2 <- lapply(d1, function(i) i %+% items)
d3 <- do.call(c,d2)
if (is.null(file_ext)) file_ext <- c("sas7bdat","codebook","varLabel","tsv","update","xpt")
if (!is.null(pattern)) pattern <- paste0(pattern,collapse = '|')
f1 <- list.files(path = d3,pattern = pattern,full.names = TRUE)
ck <- tools::file_ext(f1) %in% file_ext
f1[ck]
f2 <- f1[ck]
if (!is.null(exclude)) f2 <- set::grep_not_or(f2,exclude)
f2
}
select_df <- function(x,...){
x[,c(...)]
}
file.info2 <- function(file,i){
info <- file.info(file)
yeari <- i
itemsi <- rownames(info) |>
do::Replace0(paste0('.*',i,'/')) |>
do::Replace0('/.*')
filei <- rownames(info) |>
do::Replace0('.*/')
size <- sapply(info$size,size_bt2unit)
mtime <- as.character(info$mtime)
cbind(year=yeari,items=itemsi,file=filei,size=size,mtime=mtime) |>
data.frame()
}
#' filt tsv file
#'
#' @param ... filename
#'
#' @param items items
#' @param years years
#' @param cat logical. whether to print files
#'
#' @export
#'
nhs_tsv <- function(...,items,years,cat=TRUE){
file_ext='tsv'
years <- prepare_years(years)
items <- prepare_items(items)
(d1 <- get_config_path() %+% '/' %+% years %+% '/')
(d2 <- lapply(d1, function(i) i %+% items))
(d3 <- do.call(c,d2))
# if (is.null(file_ext)) file_ext <- c("sas7bdat","codebook","varLabel","tsv","update","xpt")
f1 <- list.files(path = d3,full.names = TRUE)
ck <- tools::file_ext(f1) %in% file_ext
f2 <- f1[ck]
pattern <- c(...)
if (is.null(pattern)) return(f2)
fn <- do::file.name(f2) |> do::Replace0('\\.tsv') |> paste0('.')
ck <- lookl(x = fn,...,ignore.case = TRUE)
x <- f2[ck]
if (cat) print(x)
invisible(x)
}
size_bt2unit <- function(bt){
if (bt < 1024){
paste(round(bt,2),'B')
}else if (bt < 1024*1024){
paste(round(bt/1024,2),'KB')
}else if (bt < 1024*1024*1024){
paste(round(bt/1024/1024,2),'MB')
}else if (bt < 1024*1024*1024*1024){
paste(round(bt/1024/1024/1024,2),'GB')
}else if (bt < 1024*1024*1024*1024*1024){
paste(round(bt/1024/1024/1024/1024,2),'TB')
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.