R/nhs_codebook.R

Defines functions nhs_codebook.dataframe nhs_codebook.character nhs_codebook

Documented in nhs_codebook

#' Codebook for variable
#'
#' @param ... one(suggest) or more variable names
#' @param tolower logical
#' @return variable codebook
#' @export
#
nhs_codebook <- function(...,tolower=FALSE){
    x <- list(...)
    if (length(x)==0) return(nhs_codebook.character(character())) # return all
    (ckdf <- sapply(x,class) == 'data.frame')
    (cklist <- sapply(x,class) == 'list')
    (ckpath <- sapply(x, function(i) any(grepl(get_config_path(),i,ignore.case = TRUE))))
    if (any(ckdf)){
        files <- x[ckdf]
        other <- x[!(ckdf | cklist| ckpath)]
        variable <- lapply(other, function(i) i[!grepl(get_config_path(),i,ignore.case = TRUE)]) |>
            unlist() |> tolower() |> unique()
        lapply(files,function(i) nhs_codebook.dataframe(variable, i,tolower)) |>
            do.call(what = rbind)
    }else if(any(ckpath)){
        (files <- x[ckpath] |> unlist() |> unique())
        (other <- x[!(ckdf | cklist| ckpath)])
        variable <- lapply(other, function(i) i[!grepl(get_config_path(),i,ignore.case = TRUE)]) |>
            unlist() |> tolower() |> unique()
        nhs_codebook.character(variable, files,tolower)
    }else if (!any(ckpath)){
        files <- nhs_files_pc(file_ext = 'codebook')
        other <- x[!(ckdf | cklist| ckpath)]
        variable <- lapply(other, function(i) i[!grepl(get_config_path(),i,ignore.case = TRUE)]) |>
            unlist() |> tolower() |> unique()
        nhs_codebook.character(variable, files,tolower)
    }else if (!any(cklist)){
        files <- x[cklist]
        other <- x[!(ckdf | cklist| ckpath)]
        variable <- lapply(other, function(i) i[!grepl(get_config_path(),i,ignore.case = TRUE)]) |>
            unlist() |> tolower() |> unique()
        lapply(files,function(i) lapply(files,function(j) nhs_codebook.dataframe(variable, j,tolower) |>
                                            do.call(what = rbind)) |>
                   do.call(what = rbind))
    }
}

nhs_codebook.character <- function(variable, files,tolower=FALSE){
    if (missing(files)) files <- nhs_tsv(cat = FALSE)
    ck <- tools::file_ext(files) != 'codebook'
    if (any(ck)){
        ext <- tools::file_ext(files[ck]) |> unique() |> sprintf(fmt = '\\.%s')
        files[ck] <- do::Replace0(files[ck],ext) |> sprintf(fmt = '%s.codebook')
    }

    if (length(files)>200) pb <- txtProgressBar(max=length(files),width = 25,style = 3)
    r <- lapply(files, function(i){
        if (length(files)>200) setTxtProgressBar(pb,which(files==i))
        i <- do::formal_dir(i) |> do::Replace('//','/')
        (Year <- prepare_years(i))
        (Item <- prepare_items(i))
        (file <- do::file.name(i) |> do::Replace0('\\.codebook'))
        (codei <- read.delim(i,comment.char = '#'))
        if (nrow(codei)==0) return()
        (codei <- codei[,c('variable','code','label')])
        if (tolower) codei$label <- tolower(codei$label)
        cbind(Year=Year,Item=Item,file=file,codei)
    }) |> do.call(what = plyr::rbind.fill)
    if (length(files)>200) cat('\n')
    if (length(variable)==0) return(r)
    ck <- lookl(r$variable,variable,ignore.case = TRUE)
    r <- r[ck,]
    dc <- reshape2::dcast(r,Year+Item+file+variable~label,toString,value.var = 'code')
    dc5 <- dc[,5:ncol(dc),drop=FALSE]
    od <- sapply(dc5,as.numeric) |> colMeans(na.rm = TRUE) |> order()
    dc[,5:ncol(dc)] <- dc5[,od]
    dc
}



nhs_codebook.dataframe <- function(variable, files,tolower=FALSE){
    codei <- files[,c('Year',variable)]
    if (tolower) codei$variable <- tolower(codei$variable)
    formu <- as.formula(sprintf('Year~%s',paste0(variable,collapse = '+')))
    reshape2::dcast(codei,formu,value.var = variable,fun.aggregate = length)
}
yikeshu0611/nhanesR documentation built on Jan. 29, 2022, 6:08 a.m.