R/fped_read.R

Defines functions fped_readi fped_read

Documented in fped_read

#' read FPED data
#'
#' @param years years
#' @param day 1 or 2
#' @param dietary tot or iff
#' @param version 2010 or 2015
#' @param cat logical
#' @param fun sum or mean
#'
#' @return FP components with HEI 2010 or HEI 2015 necessary FP
#' @export
#'
fped_read <- function(years,day=c('1','2'),dietary=c('tot','iff'),
                      version=c('2010','2015'),fun=c('sum','mean'),
                      cat=FALSE){

    day <- as.character(day)
    dietary <- match.arg(dietary)
    version <- as.character(version)
    version <- match.arg(version)

    if (length(day)==1){
        fped <- fped_readi(years,day,dietary,version,cat)
    }else if(length(day)==2){
        fun <- match.arg(fun)
        if (dietary=='iff') join <- c('seqn','line') else join <- 'seqn'
        fped1 <- fped_readi(years,'1',dietary,version,cat)
        fped2 <- fped_readi(years,'2',dietary,version,cat)
        fped <- dplyr::inner_join(fped1,fped2,join)
        choice <- set::not(colnames(fped2),join)
        for (i in choice) {
            which <- which(colnames(fped) %in% paste0(i,c('.x','.y')))
            if (fun =='sum'){
                fped$last <- rowSums(fped[,which],na.rm = TRUE)
            }else if(fun=='mean'){
                fped$last <- rowMeans(fped[,which],na.rm = TRUE)
            }
            fped <- fped[,-which]
            colnames(fped)[ncol(fped)] <- i
        }
    }
    return(fped)
}
fped_readi <- function(years,day=c('1','2'),dietary=c('tot','iff'),
                      version=c('2010','2015'),
                      cat=FALSE){

    fpeddir <- paste0(get_config_path(),'/fped')
    years <- prepare_years(years)
    mped <- data.frame()
    mpedfile <- c()
    if (dietary=='iff') join <- c('seqn','line') else join <- 'seqn'
    var <- c(join,'ridageyr','rstz',
             'f_total','f_citmlb','f_other',
             'v_drkgr','v_tomato','v_dpyel',
             'v_potato','v_starcy','v_other','v_total',
             'legumes',
             'g_whl','g_nwhl','g_total',
             'd_milk','d_yogurt','d_cheese','d_total',
             'm_meat','m_frank','m_organ','m_poult','m_fish_hi','m_fish_lo',
             'm_mpf',
             'm_egg','m_soy','m_nutsd',
             'add_sug','discfat_oil','discfat_sol','a_bev')
    if ('1999-2000' %in% years){
        mfile <- list.files(fpeddir,sprintf('pyr_%s.sas7bdat',dietary),recursive = TRUE,full.names = TRUE)
        mpedfile <- c(mpedfile,do::file.name(mfile))
        mped <- as.data.frame(haven::read_sas(mfile))
        colnames(mped) <- tolower(colnames(mped))
        mped$drddrstz <- ifelse(is.na(mped$drddrstz),mped$drddrsts,mped$drddrstz)
        mped <- drop_col(mped,'drddrsts')
        colnames(mped) <- rename_line(colnames(mped))
        colnames(mped) <- rename_rstz(colnames(mped))
        mped <- mped[,var]

        seqn <- nhs_read(nhs_tsv('demo',years = 1999,cat = FALSE),'seqn',cat = FALSE)$seqn
        mped <- mped[mped$seqn %in% seqn,]

        wjfrt <- as.data.frame(haven::read_spss(list.files(fpeddir,'cnppmyp_v1NHANES9900_wjfrt.sav',full.names = TRUE)))
        colnames(wjfrt) <- tolower(colnames(wjfrt))
        iff <- nhs_read(nhs_tsv('drxiff',years = 1999,cat = FALSE),'seqn',cat = FALSE,Year = FALSE)
        colnames(iff)[colnames(iff) =='drxiline'] <- 'line'
        jfrt <- dplyr::inner_join(iff,wjfrt,c('drdifdcd'='foodcode'))
        jfrt <- drop_col(jfrt,'drdifdcd')
        colnames(jfrt)[colnames(jfrt)=='frtjuice'] <- 'f_juice'
        colnames(jfrt)[colnames(jfrt)=='wholefrt'] <- 'f_whole'
        if (dietary=='tot'){
            f_juice <- aggregate(jfrt$f_juice,list(seqn = jfrt$seqn),sum)
            colnames(f_juice)[2] <- 'f_juice'
            f_whole <- aggregate(jfrt$f_whole,list(seqn = jfrt$seqn),sum)
            colnames(f_whole)[2] <- 'f_whole'
            jfrt <- dplyr::inner_join(f_whole,f_juice,'seqn')
        }
        mped <- dplyr::inner_join(mped,jfrt,join)
    }
    if ('2001-2002' %in% years){
        mfile <- list.files(fpeddir,sprintf('pyr_%s.sas7bdat',dietary),recursive = TRUE,full.names = TRUE)
        mpedfile <- unique(c(mpedfile,do::file.name(mfile)))
        mped01 <- as.data.frame(haven::read_sas(mfile))
        colnames(mped01) <- tolower(colnames(mped01))
        mped01$drddrstz <- ifelse(is.na(mped01$drddrstz),mped01$drddrsts,mped01$drddrstz)
        mped01 <- drop_col(mped01,'drddrsts')
        colnames(mped01)[colnames(mped01) == 'drxiline'] <- 'line'
        colnames(mped01)[colnames(mped01) == 'drddrstz'] <- 'rstz'
        mped01 <- mped01[,var]

        seqn <- nhs_read(nhs_tsv('demo',years = 2001,cat = FALSE),'seqn',cat = FALSE)$seqn
        mped01 <- mped01[mped01$seqn %in% seqn,]

        wjfrt <- as.data.frame(haven::read_spss(list.files(fpeddir,'cnppmypyrequivdb_v1_wjfrt.sav',full.names = TRUE)))
        colnames(wjfrt) <- tolower(colnames(wjfrt))
        iff <- nhs_read(nhs_tsv('drxiff',years = 2001,cat = FALSE),'seqn',cat = FALSE,Year = FALSE)
        colnames(iff)[colnames(iff) =='drxiline'] <- 'line'
        jfrt <- dplyr::inner_join(iff,wjfrt,c('drdifdcd'='foodcode'))
        jfrt <- drop_col(jfrt,'drdifdcd','foodname')
        colnames(jfrt)[colnames(jfrt)=='frtjuice'] <- 'f_juice'
        colnames(jfrt)[colnames(jfrt)=='wholefrt'] <- 'f_whole'
        if (dietary=='tot'){
            f_juice <- aggregate(jfrt$f_juice,list(seqn = jfrt$seqn),sum)
            colnames(f_juice)[2] <- 'f_juice'
            f_whole <- aggregate(jfrt$f_whole,list(seqn = jfrt$seqn),sum)
            colnames(f_whole)[2] <- 'f_whole'
            jfrt <- dplyr::inner_join(f_whole,f_juice,'seqn')
        }
        mped01 <- dplyr::inner_join(mped01,jfrt,join)

        mped <- rbind(mped,mped01)
    }
    if ('2003-2004' %in% years){
        mped03 <- fped0304(day=day,dietary=dietary)
        mped <- rbind(mped,mped03)
    }
    var <- c(join,'ridageyr','rstz',
             'f_citmlb','f_other','f_whole','f_juice','f_total',
             'v_drkgr',
             'v_redor_tomato','v_redor_other','v_redor_total',
             'v_starchy_potato','v_starchy_other','v_starchy_total',
             'v_other','v_total','v_legumes',
             'g_whole','g_refined','g_total',
             'd_milk','d_yogurt','d_cheese','d_total',
             'pf_meat','pf_curedmeat','pf_organ','pf_poult','pf_seafd_hi','pf_seafd_low','pf_mps_total',
             'pf_eggs','pf_soy','pf_nutsds','pf_legumes',
             'pf_total',
             'add_sugars','oils','solid_fats','a_drinks')
    if (nrow(mped)>0){
        # 1. Fruit  - - - - -  - - - - -  - - - - -
        # f_citmlb,f_other,f_total,(no f_juice in 1999 and 2001)

        # 2. Vegetables  - - - - -  - - - - -  - - - - -  - - - - -
        # v_drkgr
        colnames(mped)[colnames(mped) == 'v_tomato'] <- 'v_redor_tomato'
        colnames(mped)[colnames(mped) == 'v_dpyel'] <- 'v_redor_other'
        mped$v_redor_total <- mped$v_redor_other + mped$v_redor_tomato

        colnames(mped)[colnames(mped) == 'v_potato'] <- 'v_starchy_potato'
        colnames(mped)[colnames(mped) == 'v_starcy'] <- 'v_starchy_other'
        mped$v_starchy_total <- mped$v_starchy_other + mped$v_starchy_potato
        # v_other
        # v_total
        colnames(mped)[colnames(mped) == 'legumes'] <- 'v_legumes'

        # 3. grain  - - - - -  - - - - -  - - - - -  - - - - -
        colnames(mped)[colnames(mped) == 'g_whl'] <- 'g_whole'
        colnames(mped)[colnames(mped) == 'g_nwhl'] <- 'g_refined'
        # g_total

        # 4. diary  - - - - -  - - - - -  - - - - -  - - - - -
        # d_milk,d_yogurt,d_cheese,d_total

        # 5. protein foods(pf)
        colnames(mped)[colnames(mped) == 'm_meat'] <- 'pf_meat'
        colnames(mped)[colnames(mped) == 'm_frank'] <- 'pf_curedmeat'
        colnames(mped)[colnames(mped) == 'm_organ'] <- 'pf_organ'
        colnames(mped)[colnames(mped) == 'm_poult'] <- 'pf_poult'
        colnames(mped)[colnames(mped) == 'm_fish_hi'] <- 'pf_seafd_hi'
        colnames(mped)[colnames(mped) == 'm_fish_lo'] <- 'pf_seafd_low'
        colnames(mped)[colnames(mped) == 'm_mpf'] <- 'pf_mps_total'
        colnames(mped)[colnames(mped) == 'm_egg'] <- 'pf_eggs'
        colnames(mped)[colnames(mped) == 'm_soy'] <- 'pf_soy'
        colnames(mped)[colnames(mped) == 'm_nutsd'] <- 'pf_nutsds'
        mped$pf_legumes <- mped$v_legumes*4
        mped$pf_total <- mped$pf_mps_total+mped$pf_eggs+mped$pf_soy+mped$pf_nutsds

        colnames(mped)[colnames(mped) == 'add_sug'] <- 'add_sugars'
        colnames(mped)[colnames(mped) == 'discfat_oil'] <- 'oils'
        colnames(mped)[colnames(mped) == 'discfat_sol'] <- 'solid_fats'
        colnames(mped)[colnames(mped) == 'a_bev'] <- 'a_drinks'
        mped <- mped[,var]
    }
    if (any(! years %in% prepare_years(1999:2004))){
        years <- sapply(years, function(i) i |> do::knife_left(2) |> do::Replace0('-[0-9]{2}'))
        (pattern <- paste0('fped_dr',day,dietary,'_',years,'.sas7bdat'))
        if (cat) cat(paste0(paste0(c(mpedfile,pattern),collapse = '\n')),'\n')
        fped <- list.files(path = fpeddir,pattern = paste0(pattern,collapse = '|'),ignore.case = TRUE,full.names = TRUE,recursive = TRUE)
        x <- lapply(fped, function(i){
            x <- haven::read_sas(i)
            colnames(x) <- tolower(colnames(x)) |> do::Replace0('dr1t_','dr2t_','dr1i_','dr2i_')
            colnames(x)[colnames(x) %in% c('dr1iline','dr2iline')] <- 'line'
            colnames(x)[colnames(x) %in% c('dr1drstz','dr2drstz')] <- 'rstz'
            x$f_whole <- x$f_citmlb+x$f_other
            x <- as.data.frame(x[,var])
            x
        }) |> do.call(what = plyr::rbind.fill)
        mped <- rbind(mped,x)
    }
    if (version=='2010'){
        # fped$allmeat <- fped$pf_mps_total+fped$pf_eggs+fped$pf_nutsds+fped$pf_soy
        mped$seaplant <- mped$pf_seafd_hi+mped$pf_seafd_low+mped$pf_nutsds+mped$pf_soy
        mped$addsugc <- 16*mped$add_sugars
        mped$solfatc <- mped$solid_fats*9
    }else if(version=='2015'){
        mped$vtotalleg <- mped$v_total + mped$v_legumes
        mped$vdrkgrleg <-mped$v_drkgr + mped$v_legumes
        mped$pfallprotleg <- mped$pf_total + mped$pf_legumes
        mped$pfseaplantleg <- mped$pf_seafd_hi + mped$pf_seafd_low + mped$pf_nutsds + mped$pf_soy + mped$pf_legumes
    }
    mped <- mped[mped$ridageyr>=2 & mped$rstz==1,]
    mped <- drop_col(mped,'rstz','ridageyr')
    return(mped)
}
yikeshu0611/nhanesR documentation built on Jan. 29, 2022, 6:08 a.m.