makeCohort: makeCohort() takes a vector of CPS2 variable names and merges...
In buddha2490/MargotFun: Just a bunch of useful functions to do my job.

Description Usage Arguments Value Author(s) Examples

View source: R/makeCohort.R

This function takes the variables listed in an input vector, pulls them from each of the CPS2, Nutrition Cohort, and Lifelink files, and merges them into a single data frame for later processing. It is a simple program; it just saves me from having to write a lot of readRDS() expressions.

Note: The returned data will be subset to MASTER==1 and NUT_92==1.

1	makeCohort(vars, sex = "BOTH", intialPath = file.path("s:/cps"))

`vars`	Character vector of variable names. Do not include "ID" in the list. Not case sensitive.
`sex`	"MEN", "WOMEN" or "BOTH". Defaults to "BOTH"
`intialPath`	Root directory of the cohort files. Defaults to the S: drive ("s:/cps"), but a local path can be supplied instead.

A data frame including all the variables listed in the input vector.

Brian Carter

##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
## makeCohort(): collect the requested variables from the per-sex cohort
## files and merge them into one data frame keyed on "ID".
##
## Arguments:
##   vars        Character vector of variable names (do not include "ID").
##               Matched against the `variables` catalogue ignoring case.
##   sex         "MEN", "WOMEN" or "BOTH" (any case). Defaults to "BOTH".
##   intialPath  Root directory the catalogue's sub-paths are relative to.
##               The spelling is part of the public signature and is kept.
##
## Value:
##   For "MEN" or "WOMEN", the full join (by "ID") of every file holding a
##   requested variable, or NULL when no file matches. For "BOTH", the men
##   and women results stacked with dplyr::bind_rows().
##
## Depends on a `variables` object defined outside this function: a list
## with `men` and `women` data frames carrying the columns Variables,
## subPath and File (plus Sex, which is used when sex is "BOTH").
function (vars, sex = "BOTH", intialPath = file.path("s:/cps"))
{
    # Fail loudly when dplyr is missing; require() would only return FALSE.
    if (!requireNamespace("dplyr", quietly = TRUE)) {
        stop("Package 'dplyr' is required by makeCohort()", call. = FALSE)
    }
    message("Last updated on July 30th 2019")
    message("Cohort only includes data 1982, and 1992-2015.  If followup goes later, please update the function")

    # Validate `sex` up front so a typo gives a clear error instead of an
    # "object 'foo' not found" further down.
    sex <- toupper(sex)
    if (length(sex) != 1L || !sex %in% c("MEN", "WOMEN", "BOTH")) {
        stop("`sex` must be one of \"MEN\", \"WOMEN\" or \"BOTH\"",
            call. = FALSE)
    }

    # Catalogue rows for the requested variables, with the full file path
    # appended. Matching ignores case, as the documentation promises.
    lookup <- function(catalogue) {
        wanted <- toupper(as.character(catalogue$Variables)) %in%
            toupper(vars)
        hits <- catalogue[wanted, , drop = FALSE]
        hits$FullPath <- file.path(intialPath, hits$subPath, hits$File)
        hits
    }

    menFiles <- NULL
    menVars <- NULL
    womenFiles <- NULL
    womenVars <- NULL
    if (sex == "MEN") {
        foo <- lookup(variables$men)
        menFiles <- unique(foo$FullPath)
        menVars <- c(as.character(unique(foo$Variables)), "ID")
    }
    else if (sex == "WOMEN") {
        foo <- lookup(variables$women)
        womenFiles <- unique(foo$FullPath)
        womenVars <- c(as.character(unique(foo$Variables)), "ID")
    }
    else {
        foo <- lookup(rbind(variables$men, variables$women))
        # %in% instead of == so a missing Sex never yields an NA path.
        isMen <- foo$Sex %in% "MEN"
        isWomen <- foo$Sex %in% "WOMEN"
        menFiles <- unique(foo$FullPath[isMen])
        menVars <- c(as.character(unique(foo$Variables[isMen])), "ID")
        womenFiles <- unique(foo$FullPath[isWomen])
        womenVars <- c(as.character(unique(foo$Variables[isWomen])), "ID")
    }

    # Start workers only when there is something to read, and never more
    # than the largest batch of files needs. detectCores() may return NA.
    cl <- NULL
    nFiles <- max(length(menFiles), length(womenFiles))
    if (nFiles > 0L) {
        nCores <- parallel::detectCores(logical = TRUE)
        if (is.na(nCores)) {
            nCores <- 1L
        }
        cl <- parallel::makeCluster(max(1L, min(nCores, nFiles)))
        # Release the workers even if a read or a join fails.
        on.exit(parallel::stopCluster(cl), add = TRUE)
    }

    # Worker: read one file and keep only the wanted columns. `keep` is
    # passed as an argument, so no clusterExport() is needed; drop = FALSE
    # keeps a data frame even when a single column survives.
    readSubset <- function(path, keep) {
        dat <- readRDS(path)
        dat[, names(dat) %in% keep, drop = FALSE]
    }

    # Read every file in parallel, then full-join the pieces on "ID".
    buildCohort <- function(files, keep) {
        if (length(files) == 0L) {
            return(NULL)
        }
        pieces <- parallel::parLapply(cl, files, readSubset, keep = keep)
        Reduce(function(x, y) dplyr::full_join(x, y, by = "ID"), pieces)
    }

    menCohort <- buildCohort(menFiles, menVars)
    womenCohort <- buildCohort(womenFiles, womenVars)

    switch(sex,
        MEN = menCohort,
        WOMEN = womenCohort,
        BOTH = dplyr::bind_rows(menCohort, womenCohort))
}