Description Usage Arguments Value Author(s) Examples
This function takes the variables listed in an input vector, pulls them from each of the CPS2, Nutrition Cohort, and Lifelink files, and merges them into a single data frame for later processing. It is a simple program that just saves me from having to write a lot of readRDS() expressions.
Note: The returned data will be subset to MASTER==1 and NUT_92==1.
1 | makeCohort(vars, sex = "BOTH", intialPath = file.path("s:/cps"))
|
vars |
Character vector of variable names. Do not include "ID" in the list. Not case sensitive. |
sex |
"MEN", "WOMEN" or "BOTH". Defaults to "BOTH" |
intialPath |
Defaults to the S: drive, but I can also put in a local path. |
A data frame including all the variables listed in the input vector.
Brian Carter
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (vars, sex = "BOTH", intialPath = file.path("s:/cps"))
{
  # Build a cohort data frame containing the requested variables.
  #
  # Arguments:
  #   vars        Character vector of variable names to pull. "ID" is always
  #               appended as the join key, so it need not be listed.
  #   sex         One of "MEN", "WOMEN" or "BOTH" (matched case-insensitively).
  #   intialPath  Root directory that holds the cohort .rds files. The
  #               misspelled name is kept on purpose so that existing calls
  #               passing it by name keep working.
  #
  # Returns:
  #   For "MEN" / "WOMEN": the full join (on ID) of every file that holds a
  #   requested variable, or NULL when no file matches.
  #   For "BOTH": the men's and women's results stacked with
  #   dplyr::bind_rows().
  #
  # Relies on a `variables` object defined outside this function: a list with
  # `men` and `women` lookup tables carrying the columns Variables, subPath,
  # File and Sex.
  # NOTE(review): `vars` is matched against `Variables` case-sensitively;
  # confirm how the lookup tables store names if callers pass mixed case.

  if (!requireNamespace("dplyr", quietly = TRUE)) {
    stop("Package 'dplyr' is required", call. = FALSE)
  }

  # Validate up front so a bad `sex` fails with a clear message instead of an
  # "object not found" error further down.
  sex <- toupper(sex)
  if (length(sex) != 1L || !sex %in% c("MEN", "WOMEN", "BOTH")) {
    stop("`sex` must be one of \"MEN\", \"WOMEN\" or \"BOTH\"", call. = FALSE)
  }

  message("Last updated on July 30th 2019")
  message("Cohort only includes data 1982, and 1992-2015. If followup goes later, please update the function")

  lookup <- switch(sex,
    MEN = variables$men,
    WOMEN = variables$women,
    BOTH = rbind(variables$men, variables$women)
  )
  foo <- dplyr::filter(lookup, Variables %in% vars)
  # Use the argument as it is actually spelled in the signature.
  foo$FullPath <- file.path(intialPath, foo$subPath, foo$File)

  menFiles <- NULL
  menVars <- NULL
  womenFiles <- NULL
  womenVars <- NULL
  if (sex == "MEN") {
    menFiles <- unique(foo$FullPath)
    menVars <- c(unique(as.character(foo$Variables)), "ID")
  } else if (sex == "WOMEN") {
    womenFiles <- unique(foo$FullPath)
    womenVars <- c(unique(as.character(foo$Variables)), "ID")
  } else {
    isMen <- foo$Sex == "MEN"
    isWomen <- foo$Sex == "WOMEN"
    menFiles <- unique(foo$FullPath[isMen])
    menVars <- c(unique(as.character(foo$Variables[isMen])), "ID")
    womenFiles <- unique(foo$FullPath[isWomen])
    womenVars <- c(unique(as.character(foo$Variables[isWomen])), "ID")
  }

  # Worker: read one file and keep only the wanted columns. `drop = FALSE`
  # keeps a data frame even when a single column survives.
  readSubset <- function(path, keep) {
    dat <- readRDS(path)
    dat[, names(dat) %in% keep, drop = FALSE]
  }
  # Everything the worker needs arrives through its arguments, so detach it
  # from this call frame: nothing has to be exported to the cluster and the
  # frame's contents are not serialized to every worker.
  environment(readSubset) <- globalenv()

  menCohort <- NULL
  womenCohort <- NULL
  nFiles <- max(length(menFiles), length(womenFiles))
  if (nFiles > 0L) {
    nCores <- parallel::detectCores(logical = TRUE)
    if (is.na(nCores)) {
      nCores <- 1L
    }
    # Never start more workers than there are files to read in one pass.
    cl <- parallel::makeCluster(min(nCores, nFiles))
    # Registered immediately so the workers are shut down on error as well.
    on.exit(parallel::stopCluster(cl), add = TRUE)

    joinById <- function(x, y) dplyr::full_join(x, y, by = "ID")
    readAndJoin <- function(files, keep) {
      if (length(files) == 0L) {
        return(NULL)
      }
      Reduce(joinById, parallel::parLapply(cl, files, readSubset, keep = keep))
    }
    menCohort <- readAndJoin(menFiles, menVars)
    womenCohort <- readAndJoin(womenFiles, womenVars)
  }

  switch(sex,
    MEN = menCohort,
    WOMEN = womenCohort,
    BOTH = dplyr::bind_rows(menCohort, womenCohort)
  )
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.