makeCohort: makeCohort() takes a vector of CPS2 variable names and merges...

Description Usage Arguments Value Author(s) Examples

View source: R/makeCohort.R

Description

This function takes a vector of variable names, pulls those variables from each of the CPS2, Nutrition Cohort, and Lifelink files, and merges them into a single data frame for later processing. It is a simple program that just saves me from having to write a lot of readRDS() expressions.

Note: The returned data will be subset to MASTER==1 and NUT_92==1.

Usage

1
makeCohort(vars, sex = "BOTH", intialPath = file.path("s:/cps"))

Arguments

vars

Character vector of variable names. Do not include "ID" in the list. Not case sensitive.

sex

"MEN", "WOMEN" or "BOTH". Defaults to "BOTH"

intialPath

Root directory under which the data files are located. Defaults to the S: drive, but a local path can be supplied instead.

Value

A data frame including all the variables listed in the input vector.

Author(s)

Brian Carter

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (vars, sex = "BOTH", intialPath = file.path("s:/cps"))
{
    require(dplyr)
    message("Last updated on July 30th 2019")
    message("Cohort only includes data 1982, and 1992-2015.  If followup goes later, please update the function")
    menFiles <- NULL
    menVars <- NULL
    womenFiles <- NULL
    womenVars <- NULL
    if (toupper(sex) == "MEN") {
        foo <- dplyr::filter(variables$men, Variables %in% vars)
        foo$FullPath <- file.path(initialPath, foo$subPath, foo$File)
        menFiles <- unique(foo$FullPath)
        menVars <- c(unique(foo$Variables), "ID")
    }
    if (toupper(sex) == "WOMEN") {
        foo <- dplyr::filter(variables$women, Variables %in%
            vars)
        foo$FullPath <- file.path(initialPath, foo$subPath, foo$File)
        womenFiles <- unique(foo$FullPath)
        womenVars <- c(unique(foo$Variables), "ID")
    }
    if (toupper(sex) == "BOTH") {
        foo <- dplyr::filter(rbind(variables$men, variables$women),
            Variables %in% vars)
        foo$FullPath <- file.path(initialPath, foo$subPath, foo$File)
        menFiles <- unique(foo$FullPath[foo$Sex == "MEN"])
        menVars <- c(unique(foo$Variables[foo$Sex == "MEN"]),
            "ID")
        womenFiles <- unique(foo$FullPath[foo$Sex == "WOMEN"])
        womenVars <- c(unique(foo$Variables[foo$Sex == "WOMEN"]),
            "ID")
    }
    head(foo)
    cl <- parallel::makeCluster(parallel::detectCores(logical = T))
    parallel::clusterExport(cl, c("foo", "menFiles", "menVars",
        "womenFiles", "womenVars"))
    menCohort <- parallel::parLapply(cl, menFiles, function(x) {
        dat <- readRDS(x)
        dat <- dat[, names(dat) %in% menVars]
        return(dat)
    }) %>% Reduce(function(x, y) full_join(x, y, "ID"), .)
    womenCohort <- parallel::parLapply(cl, womenFiles, function(x) {
        dat <- readRDS(x)
        dat <- dat[, names(dat) %in% womenVars]
        return(dat)
    }) %>% Reduce(function(x, y) full_join(x, y, "ID"), .)
    parallel::stopCluster(cl)
    rm(cl)
    if (toupper(sex) == "MEN") {
        return(menCohort)
    }
    if (toupper(sex) == "WOMEN") {
        return(womenCohort)
    }
    if (toupper(sex) == "BOTH") {
        return(dplyr::bind_rows(menCohort, womenCohort))
    }
  }

buddha2490/MargotFun documentation built on Nov. 4, 2019, 8:16 a.m.