# R/first_gen_function.R

#' (IN DEVELOPMENT) Determine First Generation Status
#'
#' @description This function determines the first-generation status of a
#'    student using the California Community College System definition of
#'    first-generation status.
#'
#'    The definition uses the highest level of education of the student's
#'    parent(s)/guardian(s).
#'
#'    \strong{First-Generation:}
#' \enumerate{
#'    \item Grade 9 or less
#'    \item Grade 10, 11, or 12 but did not graduate
#'    \item High school graduate
#' }
#'
#'    \strong{Non-First-Generation:}
#' \enumerate{
#'    \item Some college but no degree
#'    \item AA/AS Degree
#'    \item BA/BS Degree
#'    \item Graduate or professional degree beyond a BA/BS
#' }
#'    Reference for definition: \url{https://datamart.cccco.edu/App_Doc/Scorecard_Data_Mart_Specs.pdf}
#'
#' @details Each parent status is matched exactly (case-sensitive) against
#'    three sets of codes:
#' \itemize{
#'    \item Low education: \code{"GED"}, \code{"HIGH"}, \code{"LESS10"},
#'          \code{"LESS12"}, \code{"VOC"}, \code{"LESS"}
#'    \item College-level education: \code{"ASSOC"}, \code{"BACH"},
#'          \code{"DOC"}, \code{"GRAD"}, \code{"MAST"}, \code{"SOME"}
#'    \item Unknown: \code{"UNKNOWN"}, \code{""} and \code{NA}
#' }
#'
#'    The two parents are then combined as follows:
#' \itemize{
#'    \item \code{"First_Gen"}: at least one parent has a low-education code
#'          and the other parent is low-education or unknown.
#'    \item \code{"Not_First_Gen"}: at least one parent has a college-level
#'          code and the other parent has any recognised code or is unknown.
#'    \item \code{"Unknown"}: both parents are unknown, or either value is a
#'          code that is not listed above.
#' }
#'
#' @param x PARENT_1_STATUS. Vector of education codes for the first
#'    parent/guardian.
#' @param y PARENT_2_STATUS. Vector of education codes for the second
#'    parent/guardian, paired element-wise with \code{x}.
#'
#' @return A character vector giving the first generation status of each
#'    student: \code{"First_Gen"}, \code{"Not_First_Gen"} or \code{"Unknown"}.
#'
#' @examples
#' x <- c("GED", "ASSOC", "UNKNOWN", "VOC")
#' y <- c("MAST", "LESS", "LESS12", "VOC")
#' first_gen(x, y)
#'
#' # Missing values are treated like "UNKNOWN"
#' first_gen(c(NA, "BACH"), c("HIGH", NA))
#'
#' # library(dplyr)
#' # SDD <- as.data.frame(cbind(x, y))
#' # SDD <- mutate(SDD, new_variable = first_gen(x, y))
#' @export
first_gen <- function(x, y) {
  low_ed <- c("GED", "HIGH", "LESS10", "LESS12", "VOC", "LESS")
  high_ed <- c("ASSOC", "BACH", "DOC", "GRAD", "MAST", "SOME")
  # NA is included so that a missing status behaves exactly like the explicit
  # "UNKNOWN" / "" codes (`%in%` matches NA against NA).
  no_ed <- c("UNKNOWN", "", NA_character_)

  x_low <- x %in% low_ed
  y_low <- y %in% low_ed
  x_high <- x %in% high_ed
  y_high <- y %in% high_ed
  x_none <- x %in% no_ed
  y_none <- y %in% no_ed

  # First generation: nobody is known to have attended college and at least
  # one parent has a known low-education status.
  is_first_gen <- (x_low & (y_low | y_none)) | (x_none & y_low)

  # Not first generation: at least one parent attended college and the other
  # parent's status is a recognised code (or unknown).
  is_not_first_gen <- (x_high & (y_high | y_low | y_none)) |
    ((x_low | x_none) & y_high)

  # The two masks are mutually exclusive; everything else (both unknown, or
  # any unrecognised code) stays "Unknown".
  status <- rep("Unknown", length(is_first_gen))
  status[is_first_gen] <- "First_Gen"
  status[is_not_first_gen] <- "Not_First_Gen"
  status
}
# christian-million/researchR documentation built on May 15, 2019, 12:45 p.m.