R/reweight_onets.R

Defines functions reweight_onets_2010 reweight_onets_2009 reweight_onets_2006 reweight_onets_2000

Documented in reweight_onets_2000 reweight_onets_2006 reweight_onets_2009 reweight_onets_2010

#' Reweight O*NET data coded to O*NET-SOC 2000 onto O*NET-SOC 2010
#'
#' This function reweights the O*NET data for a particular year using the previously constructed O*NET SOC 2010 employment.
#' To do so it converts the SOC 2010 employment of that year, using previously generated crosswalks, to
#' O*NET-SOC 2000 (through the 2009 and 2006 codes) and then uses this employment as weights to convert the
#' data back to O*NET SOC-2010 (through the 2006 and 2009 codes) as weighted means.
#'
#' This function is to be used for years coded to O*NET SOC 2000
#'
#' @param yr - the year for which to convert the O*NET data
#' @param emp.data - the data created by oes_onet_assemble; the columns \code{year}, \code{onetsoc2010code}
#'   and \code{onet_oes_emp} are used
#' @param onet.data - the dataset created by onet_skills_read(); the columns \code{year}, \code{O*NET-SOC Code},
#'   \code{element}, \code{data_I}, \code{data_L} and \code{Incumbent} are used
#' @param crosswalks - the list of crosswalks produced by onet_soc_crosswalks(); the elements \code{soc10_to_09},
#'   \code{soc09_to_06}, \code{soc06_to_00}, \code{soc00_to_06}, \code{soc06_to_09} and \code{soc09_to_10} are used
#'
#' @return A clean annual dataset of O*NET variables converted to SOC-2010, with the columns \code{onetsoc2010code},
#'   \code{onet_oes_emp}, \code{element}, \code{data_I}, \code{data_L}, \code{Incumbent} and \code{year}. Any grouping
#'   left by the final \code{dplyr::summarise()} is kept on the result.
#' @export
reweight_onets_2000 <- function(yr=NULL,emp.data=NULL,onet.data=NULL,crosswalks=NULL) {

  # fail early with a clear message instead of an obscure error further down
  if (is.null(yr) || is.null(emp.data) || is.null(onet.data) || is.null(crosswalks)) {
    stop("yr, emp.data, onet.data and crosswalks must all be supplied", call. = FALSE)
  }

  # read in O*NET data, limit to the selected year

  onet <- as.data.table(onet.data) %>%
    dplyr::filter(year==yr) %>%
    dplyr::select(c("O*NET-SOC Code","element","data_I","data_L","Incumbent")) %>%
    dplyr::rename("onetsoc2000code"="O*NET-SOC Code")

  # read in employment data, limit to the selected year
  empl <- as.data.table(emp.data) %>%
    dplyr::filter(year==yr) %>%
    dplyr::select(c("onetsoc2010code","onet_oes_emp"))

  #############################
  #### REWEIGHTING ############

  # convert employment figures from 2010 SOC to 2009 SOC.
  # The year-filtered employment (empl) is used here, not the full emp.data,
  # so that employment from other years does not leak into the weights.
  data_2010_to_2009 <- merge(crosswalks[["soc10_to_09"]],empl,by="onetsoc2010code")  %>%
    dplyr::mutate(weighted_emp = onet_oes_emp*weight) %>%
    dplyr::group_by(onetsoc2009code) %>%
    dplyr::summarise(onet_oes_emp = sum(weighted_emp))

  # convert employment figures from 2009 SOC to 2006 SOC

  data_2009_to_2006 <- merge(crosswalks[["soc09_to_06"]],data_2010_to_2009,by="onetsoc2009code")  %>%
    dplyr::mutate(weighted_emp = onet_oes_emp*weight) %>%
    dplyr::group_by(onetsoc2006code) %>%
    dplyr::summarise(onet_oes_emp = sum(weighted_emp))

  # convert employment figures from 2006 SOC to 2000 SOC

  data_2006_to_2000 <- merge(crosswalks[["soc06_to_00"]],data_2009_to_2006,by="onetsoc2006code")  %>%
    dplyr::mutate(weighted_emp = onet_oes_emp*weight) %>%
    dplyr::group_by(onetsoc2000code) %>%
    dplyr::summarise(onet_oes_emp = sum(weighted_emp))

  ## attach the SOC 2000 employment to the O*NET information

  onet_employment <- data.table(data_2006_to_2000)
  merged.data <- merge(onet_employment,onet,by="onetsoc2000code")

  # convert back to SOC2010 by taking means within recipient SOC codes, using employment as weights

  data_2000_to_2006 <- merge(crosswalks[["soc00_to_06"]],merged.data,by="onetsoc2000code")  %>%
    dplyr::group_by(onetsoc2006code,element) %>%
    # temporary output names: later summarise() expressions can see earlier results,
    # so the inputs data_I, data_L, Incumbent and onet_oes_emp must not be overwritten here
    dplyr::summarise(data_IM = weighted.mean(data_I,onet_oes_emp),
                     data_LV = weighted.mean(data_L,onet_oes_emp),
                     Inc = weighted.mean(Incumbent,onet_oes_emp),
                     employment = sum(onet_oes_emp)) %>%
    dplyr::select(c("onetsoc2006code","employment","element","data_IM","data_LV","Inc")) %>%
    dplyr::rename("onet_oes_emp"="employment",
                  "data_I"="data_IM",
                  "data_L"="data_LV",
                  "Incumbent"="Inc")

  data_2006_to_2009 <- merge(crosswalks[["soc06_to_09"]],data_2000_to_2006,by="onetsoc2006code")  %>%
    dplyr::group_by(onetsoc2009code,element) %>%
    dplyr::summarise(data_IM = weighted.mean(data_I,onet_oes_emp),
                     data_LV = weighted.mean(data_L,onet_oes_emp),
                     Inc = weighted.mean(Incumbent,onet_oes_emp),
                     employment = sum(onet_oes_emp)) %>%
    dplyr::select(c("onetsoc2009code","employment","element","data_IM","data_LV","Inc")) %>%
    dplyr::rename("onet_oes_emp"="employment",
                  "data_I"="data_IM",
                  "data_L"="data_LV",
                  "Incumbent"="Inc")

  data_2009_to_2010 <- merge(crosswalks[["soc09_to_10"]],data_2006_to_2009,by="onetsoc2009code")  %>%
    dplyr::group_by(onetsoc2010code,element) %>%
    dplyr::summarise(data_IM = weighted.mean(data_I,onet_oes_emp),
                     data_LV = weighted.mean(data_L,onet_oes_emp),
                     Inc = weighted.mean(Incumbent,onet_oes_emp),
                     employment = sum(onet_oes_emp)) %>%
    dplyr::select(c("onetsoc2010code","employment","element","data_IM","data_LV","Inc")) %>%
    dplyr::rename("onet_oes_emp"="employment",
                  "data_I"="data_IM",
                  "data_L"="data_LV",
                  "Incumbent"="Inc")

  # tag the converted data with the year it belongs to
  final_data <- data_2009_to_2010 %>%
    dplyr::mutate(year = yr)

  return(final_data)

}

#' Reweight O*NET data coded to O*NET-SOC 2006 onto O*NET-SOC 2010
#'
#' This function reweights the O*NET data for a particular year using the previously constructed O*NET SOC 2010 employment.
#' To do so it converts the SOC 2010 employment of that year, using previously generated crosswalks, to
#' O*NET-SOC 2006 (through the 2009 codes) and then uses this employment as weights to convert the
#' data back to O*NET SOC-2010 (through the 2009 codes) as weighted means.
#'
#' This function is to be used for years coded to O*NET SOC 2006
#'
#' @param yr - the year for which to convert the O*NET data
#' @param emp.data - the data created by oes_onet_assemble; the columns \code{year}, \code{onetsoc2010code}
#'   and \code{onet_oes_emp} are used
#' @param onet.data - the dataset created by onet_skills_read(); the columns \code{year}, \code{O*NET-SOC Code},
#'   \code{element}, \code{data_I}, \code{data_L} and \code{Incumbent} are used
#' @param crosswalks - the list of crosswalks produced by onet_soc_crosswalks(); the elements \code{soc10_to_09},
#'   \code{soc09_to_06}, \code{soc06_to_09} and \code{soc09_to_10} are used
#'
#' @return A clean annual dataset of O*NET variables converted to SOC-2010, with the columns \code{onetsoc2010code},
#'   \code{onet_oes_emp}, \code{element}, \code{data_I}, \code{data_L}, \code{Incumbent} and \code{year}. Any grouping
#'   left by the final \code{dplyr::summarise()} is kept on the result.
#' @export
reweight_onets_2006 <- function(yr=NULL,emp.data=NULL,onet.data=NULL,crosswalks=NULL) {

  # fail early with a clear message instead of an obscure error further down
  if (is.null(yr) || is.null(emp.data) || is.null(onet.data) || is.null(crosswalks)) {
    stop("yr, emp.data, onet.data and crosswalks must all be supplied", call. = FALSE)
  }

  # read in O*NET data, limit to the selected year

  onet <- as.data.table(onet.data) %>%
    dplyr::filter(year==yr) %>%
    dplyr::select(c("O*NET-SOC Code","element","data_I","data_L","Incumbent")) %>%
    dplyr::rename("onetsoc2006code"="O*NET-SOC Code")

  # read in employment data, limit to the selected year
  empl <- as.data.table(emp.data) %>%
    dplyr::filter(year==yr) %>%
    dplyr::select(c("onetsoc2010code","onet_oes_emp"))

  #############################
  #### REWEIGHTING ############

  # convert employment figures from 2010 SOC to 2009 SOC.
  # The year-filtered employment (empl) is used here, not the full emp.data,
  # so that employment from other years does not leak into the weights.
  data_2010_to_2009 <- merge(crosswalks[["soc10_to_09"]],empl,by="onetsoc2010code")  %>%
    dplyr::mutate(weighted_emp = onet_oes_emp*weight) %>%
    dplyr::group_by(onetsoc2009code) %>%
    dplyr::summarise(onet_oes_emp = sum(weighted_emp))

  # convert employment figures from 2009 SOC to 2006 SOC

  data_2009_to_2006 <- merge(crosswalks[["soc09_to_06"]],data_2010_to_2009,by="onetsoc2009code")  %>%
    dplyr::mutate(weighted_emp = onet_oes_emp*weight) %>%
    dplyr::group_by(onetsoc2006code) %>%
    dplyr::summarise(onet_oes_emp = sum(weighted_emp))

  ## attach the SOC 2006 employment to the O*NET information

  onet_employment <- data.table(data_2009_to_2006)
  merged.data <- merge(onet_employment,onet,by="onetsoc2006code")

  # convert back to SOC2010 by taking means within recipient SOC codes, using employment as weights

  data_2006_to_2009 <- merge(crosswalks[["soc06_to_09"]],merged.data,by="onetsoc2006code")  %>%
    dplyr::group_by(onetsoc2009code,element) %>%
    # temporary output names: later summarise() expressions can see earlier results,
    # so the inputs data_I, data_L, Incumbent and onet_oes_emp must not be overwritten here
    dplyr::summarise(data_IM = weighted.mean(data_I,onet_oes_emp),
                     data_LV = weighted.mean(data_L,onet_oes_emp),
                     Inc = weighted.mean(Incumbent,onet_oes_emp),
                     employment = sum(onet_oes_emp)) %>%
    dplyr::select(c("onetsoc2009code","employment","element","data_IM","data_LV","Inc")) %>%
    dplyr::rename("onet_oes_emp"="employment",
                  "data_I"="data_IM",
                  "data_L"="data_LV",
                  "Incumbent"="Inc")

  data_2009_to_2010 <- merge(crosswalks[["soc09_to_10"]],data_2006_to_2009,by="onetsoc2009code")  %>%
    dplyr::group_by(onetsoc2010code,element) %>%
    dplyr::summarise(data_IM = weighted.mean(data_I,onet_oes_emp),
                     data_LV = weighted.mean(data_L,onet_oes_emp),
                     Inc = weighted.mean(Incumbent,onet_oes_emp),
                     employment = sum(onet_oes_emp)) %>%
    dplyr::select(c("onetsoc2010code","employment","element","data_IM","data_LV","Inc")) %>%
    dplyr::rename("onet_oes_emp"="employment",
                  "data_I"="data_IM",
                  "data_L"="data_LV",
                  "Incumbent"="Inc")

  # tag the converted data with the year it belongs to
  final_data <- data_2009_to_2010 %>%
    dplyr::mutate(year = yr)

  return(final_data)

}

#' Reweight O*NET data coded to O*NET-SOC 2009 onto O*NET-SOC 2010
#'
#' This function reweights the O*NET data for a particular year using the previously constructed O*NET SOC 2010 employment.
#' To do so it converts the SOC 2010 employment of that year, using previously generated crosswalks, to
#' O*NET-SOC 2009 and then uses this employment as weights to convert the data back to O*NET SOC-2010
#' as weighted means.
#'
#' This function is to be used for years coded to O*NET SOC 2009
#'
#' @param yr - the year for which to convert the O*NET data
#' @param emp.data - the data created by oes_onet_assemble; the columns \code{year}, \code{onetsoc2010code}
#'   and \code{onet_oes_emp} are used
#' @param onet.data - the dataset created by onet_skills_read(); the columns \code{year}, \code{O*NET-SOC Code},
#'   \code{element}, \code{data_I}, \code{data_L} and \code{Incumbent} are used
#' @param crosswalks - the list of crosswalks produced by onet_soc_crosswalks(); the elements \code{soc10_to_09}
#'   and \code{soc09_to_10} are used
#'
#' @return A clean annual dataset of O*NET variables converted to SOC-2010, with the columns \code{onetsoc2010code},
#'   \code{onet_oes_emp}, \code{element}, \code{data_I}, \code{data_L}, \code{Incumbent} and \code{year}. Any grouping
#'   left by the final \code{dplyr::summarise()} is kept on the result.
#' @export
reweight_onets_2009 <- function(yr=NULL,emp.data=NULL,onet.data=NULL,crosswalks=NULL) {

  # fail early with a clear message instead of an obscure error further down
  if (is.null(yr) || is.null(emp.data) || is.null(onet.data) || is.null(crosswalks)) {
    stop("yr, emp.data, onet.data and crosswalks must all be supplied", call. = FALSE)
  }

  # read in O*NET data, limit to the selected year

  onet <- onet.data %>%
    dplyr::filter(year==yr) %>%
    dplyr::select(c("O*NET-SOC Code","element","data_I","data_L","Incumbent")) %>%
    dplyr::rename("onetsoc2009code"="O*NET-SOC Code")

  # read in employment data, limit to the selected year
  empl <- emp.data %>%
    dplyr::filter(year==yr) %>%
    dplyr::select(c("onetsoc2010code","onet_oes_emp"))

  #############################
  #### REWEIGHTING ############

  # convert employment figures from 2010 SOC to 2009 SOC.
  # The year-filtered employment (empl) is used here, not the full emp.data,
  # so that employment from other years does not leak into the weights.
  data_2010_to_2009 <- merge(crosswalks[["soc10_to_09"]],empl,by="onetsoc2010code")  %>%
    dplyr::mutate(weighted_emp = onet_oes_emp*weight) %>%
    dplyr::group_by(onetsoc2009code) %>%
    dplyr::summarise(onet_oes_emp = sum(weighted_emp))

  ## attach the SOC 2009 employment to the O*NET information

  onet_employment <- data.table(data_2010_to_2009)
  merged.data <- merge(onet_employment,onet,by="onetsoc2009code")

  # convert back to SOC2010 by taking means within recipient SOC codes, using employment as weights

  data_2009_to_2010 <- merge(crosswalks[["soc09_to_10"]],merged.data,by="onetsoc2009code")  %>%
    dplyr::group_by(onetsoc2010code,element) %>%
    # temporary output names: later summarise() expressions can see earlier results,
    # so the inputs data_I, data_L, Incumbent and onet_oes_emp must not be overwritten here
    dplyr::summarise(data_IM = weighted.mean(data_I,onet_oes_emp),
                     data_LV = weighted.mean(data_L,onet_oes_emp),
                     Inc = weighted.mean(Incumbent,onet_oes_emp),
                     employment = sum(onet_oes_emp)) %>%
    dplyr::select(c("onetsoc2010code","employment","element","data_IM","data_LV","Inc")) %>%
    dplyr::rename("onet_oes_emp"="employment",
                  "data_I"="data_IM",
                  "data_L"="data_LV",
                  "Incumbent"="Inc")

  # tag the converted data with the year it belongs to
  final_data <- data_2009_to_2010 %>%
    dplyr::mutate(year = yr)

  # return visibly, like the other reweight_onets_* functions
  return(final_data)

}

#' Clean O*NET data already coded to O*NET-SOC 2010, consistent with the reweighted datasets
#'
#' No reweighting is carried out here. The O*NET data and the O*NET SOC 2010 employment data are each
#' limited to the requested year and then merged on the O*NET-SOC 2010 code, so that the result
#' carries the same variables as the datasets created by the other reweighting functions.
#'
#' This function is to be used for years coded to O*NET SOC 2010
#'
#' @param yr - the year for which to prepare the O*NET data
#' @param emp.data - the data created by oes_onet_assemble; the columns \code{year}, \code{onetsoc2010code}
#'   and \code{onet_oes_emp} are used
#' @param onet.data - the dataset created by onet_skills_read(); the columns \code{year}, \code{O*NET-SOC Code},
#'   \code{element}, \code{data_I}, \code{data_L} and \code{Incumbent} are used
#' @param crosswalks - the list of crosswalks produced by onet_soc_crosswalks(); not used by this function
#'
#' @return A clean annual dataset of O*NET variables in SOC-2010, with the columns \code{onetsoc2010code},
#'   \code{onet_oes_emp}, \code{element}, \code{data_I}, \code{data_L}, \code{Incumbent} and \code{year}
#' @export
reweight_onets_2010 <- function(yr=NULL,emp.data=NULL,onet.data=NULL,crosswalks=NULL) {

  # O*NET measures of the requested year, keyed by the SOC 2010 code
  onet_year <- dplyr::filter(onet.data, year==yr)
  onet_year <- dplyr::select(onet_year, c("O*NET-SOC Code","element","data_I","data_L","Incumbent"))
  onet_year <- dplyr::rename(onet_year, "onetsoc2010code"="O*NET-SOC Code")

  # employment of the requested year
  emp_year <- dplyr::filter(emp.data, year==yr)
  emp_year <- dplyr::select(emp_year, c("onetsoc2010code","onet_oes_emp"))

  # the codes are already SOC 2010, so employment is simply attached to the O*NET rows
  joined <- merge(data.table(emp_year), onet_year, by="onetsoc2010code")

  # tag the data with the year it belongs to
  dplyr::mutate(joined, year = yr)

}
djmorris1989/onetmappinguk documentation built on June 14, 2020, 10:04 a.m.