R/create_demography.R

#' Create a demography data frame.
#'
#' @param df A Eurobarometer data file read in with haven.
#' @param metadata The metadata file.
#' @param detail Defaults to \code{"basic"}.
#' @importFrom dplyr filter select distinct one_of mutate mutate_at
#' @importFrom purrr set_names
#' @importFrom magrittr %>%
#' @export

create_demography <- function ( df, metadata, detail = "basic") {
  spss_name <- sr_trend <- gesis_name <- sr_name <- vars <- NULL
  new_class <- funs <- na_id <- trend_left_right <- . <- NULL
  difficulty_bills <- rescale_situtation <- NULL

  m <- metadata %>% dplyr::filter( spss_name %in% names(df)  ) %>%
    dplyr::filter( sr_trend %in% c(1,3)) %>%
    dplyr::select ( gesis_name, sr_name, spss_name, new_class, na_id, sr_trend) %>%
    dplyr::distinct (spss_name, new_class, .keep_all = TRUE)

  main_vars <- m %>% dplyr::filter( sr_trend == 1 ) %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  meta_vars <- m %>% dplyr::filter( sr_trend == 3 ) %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  all_vars <- c(main_vars, meta_vars)

  character_vars <- m %>%
    dplyr::filter( new_class == "character") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  factor_vars <- m %>%
    dplyr::filter( new_class == "factor") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  rescale_time_frequency_use_vars <- m   %>%
    dplyr::filter( new_class == "rescale_time_frequency_use") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  rescale_time_frequency_3_vars <- m   %>%
    dplyr::filter( new_class == "rescale_time_frequency_3") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  keep_numeric_vars <- m   %>%
    dplyr::filter( new_class == "keep_numeric") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  difficulty_vars <- m   %>%
    dplyr::filter( new_class == "rescale_difficulty") %>%
    dplyr::select ( sr_name, na_id ) %>%
    unlist(.)


  alphanumeric_vars <- m   %>%
    dplyr::filter( new_class == "rescale_alphanumeric_en") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  num_to_chr_vars <- m   %>%
    dplyr::filter( new_class == "num_to_chr") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  satisfaction_vars <- m %>%
    dplyr::filter( new_class == "rescale_satisfaction") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  situation_vars <- m %>%
    dplyr::filter( new_class == "rescale_situtation") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.)

  occupation_vars <- m %>%
    dplyr::filter( new_class == "rescale_occupation") %>%
    dplyr::select ( sr_name ) %>%
    unlist(.) %>%
    as.character(.)

  demography <- df %>% dplyr::select ( dplyr::one_of(m$spss_name)) %>%
    purrr::set_names(., m$sr_name) %>%
    dplyr::mutate_at ( vars(dplyr::one_of(num_to_chr_vars)),
                       as.character) %>%
    dplyr::mutate_at ( vars(dplyr::one_of(character_vars)),
                       haven::as_factor) %>%
    dplyr::mutate_at ( vars(dplyr::one_of(character_vars)),
                       as.character)

  demography <- dplyr::mutate (demography,
                      difficulty_bills = rescale_difficulty (
                        column = difficulty_bills,
                        na_labels = "Refusal (SPONT.)"))
  message ("Rescaled: Difficulty of paying bills.")

  demography <- dplyr::mutate (demography,
                        gender = surveyreader::code_gender(
                          column = demography$gender,
                          female_id = "default"))
  message ("Recoded: gender.")

  demography <- dplyr::mutate (demography,
         marital_status = surveyreader::rescale_marital_status(
         column = demography$marital_status))
  message("Recoded: marital status")



  demography <- dplyr::mutate ( demography,
                       date_interview = rescale_date_interview(
                         demography$date_interview,
                         return_class = "Date") )
  message("Recoded: date of interview to class Date.")

  if (length(num_to_chr_vars)>0) {
    demography <- dplyr::mutate_at (demography,
                           vars(dplyr::one_of(num_to_chr_vars)),
                           funs(as.character(.)))
    message("Class conversions: Numeric to character.")
  }

  if ( length(satisfaction_vars)>0) {
    demography <-dplyr::mutate_at ( demography,
                           vars(dplyr::one_of(satisfaction_vars)),
                           funs(rescale_satisfaction(column =.)))
    message("Rescaled: life satisfaction variables.")
  }

  if ( length(situation_vars) >0) {
    demography <- dplyr::mutate_at ( vars(dplyr::one_of(situation_vars)),
                            funs(rescale_situtation(column =.)))
    message("Rescaled: situtation assessment trend questions.")
  }

 if ( length(rescale_time_frequency_use_vars)>0) {
    demography <- dplyr::mutate_at ( demography,
                vars(dplyr::one_of( rescale_time_frequency_use_vars)),
                surveyreader::rescale_time_frequency_use)
    message("Rescaled time frequencies of using things variables.")
  }
  if ( length(rescale_time_frequency_3_vars)>0) {
    demography <-  dplyr::mutate_at ( demography,
                vars(dplyr::one_of( rescale_time_frequency_3_vars)),
                surveyreader::rescale_time_frequency_3)
    message("Rescaled 3-level time frequencies.")
  }

  if ( length(occupation_vars )>0) {
    demography <- dplyr::mutate_at ( demography,
                vars(dplyr::one_of( occupation_vars )),
                surveyreader::rescale_occupation)
    message("Rescaled occupation (last job) variables. ")
  }

  if ( length(alphanumeric_vars )>0) {
    demography <- dplyr::mutate_at ( demography,
                vars(dplyr::one_of( alphanumeric_vars )),
                surveyreader::rescale_alphanumeric_en)
    message("Recoded alphanumeric variables, i.e. 'twenty', etc..")
  }

  if ( length(keep_numeric_vars )>0) {
    demography <- dplyr::mutate_at ( demography,
                vars(dplyr::one_of( keep_numeric_vars )),
                surveyreader::keep_numeric)
    message("Recoded numeric variables with units or other string.")
  }

  if ( "trend_left_right" %in% names (demography)) {
    demography <- dplyr::select(demography, -trend_left_right)
  }

  summary ( demography)

}
antaldaniel/surveyreader documentation built on May 16, 2019, 2:29 a.m.