# R/munge_data.R

# Level orderings ----
# Each vector lists the levels of one categorical column in the order the
# factor should carry them; the pipeline below hands them to fct_relevel().

gender_list <- c("Male", "Female")

smoker_status_list <- c("NonSmoker", "Smoker", "Unknown")

insurance_plan_list <- c(
  "Perm", "Term", "UL", "ULSG", "VL", "VLSG", "Other"
)

# NOTE(review): not referenced by the pipeline visible in this file, which
# recodes the "0"/"1" codes directly -- kept in case other code relies on it.
age_basis_list <- c("0", "1")

# Bands listed from smallest to largest face amount.
face_amount_band_list <- c(
  "1-9999",
  "10000-24999",
  "25000-49999",
  "50000-99999",
  "100000-249999",
  "250000-499999",
  "500000-999999",
  "1000000-2499999",
  "2500000-4999999",
  "5000000-9999999",
  "10000000+"
)

soa_anticipated_level_term_period_list <- c(
  "5 yr anticipated",
  "10 yr anticipated",
  "15 yr anticipated",
  "20 yr anticipated",
  "25 yr anticipated",
  "30 yr anticipated",
  "Unknown",
  "Not Level Term",
  "N/A (Not Term)"
)

soa_guaranteed_level_term_period_list <- c(
  "5 yr guaranteed",
  "10 yr guaranteed",
  "15 yr guaranteed",
  "20 yr guaranteed",
  "25 yr guaranteed",
  "30 yr guaranteed",
  "Unknown",
  "Not Level Term",
  "N/A (Not Term)"
)

soa_post_level_term_indicator_list <- c(
  "Within Level Term",
  "Post Level Term",
  "Unknown Level Term Period",
  "Not Level Term",
  "N/A (Not Term)"
)

select_ultimate_indicator_list <- c("Select", "Ultimate")

# Load the raw table from its serialized .RDS file; the path is relative to
# the working directory the script is run from.
data_raw <- readr::read_rds("data/data_raw.RDS")

# Build the cleaned table from data_raw: standardise the column names, give
# the categorical columns an explicit level order, derive the duration and
# issue-age bands, and move the band columns up next to the leading columns.
data <-
  data_raw %>%
  # NOTE(review): clean_names() is presumably janitor's -- confirm.
  clean_names() %>%
  mutate(
    # Indicator -> logical. Rows where the indicator is NA stay NA.
    common_company_indicator_57 = common_company_indicator_57 == 1,

    # Move the levels named in each *_list vector to the front, in that order.
    gender = fct_relevel(gender, gender_list),
    smoker_status = fct_relevel(smoker_status, smoker_status_list),
    insurance_plan = fct_relevel(insurance_plan, insurance_plan_list),

    # Relabel the "0" / "1" codes as anb / alb.
    age_basis = fct_recode(as.character(age_basis), anb = "0", alb = "1"),

    face_amount_band = fct_relevel(face_amount_band, face_amount_band_list),
    soa_anticipated_level_term_period = fct_relevel(
      soa_anticipated_level_term_period,
      soa_anticipated_level_term_period_list
    ),
    soa_guaranteed_level_term_period = fct_relevel(
      soa_guaranteed_level_term_period,
      soa_guaranteed_level_term_period_list
    ),
    soa_post_level_term_indicator = fct_relevel(
      soa_post_level_term_indicator,
      soa_post_level_term_indicator_list
    ),
    select_ultimate_indicator = fct_relevel(
      select_ultimate_indicator,
      select_ultimate_indicator_list
    ),

    # Durations below 5 keep their own value as the label; larger ones are
    # banded in fives, with an open-ended top band. case_when() takes the
    # first matching clause, so each clause only needs its upper bound once
    # negative values have been sent to NA. NA durations match nothing and
    # end up NA as well.
    # fct_reorder() then orders the levels by duration instead of by the
    # order in which the labels first appear.
    duration_group = fct_reorder(
      as_factor(
        case_when(
          duration < 0 ~ NA_character_,
          duration < 5 ~ as.character(duration),
          duration < 10 ~ "5-9",
          duration < 15 ~ "10-14",
          duration < 20 ~ "15-19",
          duration < 25 ~ "20-24",
          duration >= 25 ~ "25+",
          TRUE ~ NA_character_
        )
      ),
      duration
    ),

    # Same scheme for issue age: bands of ten, open-ended from 100 upwards,
    # negative or NA ages -> NA, levels ordered by issue_age.
    issue_age_group = fct_reorder(
      as_factor(
        case_when(
          issue_age < 0 ~ NA_character_,
          issue_age < 10 ~ "0-9",
          issue_age < 20 ~ "10-19",
          issue_age < 30 ~ "20-29",
          issue_age < 40 ~ "30-39",
          issue_age < 50 ~ "40-49",
          issue_age < 60 ~ "50-59",
          issue_age < 70 ~ "60-69",
          issue_age < 80 ~ "70-79",
          issue_age < 90 ~ "80-89",
          issue_age < 100 ~ "90-99",
          issue_age >= 100 ~ "100+",
          TRUE ~ NA_character_
        )
      ),
      issue_age
    )
  ) %>%
  # Leading columns first, then the derived bands beside issue_age, then
  # every remaining column in its existing order.
  select(
    observation_year:insurance_plan,
    issue_age_group,
    issue_age,
    duration_group,
    everything()
  )
# Houstonwp/soadac_2018 documentation built on May 9, 2019, 3:25 a.m.