#' Clean demographic data
#'
#' Derives age band, birth cohort, sex, ethnicity, region, country and
#' relationship status variables from the raw coded columns of `data`.
#'
#' @details
#' Columns that must be present: `year`, `sex` (1 = Male, 2 = Female) and
#' `govtof` (codes 1-13). Columns that are used when present: `age`,
#' `ethukeul`, `eth01`, `marstt` and `marsta`.
#'
#' * If `age` is present, rows whose age band is `"90+"` are removed. Rows
#'   whose age is missing (or below -10, which falls outside every band) are
#'   removed as well, because their band is `NA`.
#' * If `age` is absent, `age` and `age_cat` are created as all-`NA` columns
#'   and no rows are removed.
#' * When two source columns feed the same output (`ethukeul`/`eth01`,
#'   `marstt`/`marsta`), the second one overwrites the first on the rows where
#'   it holds a recognised code.
#'
#' @param data A `data.frame` or `data.table` holding the raw variables.
#'
#' @return A `data.table` with the added columns `age_cat`, `cohort`,
#'   `region`, `country` and, depending on the inputs available,
#'   `ethnicity_4cat`, `ethnicity_2cat` and `relationship_status`. `sex` is
#'   recoded to `"Male"`/`"Female"`. The raw columns `govtof`, `ethukeul`,
#'   `eth01`, `marstt` and `marsta` are dropped.
#' @export
aps_clean_demographic <- function(
  data
) {
  data <- data.table::as.data.table(data)

  # Map numeric ages onto the age-band labels. Values below the first break
  # give index 0 from findInterval(); a zero subscript would be silently
  # dropped and shorten the result, so it is turned into NA instead.
  band_age <- function(age) {
    breaks <- c(-10, 2, 5, 8, 11, 13, 16, 18, seq(20, 90, 5))
    labels <- c(
      "0-1", "2-4", "5-7", "8-10", "11-12", "13-15", "16-17", "18-19",
      "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
      "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89",
      "90+"
    )
    idx <- findInterval(age, breaks)
    idx[idx %in% 0L] <- NA_integer_
    labels[idx]
  }

  # age ----
  if ("age" %in% colnames(data)) {
    data[, age := as.double(age)]
    data[, age_cat := band_age(age)]
    # Select ages up to 90 years. Rows with an NA band are dropped too,
    # because NA in `i` is treated as FALSE.
    data <- data[age_cat != "90+"]
  } else {
    # No age information: create both columns so the later steps still work,
    # and keep every row since there is nothing to filter on.
    data[, age := NA_real_]
    data[, age_cat := NA_character_]
  }

  # birth cohort ----
  data[, cohort := year - age]

  # gender ----
  # match() returns NA for anything other than 1 or 2 (e.g. 0 or negative
  # missing-value codes) instead of producing an invalid subscript.
  data[, sex := c("Male", "Female")[match(sex, c(1, 2))]]

  # ethnicity ----
  if ("ethukeul" %in% colnames(data)) {
    data[ethukeul == 1, ethnicity_4cat := "white"]
    data[ethukeul == 2, ethnicity_4cat := "mixed"]
    data[ethukeul == 8, ethnicity_4cat := "black"]
    data[ethukeul %in% c(3, 4, 5, 6, 7, 9), ethnicity_4cat := "asian_other"]

    data[ethukeul == 1, ethnicity_2cat := "white"]
    data[ethukeul %in% 2:9, ethnicity_2cat := "non-white"]

    data[, ethukeul := NULL]
  }

  if ("eth01" %in% colnames(data)) {
    data[eth01 == 1, ethnicity_4cat := "white"]
    data[eth01 == 2, ethnicity_4cat := "mixed"]
    data[eth01 == 4, ethnicity_4cat := "black"]
    data[eth01 %in% c(3, 5, 6), ethnicity_4cat := "asian_other"]

    data[eth01 == 1, ethnicity_2cat := "white"]
    data[eth01 %in% 2:6, ethnicity_2cat := "non-white"]

    data[, eth01 := NULL]
  }

  # government office region and country ----
  data[govtof == 1, region := "North East"]
  # Codes 2 and 3 are both reported as North West.
  # NOTE(review): code 3 is presumably a separate sub-region in the raw
  # coding frame that is deliberately folded in here - confirm.
  data[govtof %in% c(2, 3), region := "North West"]
  data[govtof == 4, region := "Yorkshire and Humber"]
  data[govtof == 5, region := "East Midlands"]
  data[govtof == 6, region := "West Midlands"]
  data[govtof == 7, region := "Eastern"]
  data[govtof == 8, region := "London"]
  data[govtof == 9, region := "South East"]
  data[govtof == 10, region := "South West"]
  data[govtof == 11, region := "Wales"]
  data[govtof == 12, region := "Scotland"]
  data[govtof == 13, region := "Northern Ireland"]

  data[govtof %in% 1:10, country := "England"]
  data[govtof == 11, country := "Wales"]
  data[govtof == 12, country := "Scotland"]
  data[govtof == 13, country := "Northern Ireland"]

  data[, govtof := NULL]

  # marital status ----
  if ("marstt" %in% colnames(data)) {
    # Recode from `marstt` itself; the guard and the recode must use the
    # same column.
    data[marstt == 1, relationship_status := "single"]
    data[marstt == 2, relationship_status := "married"]
    data[marstt %in% c(3, 4, 5), relationship_status := "sep_div_wid"]

    data[, marstt := NULL]
  }

  if ("marsta" %in% colnames(data)) {
    data[marsta == 1, relationship_status := "single"]
    # Code 6 is grouped with "married".
    data[marsta %in% c(2, 6), relationship_status := "married"]
    data[marsta %in% c(3, 4, 5), relationship_status := "sep_div_wid"]

    data[, marsta := NULL]
  }

  # `[]` makes the returned table print normally after the `:=` calls above.
  data[]
}
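# NOTE: the two lines below are residue from the web page this file was
# copied from; they are not part of the R source.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.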