# Constants ------------------------------------------------------------------
BES_FTF_URL <- "http://www.britishelectionstudy.com/wp-content/uploads/2018/01/bes_f2f_2017_v1.2.sav"
# Functions ------------------------------------------------------------------
#' Calculate estimated election turnout by demographic characteristic
#'
#' Calculates estimated election turnout by demographic characteristic for
#' British Election Study survey data.
#'
#' @param data A dataframe containing
#' \href{http://www.britishelectionstudy.com/}{British Election Study}
#' internet or face to face survey data.
#' @param dgraphic The name of the variable for the demographic characteristic
#' of interest. Must be defined between quotation marks.
#' @param vote The name of the variable which captures response from suvey
#' respondent to the question if they voted or not in the election of
#' interest. Must be defined between quotation marks.
#' @param wt The name of the weight variable to be used in analysis. Must
#' be defined between quotation marks.
#' @param validated Use when survey respondent's vote registeration has not
#' been validated by British Election Study. A boolean which if FALSE
#' filters \code{data} to show those survey respondents who are over the age
#' of 18 and say they are registered to vote. Removes 'Dont Know' responses.
#' If FALSE \code{result} must be defined. Default value is TRUE.
#' @param result Use when \code{validated} is FALSE. A numeric between 0 and 1
#' for the known turnout percentage for the election in Great Britain.
#' Default value is 0.
#' @param sample A boolean which if TRUE returns the unweighted sample size.
#' FALSE returns weighted sample size. Default value is FALSE.
#' If \code {validated} is FALSE then \code{result} must be default.
#' \code{percent} must be FALSE to show sample sizes.
#' @param percent A boolean which if FALSE does not calculate turnout
#' percentages. Default value is TRUE.
#' @param write A boolean which if TRUE creates a CSV in working directory.
#' Default value is FALSE.
#' @return A tibble or CSV containing turnout estimates for selected
#' demographic.
#' @export
#'
turnout_query <- function(data, dgraphic, vote, wt,
validated = TRUE, result = 0,
sample = FALSE, percent = TRUE,
write = FALSE) {
# Create dataframe variables in function environment
data$dgraphic <- as.character(haven::as_factor(data[[dgraphic]]))
data$vote <- as.character(haven::as_factor(data[[vote]]))
data$wt <- data[[wt]]
# Filter respondents aged over and registered
if (validated == FALSE) {
data <- dplyr::filter(data, ageGroup > 1)
data <- dplyr::filter(data, registered == 1 | registered == 2)
data <- dplyr::filter(data, genElecTurnoutRetro < 3)
}
# Weight estimates
if (sample == FALSE) {
estimate <- data %>%
dplyr::group_by(dgraphic, vote) %>%
dplyr::summarise(popest = sum(wt, na.rm = TRUE))
} else {
estimate <- data %>%
dplyr::group_by(dgraphic, vote) %>%
dplyr::summarise(popest = n())
}
# Round estimate figure
estimate$popest <- round(estimate$popest, 0)
# Create tibble in wide form and remove NA column
estimate <- estimate %>%
tidyr::spread(vote, popest, fill = 0) %>%
na.omit(estimate)
# Add column total for adjustment calculation
if (validated == FALSE) {
estimate <- cltools::add_col_totals(estimate)
}
# Calculate turnout percentages
if (percent == TRUE) {
estimate <- cltools::get_row_percent(estimate, na.rm = TRUE)
}
# Adjustment to unvalidated turnout estimates
if (result != 0) {
adjust <- result / estimate$`Yes, voted`[length(estimate$`Yes, voted`)]
estimate$`Yes, voted` <- estimate$`Yes, voted` * adjust
estimate <- dplyr::select(estimate, dgraphic, `Yes, voted`)
} else {
estimate <- dplyr::select(estimate, dgraphic, dplyr::contains("voted"))
}
# Write CSV file
if (write == TRUE) {
readr::write_csv(estimate, paste("turnout_", dgraphic, ".csv"))
} else {
estimate
}
}
#' Calculate estimated election turnout by demographic characteristic
#'
#' Calculates estimated election turnout by commonly asked demographic
#' characteristic for validated British Election Study 2017 face to face
#' survey data only.
#'
#' @param dgraphic The demographic characteristic of interest. Must be
#' defined as one of the following values: age, gender, ethnicity,
#' religion, education. Must be defined between quotation marks.
#' @param sample A boolean which if TRUE returns the unweighted sample size.
#' FALSE returns weighted sample size. Default value is FALSE.
#' \code{percent} must be FALSE to show sample sizes.
#' @inheritParams turnout_query
#' @return A tibble or CSV containing turnout estimates for selected
#' demographic.
#' @export
#'
turnout_common <- function(dgraphic = c("age", "gender",
"ethnicity", "religion", "education"),
sample = FALSE, percent = TRUE,
write = FALSE) {
# Download BES 2017 face to face survey
data <- haven::read_spss(BES_FTF_URL)
# Create and recode dataframe variables in function environment
if (dgraphic == "age") {
data$dgraphic <- as.character(haven::as_factor(data$y10_banded))
}
if (dgraphic == "gender") {
data$dgraphic <- as.character(haven::as_factor(data$y09))
}
if (dgraphic == "ethnicity") {
data$dgraphic <- dplyr::recode(as.character(data$y11), '-999' = -999,
'-2' = -2, '1' = 1, '2' = 1, '3' = 1,
'4' = 1, '5' = 2, '6' = 2, '7' = 2,
'8' = 2,'9'= 2, '10' = 2, '11' = 2,
'12' = 2, '13' = 2,'14' = 2, '15' = 2,
'16' = 2, '17' = 2, '18' = 2, '19' = 1)
data$dgraphic <- factor(data$dgraphic,
levels = c(1, 2, -2, -999),
labels = c("White", "BAME",
"Refused", "Not Stated"))
data$dgraphic <- as.character(haven::as_factor(data$dgraphic))
}
if (dgraphic == "tenure") {
data$dgraphic <- dplyr::recode(as.character(data$y03),
'-999' = -999, '-1' = -1, '1' = 1,
'2' = 2, '3' = 3, '4' = 4, '5' = 3)
data$dgraphic <- factor(data$dgraphic,
levels = c(1, 2, 3, 4, -1, -999),
labels = c("Own ouright", "Mortgage",
"Social rent", "Private rent",
"Don't Know", "Not stated"))
data$dgraphic <- as.character(haven::as_factor(data$dgraphic))
}
if (dgraphic == "religion") {
data$dgraphic <- dplyr::recode(as.character(data$y06), '0' = 0, '1' = 1,
'2' = 1, '3' = 1,
'4' = 1, '5' = 1, '6' = 1, '7' = 1,
'8' = 1, '9'= 1, '10' = 1, '11' = 1,
'12' = 2, '13' = 2, '14' = 2, '15' = 2,
'16' = 2, '17' = 2, '18' = 1, '19' = 1,
'-999' = 999, '-2' = -2)
data$dgraphic <- factor(data$dgraphic,
levels = c(0, 1, 2, -2, -999),
labels = c("No Religion", "Christian",
"Non-Christian", "Refused",
"Not Stated"))
data$dgraphic <- as.character(haven::as_factor(data$dgraphic))
}
if (dgraphic == "education") {
data$dgraphic <- dplyr::recode(as.character(data$edlevel), '0' = 0,
'1' = 1, '2' = 2, '3' = 3, '4' = 4,
'5' = 4)
data$dgraphic <- factor(data$dgraphic,
levels = c(0, 1, 2, 3, 4),
labels = c("No qualifications",
"Below GCSE",
"GCSE", "A-Level",
"Degree or higher"))
data$dgraphic <- as.character(haven::as_factor(data$dgraphic))
}
# Create validated vote variable
data$vote <- as.character(haven::as_factor(data$validatedTurnoutBinary))
# Weight estimates
if (sample == FALSE) {
estimate <- data %>%
dplyr::group_by(dgraphic, vote) %>%
dplyr::summarise(popest = sum(wt_vote_valid, na.rm = TRUE))
} else {
estimate <- data %>%
dplyr::group_by(dgraphic, vote) %>%
dplyr::summarise(popest = n())
}
# Round estimate figure
estimate$popest <- round(estimate$popest, 0)
# Create tibble in wide form and remove NA column
estimate <- estimate %>%
tidyr::spread(vote, popest, fill = 0) %>%
na.omit(estimate)
# Calculate turnout percentages
if (percent == TRUE) {
estimate <- cltools::get_row_percent(estimate, na.rm = TRUE)
} else {
estimate
}
# Remove hanging "NA" column
estimate <- dplyr::select(estimate, dgraphic, Voted, -"<NA>")
# Write CSV file
if (write == FALSE) {
estimate
} else {
readr::write_csv(estimate, paste("BES_2017_FTF_turnout_",
dgraphic, ".csv"))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.