#Collate - restructure return data via
library(tidyverse)
# Presidential returns ----
# Download the MEDSL 1976-2016 presidential returns, keep the identifier and
# vote columns, and normalise the party / candidate labels.
pres_returns <-
  url(
    "https://raw.githubusercontent.com/MEDSL/constituency-returns/master/1976-2016-president.csv"
  ) %>%
  read.csv(stringsAsFactors = FALSE) %>%
  select(year:state_fips, candidate:party, candidatevotes, totalvotes) %>%
  mutate(
    # Strip the "-farmer-labor" suffix from party labels.
    party = gsub("-farmer-labor", "", party),
    # Move the text after the last ", " in front of the text before it
    # (e.g. "Last, First" becomes "First Last").
    candidate = gsub("(^.*)(, )(.*$)", "\\3 \\1", candidate),
    # Disabled step kept from the original script:
    # party = ifelse(grepl('democratic-', party), 'democrat', party)
    # Every label outside the three categories below is lumped into "other".
    party = ifelse(
      party %in% c("republican", "democrat", "independent"),
      party,
      "other"
    )
  )

# Declare the candidate strings as UTF-8 encoded.
Encoding(pres_returns$candidate) <- "UTF-8"

# Collapse to one row per candidate within year/state/totalvotes: vote counts
# are summed and the party labels of the merged rows are joined with " | ".
pres_returns <- pres_returns %>%
  group_by(year, state, state_po, state_fips, candidate, totalvotes) %>%
  summarise(
    candidatevotes = sum(candidatevotes),
    party = paste(party, collapse = " | ")
  ) %>%
  ungroup()
# Presidential winner per state-year ----
# Keep the row(s) with the highest vote count in each year/state and label the
# winner's party with its full name.
#
# `pres_returns$party` can hold several labels joined with " | " (one per
# merged row), so both major parties are detected with grepl(). The previous
# exact comparison `party == 'democrat'` tagged a winner labelled e.g.
# "democrat | other" as "Republican Party".
winners <- pres_returns %>%
  group_by(year, state) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  # Drop the grouping so the result does not carry it into later joins.
  ungroup() %>%
  select(-candidatevotes) %>%
  mutate(
    # Republican takes precedence when a combined label contains both.
    party = ifelse(grepl("republican", party), "republican", party),
    # As before, anything that is not a Democratic label falls back to
    # "Republican Party".
    party = ifelse(
      grepl("democrat", party),
      "Democratic Party",
      "Republican Party"
    )
  )
# Presidential vote shares by state ----
# One row per year/state with a percentage column per party category, joined
# to the winning candidate and party from `winners`.
uspol_medsl_returns_pres_state <- pres_returns %>%
  filter(!is.na(candidate)) %>%
  mutate(
    # Reduce combined labels ("a | b") to a single category; a later rule
    # overrides an earlier one when several match.
    party = ifelse(grepl("democrat", party), "democrat", party),
    party = ifelse(grepl("republican", party), "republican", party),
    party = ifelse(grepl("other", party), "other", party)
  ) %>%
  # Keep only the top vote-getter within each party category.
  group_by(year, state, party) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  # Convert raw votes to a percentage of totalvotes, rounded to 2 decimals.
  mutate(candidatevotes = round(candidatevotes / totalvotes * 100, 2)) %>%
  select(-candidate) %>%
  # One column per party category.
  spread(party, candidatevotes) %>%
  # Natural join: no `by` is given, so all shared column names are used.
  left_join(winners) %>%
  # Party categories absent for a state-year become 0.
  replace(., is.na(.), 0) %>%
  # Disabled step kept from the original script:
  # select(year, congress, GEOID, state:bioname, party_name) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble()
# House returns ----
# Follow MEDSL convention per at-large Reps as '0'.

# Candidates whose party is forced to "democrat" (regex alternation).
house_democrat_names <-
  "Cedric L\\. Richmond|Joseph D\\. Early|Bennie G\\. Thompson"

# Download the MEDSL 1976-2018 House returns, drop special elections, and
# reduce party labels to republican / democrat / independent / other.
house_returns <- read.csv(
  url("https://raw.githubusercontent.com/MEDSL/constituency-returns/master/1976-2018-house.csv"),
  stringsAsFactors = FALSE
) %>%
  filter(special == FALSE) %>%
  # totalvotes is wrong in the original data; it is recomputed below.
  select(
    year:state_po, state_fips, district, special,
    candidate:party, candidatevotes,
    totalvotes, unofficial
  ) %>%
  # Disabled step kept from the original script:
  # mutate(district = ifelse(state_po == 'ND'|district==0, 1, district)) %>%
  mutate(party = ifelse(grepl("democra", party), "democrat", party)) %>%
  mutate(
    party = ifelse(
      grepl("republican|tax revolt|reform", party),
      "republican",
      party
    )
  ) %>%
  # Named-candidate override. The original matched the names against `party`
  # only, which cannot fire on ordinary party labels, so rows for these
  # candidates with a blank/NA party fell through to "republican" in the next
  # step. The original `party` match is kept, and the names are additionally
  # matched against `candidate` when the party is missing.
  # NOTE(review): assumes the override was meant to key on the candidate
  # name -- confirm against the raw data.
  mutate(
    party = ifelse(
      grepl(house_democrat_names, party) |
        ((party == "" | is.na(party)) &
          grepl(house_democrat_names, candidate)),
      "democrat",
      party
    )
  ) %>%
  # Remaining blank/NA parties are assigned "republican".
  mutate(party = ifelse(party == "" | is.na(party), "republican", party)) %>%
  mutate(
    party = ifelse(
      party %in% c("republican", "democrat", "independent"),
      party,
      "other"
    )
  ) %>%
  # Recompute totalvotes as the sum of candidatevotes within each contest.
  # NOTE(review): this sum runs before distinct(), so any duplicated rows are
  # counted in totalvotes -- confirm that is intended.
  group_by(year, state, state_po, state_fips, district, special) %>%
  mutate(totalvotes = sum(candidatevotes)) %>%
  ungroup() %>%
  filter(!is.na(candidate)) %>%
  # Earlier totalvotes approach, noted as wrong in the original script:
  # group_by(year, state, state_po, state_fips, district, special, party) %>%
  # filter(totalvotes == max(totalvotes)) %>%
  # ungroup() %>%
  distinct() # Addresses duplicate entries for 2018
## Collapse rows that share a candidate within a contest (original note:
## "Correct Lee Zeldin issue here"): votes are summed and the party labels of
## the merged rows are joined with " | ".
house_returns <- house_returns %>%
  group_by(
    year, state, state_po, state_fips, district,
    special, candidate, unofficial, totalvotes
  ) %>%
  summarise(
    candidatevotes = sum(candidatevotes),
    party = paste(party, collapse = " | ")
  ) %>%
  ungroup()

## Convert candidate names to the native encoding.
house_returns$candidate <- enc2native(house_returns$candidate)
## Identify House winners: the row(s) with the most votes in each
## year/state/district/special contest, without the vote-count columns.
winners_house <- house_returns %>%
  group_by(year, state, district, special) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  select(-c(candidatevotes, totalvotes))
## Homogenize names & party via voteview.
## Query House members for congresses 95 through 116 and stack the results.
vv <- bind_rows(
  lapply(95:116, function(congress_no) {
    Rvoteview::member_search(chamber = "House", congress = congress_no)
  })
)

## Where a congress/state has a single distinct district code, recode it to 0.
vv <- vv %>%
  group_by(congress, state) %>%
  mutate(x = length(unique(district_code))) %>%
  ungroup() %>%
  mutate(district_code = ifelse(x == 1, 0, district_code)) %>%
  select(-x)

## Congress number -> year lookup (95 -> 1976, ..., 116 -> 2018).
cross <- data.frame(
  year = 1974 + 2 * (1:22),
  congress = 95:116,
  stringsAsFactors = FALSE
)

## House members with a candidate name derived from seo_name (hyphens become
## spaces) and the year attached through `cross`.
vv1 <- vv %>%
  filter(chamber == "House") %>%
  mutate(candidate = gsub("-", " ", seo_name)) %>%
  left_join(cross)
# Match each House winner to a voteview member ----
# For every row of `winners_house`, take the `vv1` rows with the same year,
# state abbreviation and district, and keep the one whose candidate name has
# the smallest case-insensitive edit distance (utils::adist) to the winner's.
# A winner with no candidate rows in `vv1` contributes nothing to `z`.
#
# Uses seq_len() so an empty `winners_house` yields no iterations (1:nrow()
# would iterate over c(1, 0)), and collects the matches in a list that is
# bound once instead of growing `z` on every iteration.
house_matches <- lapply(seq_len(nrow(winners_house)), function(i) {
  target <- winners_house[i, ]
  same_seat <- vv1$year == target$year &
    vv1$state_abbrev == target$state_po &
    vv1$district_code == target$district
  # which() drops NA comparisons, as subset() did.
  pool <- vv1[which(same_seat), ]
  best <- which.min(
    adist(target$candidate, pool$candidate, ignore.case = TRUE)
  )
  pool[best, ]
})

# The leading empty data.frame keeps `z` anchored on the same first input as
# the original accumulator.
z <- bind_rows(data.frame(), house_matches)
# Attach the matched voteview name, congress and party to each House winner,
# keyed on year, state abbreviation and district.
winners_house1 <- left_join(
  winners_house,
  select(
    z,
    bioname, congress, state_abbrev,
    district_code, year, party_name
  ),
  by = c(
    "year" = "year",
    "state_po" = "state_abbrev",
    "district" = "district_code"
  ) # , 'special' = 'special'
) %>%
  select(year, state_po, district, special, bioname:party_name)
# House vote shares by congressional district ----
# One row per contest with a percentage column per party category, joined to
# the matched winner from `winners_house1` and keyed by a 4-character GEOID.
uspol_medsl_returns_house_cd <- house_returns %>%
  # Disabled step kept from the original script:
  # filter(!is.na(candidate)) %>%
  mutate(
    # Reduce combined labels ("a | b") to a single category; a later rule
    # overrides an earlier one when several match.
    party = ifelse(grepl("democrat", party), "democrat", party),
    party = ifelse(grepl("republican", party), "republican", party),
    party = ifelse(grepl("other", party), "other", party)
  ) %>%
  # Keep only the top vote-getter within each party category per contest.
  group_by(year, state, state_po, state_fips, district, special, party) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  # Convert raw votes to a percentage of totalvotes, rounded to 2 decimals.
  mutate(candidatevotes = round(candidatevotes / totalvotes * 100, 2)) %>%
  select(-candidate) %>%
  # One column per party category.
  spread(party, candidatevotes) %>%
  # Natural join: no `by` is given, so all shared column names are used.
  left_join(winners_house1) %>%
  # Party categories absent for a contest become 0.
  replace(., is.na(.), 0) %>%
  rename(
    state_abbrev = state_po,
    district_code = district
  ) %>%
  # GEOID = state FIPS and district code, each left-padded with "0" to
  # width 2. `pad` is given as the string "0" rather than the number 0.
  mutate(
    GEOID = paste0(
      stringr::str_pad(state_fips, 2, pad = "0"),
      stringr::str_pad(district_code, 2, pad = "0")
    )
  ) %>%
  select(year, congress, GEOID, state:bioname, party_name) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble() %>%
  rename(
    candidate = bioname,
    party = party_name
  )
## Senate returns ----
## Download the MEDSL 1976-2018 Senate returns, keep regular general
## elections only, and reduce party labels to
## republican / democrat / independent / other.
senate_returns <-
  url(
    "https://raw.githubusercontent.com/MEDSL/constituency-returns/master/1976-2018-senate.csv"
  ) %>%
  read.csv(stringsAsFactors = FALSE) %>%
  filter(stage == "gen" & special == FALSE) %>%
  select(
    year:state_fips, special, candidate:party,
    candidatevotes, totalvotes, unofficial
  ) %>%
  mutate(
    # Labels containing "democratic-" are treated as "democrat".
    party = ifelse(grepl("democratic-", party), "democrat", party),
    party = ifelse(
      party %in% c("republican", "democrat", "independent"),
      party,
      "other"
    )
  ) %>%
  # Disabled steps kept from the original script:
  # group_by(year, state, state_po, special, party) %>%
  # filter(totalvotes == max(totalvotes)) %>%
  # No group_by() is active in this pipeline; the ungroup() call is retained
  # from the original.
  ungroup() # %>%
# distinct()

## Declare the candidate strings as UTF-8 encoded.
Encoding(senate_returns$candidate) <- "UTF-8"
## Collapse rows that share a candidate within a contest (original note:
## "Correct Lee Zeldin issue here"): votes are summed and the party labels of
## the merged rows are joined with " | ".
senate_returns <- senate_returns %>%
  group_by(
    year, state, state_po, state_fips,
    special, candidate, unofficial, totalvotes
  ) %>%
  summarise(
    candidatevotes = sum(candidatevotes),
    party = paste(party, collapse = " | ")
  ) %>%
  ungroup()
## Identify Senate winners: the row(s) with the most votes in each
## year/state/special contest, without the vote-count columns.
## Original note: "This is correct - breaks downstream".
winners_senate <- senate_returns %>%
  group_by(year, state, special) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  select(-c(candidatevotes, totalvotes))
## Homogenize names & party via voteview.
## Query Senate members for congresses 95 through 116 and stack the results.
vv <- bind_rows(
  lapply(95:116, function(congress_no) {
    Rvoteview::member_search(chamber = "Senate", congress = congress_no)
  })
)

## Congress number -> year lookup (95 -> 1976, ..., 116 -> 2018).
cross <- data.frame(
  year = 1974 + 2 * (1:22),
  congress = 95:116,
  stringsAsFactors = FALSE
)

## Senators with a "First Last" candidate name rebuilt from `bioname`, plus
## the year attached through `cross`.
vv1 <- vv %>%
  filter(chamber == "Senate") %>%
  mutate(
    # Everything before the first ", " is taken as the last name.
    last = gsub(", .*$", "", bioname),
    # Strip through the capital letter that precedes ", ", then keep only the
    # first remaining word.
    first = gsub("^.*[A-Z], ", "", bioname),
    first = gsub(" .*$", "", first),
    # Names ending in ", Jr." get a " Jr." suffix.
    candidate = ifelse(
      grepl(", Jr\\.$", bioname),
      paste0(first, " ", last, " Jr."),
      paste0(first, " ", last)
    )
  ) %>%
  # Disabled alternative kept from the original script:
  # mutate(candidate = gsub('-', ' ', seo_name)) %>%
  left_join(cross) # Correct
# Match each Senate winner to a voteview member ----
# Original notes: "Issue is 2 senators per year. No class info, or unique
# senator id in medsl data. What are we doing here?" and "z is wrong."
# NOTE(review): the matching rule itself is unchanged here; the concern
# recorded by the original author still applies.
#
# For every row of `winners_senate`, take the `vv1` rows with the same year
# and state abbreviation, and keep the one whose candidate name has the
# smallest case-insensitive edit distance (utils::adist) to the winner's.
# A winner with no candidate rows in `vv1` contributes nothing to `z`.
#
# Uses seq_len() so an empty `winners_senate` yields no iterations (1:nrow()
# would iterate over c(1, 0)), and collects the matches in a list that is
# bound once instead of growing `z` on every iteration.
senate_matches <- lapply(seq_len(nrow(winners_senate)), function(i) {
  target <- winners_senate[i, ]
  same_state_year <- vv1$year == target$year &
    vv1$state_abbrev == target$state_po
  # which() drops NA comparisons, as subset() did.
  pool <- vv1[which(same_state_year), ]
  best <- which.min(
    adist(target$candidate, pool$candidate, ignore.case = TRUE)
  )
  pool[best, ]
})

# The leading empty data.frame keeps `z` anchored on the same first input as
# the original accumulator.
z <- bind_rows(data.frame(), senate_matches)
# Attach the matched voteview name, congress and party to each Senate winner,
# keyed on year and state abbreviation.
winners_senate1 <- left_join(
  winners_senate,
  select(z, bioname, congress, state_abbrev, year, party_name),
  by = c("year" = "year", "state_po" = "state_abbrev")
) %>%
  select(year, state_po, special, bioname:party_name)
# Senate vote shares by state ----
# One row per contest with a percentage column per party category, joined to
# the matched winner from `winners_senate1`.
uspol_medsl_returns_senate_state <- senate_returns %>%
  filter(!is.na(candidate)) %>%
  mutate(
    # Reduce combined labels ("a | b") to a single category; a later rule
    # overrides an earlier one when several match.
    party = ifelse(grepl("democrat", party), "democrat", party),
    party = ifelse(grepl("republican", party), "republican", party),
    party = ifelse(grepl("other", party), "other", party)
  ) %>%
  # Keep only the top vote-getter within each party category.
  group_by(year, state, state_po, party) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  # Convert raw votes to a percentage of totalvotes, rounded to 2 decimals.
  mutate(candidatevotes = round(candidatevotes / totalvotes * 100, 2)) %>%
  select(-candidate) %>%
  # One column per party category.
  spread(party, candidatevotes) %>%
  # Natural join: no `by` is given, so all shared column names are used.
  left_join(winners_senate1) %>%
  # Party categories absent for a contest become 0.
  replace(., is.na(.), 0) %>%
  rename(state_abbrev = state_po) %>%
  select(year, congress, state:bioname, party_name) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble() %>%
  rename(
    candidate = bioname,
    party = party_name
  )
# Output ----
# Switch to the package directory, then store each result as package data
# through usethis::use_data(), replacing any existing copy.
setwd("/home/jtimm/jt_work/GitHub/packages/uspoliticalextras")

usethis::use_data(
  uspol_medsl_returns_house_cd,
  overwrite = TRUE
)
usethis::use_data(
  uspol_medsl_returns_pres_state,
  overwrite = TRUE
)
usethis::use_data(
  uspol_medsl_returns_senate_state,
  overwrite = TRUE
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.