# data-raw/build_returns.R

# Collate and restructure election-return data from the MEDSL
# constituency-returns repository.

library(tidyverse)

# Presidential returns (MEDSL constituency-returns, 1976-2016 file).
# Keeps the identifying columns plus candidate, party and vote counts, then
# normalises party labels into republican / democrat / independent / other and
# rewrites candidate names from 'Last, First' to 'First Last'.
pres_returns <- read.csv(url("https://raw.githubusercontent.com/MEDSL/constituency-returns/master/1976-2016-president.csv"),
                     stringsAsFactors = FALSE) %>%
  select(year:state_fips, candidate:party, candidatevotes, totalvotes) %>%
  mutate(
    # Drop the '-farmer-labor' suffix (e.g. 'democratic-farmer-labor').
    party = gsub('-farmer-labor', '', party),
    # The stripped label reads 'democratic', which is not one of the kept
    # labels below; fold it into 'democrat' so it is not bucketed as 'other'.
    party = ifelse(party %in% 'democratic', 'democrat', party),
    # Swap the text before and after the last ', ' in the name.
    candidate = gsub('(^.*)(, )(.*$)', '\\3 \\1', candidate)) %>%
  # Everything outside the three kept labels (including NA) becomes 'other'.
  mutate(party = ifelse(party %in% c('republican', 'democrat', 'independent'),
                        party, 'other'))

# Declare the candidate strings as UTF-8.
Encoding(pres_returns$candidate) <- 'UTF-8'

# One row per candidate within each year/state/totalvotes combination:
# votes are summed and the party labels of the merged rows are joined
# with ' | '.
pres_returns <- ungroup(
  summarise(
    group_by(pres_returns,
             year, state, state_po, state_fips, candidate, totalvotes),
    candidatevotes = sum(candidatevotes),
    party = paste(party, collapse = ' | ')
  )
)

# Winner of each state-year: the candidate row(s) with the highest vote count.
# The party column is relabelled as 'Democratic Party' or 'Republican Party'.
winners <- pres_returns %>%
  group_by(year, state) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  select(-candidatevotes) %>%
  # `party` may be a combined label such as 'democrat | other' after the
  # per-candidate aggregation, so match by pattern rather than by equality.
  # Any label mentioning 'republican' wins first; any remaining label that
  # mentions 'democrat' is Democratic; everything else keeps the original
  # fallback of 'Republican Party'.
  mutate(party = ifelse(!grepl('republican', party) & grepl('democrat', party),
                        'Democratic Party', 'Republican Party'))

# State-level presidential returns in wide form: one row per state-year with
# the vote share (percent of totalvotes, two decimals) of the top candidate in
# each party bucket, joined to that state-year's winner.
uspol_medsl_returns_pres_state <- pres_returns %>%
  filter(!is.na(candidate)) %>%
  # Reduce combined labels (e.g. 'democrat | other') to a single bucket.
  # The checks run in order, so 'democrat' takes precedence, then
  # 'republican', then 'other'.
  mutate(party = ifelse(grepl('democrat', party), 'democrat', party)) %>%
  mutate(party = ifelse(grepl('republican', party), 'republican', party)) %>%
  mutate(party = ifelse(grepl('other', party), 'other', party)) %>%
  # Keep only the best-performing candidate within each bucket.
  group_by(year, state, party) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  mutate(candidatevotes = round(candidatevotes/totalvotes *100,2)) %>%
  select(-candidate) %>%
  # One column per party bucket.
  spread(party, candidatevotes) %>%
  # Natural join on the columns the two tables share.
  left_join(winners) %>%
  # Missing values in every column become 0.
  replace(., is.na(.), 0) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble()


# House returns (MEDSL constituency-returns, 1976-2018 file), regular
# (non-special) elections only.
# Follows the MEDSL convention of coding at-large districts as '0'.
# Party labels are normalised to republican / democrat / independent / other
# and `totalvotes` is recomputed per race because, per the author's note, the
# totalvotes column in the original data is wrong.
house_returns <- read.csv(url("https://raw.githubusercontent.com/MEDSL/constituency-returns/master/1976-2018-house.csv"),
                          stringsAsFactors = FALSE) %>%

  filter(special == FALSE) %>%

  select(year:state_po, state_fips, district, special,
         candidate:party, candidatevotes,
         totalvotes, unofficial) %>%

  mutate(party = ifelse(grepl('democra', party),
                        'democrat', party)) %>%
  mutate(party = ifelse(grepl('republican|tax revolt|reform', party),
                        'republican', party)) %>%
  # These patterns are candidate names, so they must be tested against
  # `candidate`; tested against `party` they can never match, and the rows
  # would fall through to the blank-party rule below.
  mutate(party = ifelse(grepl('Cedric L\\. Richmond|Joseph D\\. Early|Bennie G\\. Thompson', candidate),
                        'democrat', party)) %>%

  # NOTE(review): blank or missing party is assigned to 'republican' --
  # confirm this is the intended default.
  mutate(party = ifelse(party==''|is.na(party), 'republican', party)) %>%
  mutate(party = ifelse(party %in% c('republican', 'democrat', 'independent'), party, 'other')) %>%

  # Drop exact duplicate rows BEFORE summing, otherwise duplicated entries
  # (present for 2018) are counted twice in the recomputed totalvotes.
  distinct() %>%

  # Recompute totalvotes as the sum of all candidate votes in the race.
  group_by(year, state, state_po, state_fips,
           district, special) %>%
  mutate(totalvotes = sum(candidatevotes)) %>%
  ungroup() %>%

  # Rows without a candidate still contribute to totalvotes above; they are
  # removed only afterwards.
  filter(!is.na(candidate)) %>%
  # Collapse rows that differed only in the original totalvotes value.
  distinct()


## Merge rows that share a candidate within a race (the "Lee Zeldin issue"
## in the author's words): votes are summed and the party labels of the
## merged rows are joined with ' | '.
house_returns <- ungroup(
  summarise(
    group_by(house_returns,
             year, state, state_po, state_fips, district, special,
             candidate, unofficial, totalvotes),
    candidatevotes = sum(candidatevotes),
    party = paste(party, collapse = ' | ')
  )
)
## Convert candidate names to the session's native encoding.
house_returns$candidate <- enc2native(house_returns$candidate)

## Winner of each House race: the row(s) holding the maximum vote count
## within a year / state / district / special grouping. The vote columns
## are dropped afterwards.
winners_house <- ungroup(
  select(
    filter(
      group_by(house_returns, year, state, district, special),
      candidatevotes == max(candidatevotes)
    ),
    -candidatevotes, -totalvotes
  )
)


## Standardise winner names and party via Voteview.
## Fetch House member records for congresses 95 through 116 and stack them.
vv <- bind_rows(
  lapply(95:116, function(cong) {
    Rvoteview::member_search(chamber = 'House', congress = cong)
  })
)

## Where a state has a single distinct district code within a congress,
## recode that district to 0; `x` is a temporary helper column.
vv <- vv %>%
  group_by(congress, state) %>%
  mutate(x = length(unique(district_code))) %>%
  ungroup() %>%
  mutate(district_code = ifelse(x == 1, 0, district_code)) %>%
  select(-x)

## Lookup from election year (1976, 1978, ..., 2018) to congress (95..116).
cross <- data.frame(year = seq(1976, 2018, by = 2),
                    congress = 95:116,
                    stringsAsFactors = FALSE)

## House members with a matchable name (hyphens in seo_name replaced by
## spaces) and the election year attached through the lookup.
vv1 <- left_join(
  mutate(filter(vv, chamber == 'House'),
         candidate = gsub('-', ' ', seo_name)),
  cross
)

# For every House winner, pick the Voteview member from the same year, state
# and district whose name has the smallest edit distance (adist, ignoring
# case) to the winner's name. When no Voteview row matches, nothing is added
# for that winner.
# The per-winner matches are collected with lapply() and bound once, which
# avoids re-copying `z` on every iteration and the 1:nrow() hazard when
# `winners_house` is empty. Leading with data.frame() keeps the result a
# plain data frame, as before.
z <- bind_rows(
  data.frame(),
  lapply(seq_len(nrow(winners_house)), function(i) {
    x1 <- winners_house[i, ]
    x2 <- subset(vv1, year == x1$year &
                   state_abbrev == x1$state_po &
                   district_code == x1$district)
    x3 <- which.min(adist(x1$candidate, x2$candidate, ignore.case = TRUE))
    x2[x3, ]
  })
)

# Attach the matched Voteview name, congress and party to each House winner,
# keyed on year, state abbreviation and district, then keep only the
# identifying columns plus the Voteview fields.
winners_house1 <- left_join(
  winners_house,
  select(z, bioname, congress, state_abbrev,
         district_code, year, party_name),
  by = c('year' = 'year',
         'state_po' = 'state_abbrev',
         'district' = 'district_code')
) %>%
  select(year, state_po, district, special, bioname:party_name)


# District-level House returns in wide form: one row per race with the vote
# share (percent of totalvotes, two decimals) of the top candidate in each
# party bucket, joined to the Voteview-matched winner.
uspol_medsl_returns_house_cd <- house_returns %>%
  # Reduce combined labels (e.g. 'democrat | other') to a single bucket;
  # 'democrat' takes precedence, then 'republican', then 'other'.
  mutate(party = ifelse(grepl('democrat', party), 'democrat', party)) %>%
  mutate(party = ifelse(grepl('republican', party), 'republican', party)) %>%
  mutate(party = ifelse(grepl('other', party), 'other', party)) %>%
  # Keep only the best-performing candidate within each bucket.
  group_by(year, state, state_po, state_fips, district, special, party) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  mutate(candidatevotes = round(candidatevotes/totalvotes *100,2)) %>%
  select(-candidate) %>%
  # One column per party bucket.
  spread(party, candidatevotes) %>%
  # Natural join on the columns the two tables share.
  left_join(winners_house1) %>%
  # Missing values in every column become 0.
  replace(., is.na(.), 0) %>%
  rename(state_abbrev = state_po,
         district_code = district) %>%
  # GEOID = state FIPS and district code, each left-padded to two characters.
  # `pad` is passed as the character '0', the type str_pad() documents.
  mutate(GEOID = paste0(stringr::str_pad(state_fips, 2, pad = '0'),
                        stringr::str_pad(district_code, 2, pad = '0'))) %>%
  select(year, congress, GEOID, state:bioname, party_name) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble() %>%
  rename(candidate = bioname,
         party = party_name)



## Senate returns (MEDSL constituency-returns, 1976-2018 file).
## Only general-stage, non-special contests are kept. Party labels
## containing 'democratic-' are folded into 'democrat', and anything
## outside republican / democrat / independent becomes 'other'.
senate_returns <- read.csv(url("https://raw.githubusercontent.com/MEDSL/constituency-returns/master/1976-2018-senate.csv"),
                           stringsAsFactors = FALSE) %>%
  filter(stage == 'gen', special == FALSE) %>%
  select(year:state_fips, special, candidate:party,
         candidatevotes, totalvotes, unofficial) %>%
  mutate(
    party = ifelse(grepl('democratic-', party), 'democrat', party),
    party = ifelse(party %in% c('republican', 'democrat', 'independent'),
                   party, 'other')
  )

## Declare the candidate strings as UTF-8.
Encoding(senate_returns$candidate) <- 'UTF-8'


## Merge rows that share a candidate within a contest (same fix the author
## labels the "Lee Zeldin issue"): votes are summed and the party labels of
## the merged rows are joined with ' | '.
senate_returns <- ungroup(
  summarise(
    group_by(senate_returns,
             year, state, state_po, state_fips, special,
             candidate, unofficial, totalvotes),
    candidatevotes = sum(candidatevotes),
    party = paste(party, collapse = ' | ')
  )
)

## Winner of each Senate contest: the row(s) holding the maximum vote count
## within a year / state / special grouping. The vote columns are dropped
## afterwards.
## Author's note: this is correct, but it breaks downstream steps.
winners_senate <- ungroup(
  select(
    filter(
      group_by(senate_returns, year, state, special),
      candidatevotes == max(candidatevotes)
    ),
    -candidatevotes, -totalvotes
  )
)


## Standardise winner names and party via Voteview.
## Fetch Senate member records for congresses 95 through 116 and stack them.
vv <- bind_rows(
  lapply(95:116, function(cong) {
    Rvoteview::member_search(chamber = 'Senate', congress = cong)
  })
)

## Lookup from election year (1976, 1978, ..., 2018) to congress (95..116).
cross <- data.frame(year = seq(1976, 2018, by = 2),
                    congress = 95:116,
                    stringsAsFactors = FALSE)

## Build a matchable 'First Last' name from bioname:
##   last      - the text before the first ', '
##   first     - the first word after the surname
##   candidate - 'first last', with ' Jr.' appended when bioname ends in
##               ', Jr.'
## The election year is then attached through the lookup.
vv1 <- vv %>%
  filter(chamber == 'Senate') %>%
  mutate(last = gsub(', .*$', '', bioname)) %>%
  mutate(first = gsub('^.*[A-Z], ', '', bioname)) %>%
  mutate(first = gsub(' .*$', '', first)) %>%
  mutate(candidate = ifelse(grepl(', Jr\\.$', bioname),
                            paste0(first, ' ', last, ' Jr.'),
                            paste0(first, ' ', last))) %>%
  left_join(cross)

## Issue: a state has two senators in any given year, and the MEDSL data has
## no class information or unique senator id to tell them apart.

## For every Senate winner, pick the Voteview member from the same year and
## state whose name has the smallest edit distance (adist, ignoring case) to
## the winner's name. When no Voteview row matches, nothing is added for that
## winner.
## The per-winner matches are collected with lapply() and bound once, which
## avoids re-copying `z` on every iteration and the 1:nrow() hazard when
## `winners_senate` is empty. Leading with data.frame() keeps the result a
## plain data frame, as before.
## NOTE(review): the author flags the resulting `z` as wrong -- the matching
## logic itself is unchanged here.
z <- bind_rows(
  data.frame(),
  lapply(seq_len(nrow(winners_senate)), function(i) {
    x1 <- winners_senate[i, ]
    x2 <- subset(vv1, year == x1$year &
                   state_abbrev == x1$state_po)
    x3 <- which.min(adist(x1$candidate, x2$candidate, ignore.case = TRUE))
    x2[x3, ]
  })
)



# Attach the matched Voteview name, congress and party to each Senate winner,
# keyed on year and state abbreviation, then keep only the identifying
# columns plus the Voteview fields.
winners_senate1 <- left_join(
  winners_senate,
  select(z, bioname, congress, state_abbrev, year, party_name),
  by = c('year' = 'year',
         'state_po' = 'state_abbrev')
) %>%
  select(year, state_po, special, bioname:party_name)



# State-level Senate returns in wide form: one row per contest with the vote
# share (percent of totalvotes, two decimals) of the top candidate in each
# party bucket, joined to the Voteview-matched winner.
uspol_medsl_returns_senate_state <- senate_returns %>%
  filter(!is.na(candidate)) %>%
  # Reduce combined labels (e.g. 'democrat | other') to a single bucket;
  # 'democrat' takes precedence, then 'republican', then 'other'.
  mutate(party = ifelse(grepl('democrat', party), 'democrat', party)) %>%
  mutate(party = ifelse(grepl('republican', party), 'republican', party)) %>%
  mutate(party = ifelse(grepl('other', party), 'other', party)) %>%
  # Keep only the best-performing candidate within each bucket.
  group_by(year, state, state_po, party) %>%
  filter(candidatevotes == max(candidatevotes)) %>%
  ungroup() %>%
  mutate(candidatevotes = round(candidatevotes/totalvotes *100,2)) %>%
  select(-candidate) %>%
  # One column per party bucket.
  spread(party, candidatevotes) %>%
  # Natural join on the columns the two tables share.
  left_join(winners_senate1) %>%
  # Missing values in every column become 0.
  replace(., is.na(.), 0) %>%
  rename(state_abbrev = state_po) %>%
  select(year, congress, state:bioname, party_name) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble() %>%
  rename(candidate = bioname,
         party = party_name)


# Output: store the three assembled tables with usethis::use_data(),
# replacing any previously saved copies (overwrite = TRUE).
# NOTE(review): the working directory is switched to a hard-coded,
# machine-specific absolute path and is not restored afterwards; the script
# will fail on any machine where this path does not exist.
setwd("/home/jtimm/jt_work/GitHub/packages/uspoliticalextras")
usethis::use_data(uspol_medsl_returns_house_cd, overwrite=TRUE)
usethis::use_data(uspol_medsl_returns_pres_state, overwrite=TRUE)
usethis::use_data(uspol_medsl_returns_senate_state, overwrite=TRUE)
# jaytimm/uspoliticalextras documentation built on March 17, 2020, 3:44 a.m.