# data-raw/build_profiles.R

# Census API credentials ----
# The API key is a secret, so it is read from the CENSUS_API_KEY environment
# variable (for example, set in ~/.Renviron) rather than written into this
# script.
# NOTE(review): a literal key used to be committed on this line; it should be
# revoked and replaced with a fresh one.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "CENSUS_API_KEY is not set; define it (e.g. in ~/.Renviron) and re-run.",
    call. = FALSE
  )
}

# Register the key with tidycensus for this session. The key itself is never
# printed.
tidycensus::census_api_key(census_key)

library(tidyverse)

# Define census query ----
# Single lookup of the ACS variable codes to request, keyed by the readable
# name each one is given in the output. Keeping code and name side by side
# means the two vectors derived below can never drift out of alignment.
acs_lookup <- c(
  Per_BachelorsHigher = "DP02_0067P",
  Per_ForeignBorn     = "DP02_0092P",
  Per_Hispanic        = "DP05_0071P",
  Median_HH_Income    = "DP03_0062",
  Per_clf_unemployed  = "DP03_0005P",
  Per_White           = "DP05_0077P",
  Per_Black           = "DP05_0038P",
  Per_VotingAge       = "DP05_0087P",
  # Per_Over65        = "DP05_0024P",  # currently disabled
  Per_LessHS          = "DP02_0066P",
  Population          = "DP05_0001"
)

# Codes passed to the ACS query (plain, unnamed character vector).
variable <- unname(acs_lookup)

# Readable names, in the same order as `variable`.
variable_name <- names(acs_lookup)


# Pull some general demographics from the profile tables ----
# Code -> readable-name lookup used to relabel the ACS results.
acs_labels <- data.frame(variable, variable_name, stringsAsFactors = FALSE)

# Query the 2018 1-year ACS by congressional district for the codes in
# `variable`, then keep GEOID, NAME, the readable variable name and the
# estimate (the margin of error and the raw code are dropped).
gen <- tidycensus::get_acs(
  geography = "congressional district",
  variables = variable,
  year = 2018,
  survey = "acs1"
) %>%
  # Explicit key: the ACS code is the only column the two tables share.
  left_join(acs_labels, by = "variable") %>%
  select(GEOID, NAME, variable = variable_name, estimate)



# Get White, not-Hispanic education levels ----
# Requests four cells of table C15002H plus its total (C15002H_001) as the
# summary variable, collapses the cells into two categories per district, and
# expresses each category as a percentage of the total.
# NOTE(review): _003/_008 are presumably the male/female "less than high
# school" cells and _006/_011 the "bachelor's or higher" cells -- confirm
# against the C15002H table definition.
white_ed <- tidycensus::get_acs(
  geography = "congressional district",
  variables = c("C15002H_003", "C15002H_006", "C15002H_008", "C15002H_011"),
  summary_var = "C15002H_001",
  year = 2018,
  survey = "acs1"
) %>%
  mutate(
    variable = ifelse(
      variable %in% c("C15002H_003", "C15002H_008"),
      "Per_LessHS_White",
      "Per_BachelorsHigher_White"
    )
  ) %>%
  group_by(GEOID, NAME, variable) %>%
  summarize(
    estimate = sum(estimate),
    # summary_est is repeated on every row of a district, so the mean just
    # carries the table total through the aggregation.
    summary_est = mean(summary_est)
  ) %>%
  # Drop the leftover GEOID/NAME grouping so the result is a plain tibble.
  ungroup() %>%
  mutate(estimate = round(estimate / summary_est * 100, 1)) %>%
  select(-summary_est)



# Get some census codes to join ----
options(tigris_use_cache = TRUE, tigris_class = "sf")

# State FIPS code -> postal abbreviation lookup (geometry is dropped by the
# select once the object is a plain data frame).
state_codes <- tigris::states(cb = TRUE) %>%
  data.frame() %>%
  select(STATEFP, STUSPS)

# One row per congressional district: GEOID, state FIPS, district code, state
# abbreviation, and CD_AREA (natural log of the district area, rounded to
# 3 decimals).
# NOTE(review): no `year` is passed to tigris, so the CD115FP column depends on
# the package's default vintage -- confirm it matches the 2018 ACS districts.
us_house_districts <- tigris::congressional_districts(cb = TRUE) %>%
  select(GEOID, STATEFP, CD115FP) %>%
  left_join(state_codes, by = "STATEFP") %>%
  # as.numeric() strips the units from st_area(); the previous
  # gsub(' m^2]', ...) could never match (the `^` is an anchor) and only
  # round-tripped the value through character.
  mutate(CD_AREA = round(log(as.numeric(sf::st_area(.))), 3)) %>%
  data.frame() %>%
  select(-geometry) %>%
  # Exclude these state FIPS codes (presumably the U.S. territories -- verify).
  filter(!STATEFP %in% c("78", "69", "66", "72", "60"))


# Assemble the final long-format table ----
# Combines the general and White-only ACS measures, attaches district/state
# codes, derives two percentages in wide form, adds the log-area measure, and
# returns to long format (one row per district and variable).
district_codes <- us_house_districts %>% select(-CD_AREA)
district_area <- us_house_districts %>% select(GEOID, CD_AREA)

uspol_dems2018_house <- gen %>%
  bind_rows(white_ed) %>%
  left_join(district_codes, by = "GEOID") %>%
  # spread()/gather() are kept deliberately: the column range used in gather()
  # below relies on spread()'s ordering of the new columns.
  spread(variable, estimate) %>%
  mutate(
    # Per_LessHS is replaced by its complement.
    Per_LessHS = 100 - Per_LessHS,
    # Per_VotingAge is rescaled to a percentage of Population.
    Per_VotingAge = round(Per_VotingAge / Population * 100, 1)
  ) %>%
  left_join(district_area, by = "GEOID") %>%
  select(-Population, -NAME) %>%
  # Rows with no matching district record (including those filtered out of
  # us_house_districts) have no STATEFP and are dropped.
  filter(!is.na(STATEFP)) %>%
  gather(
    key = "variable", value = "estimate",
    Median_HH_Income:CD_AREA
  ) %>%
  rename(
    district_code = CD115FP,
    state_fips = STATEFP,
    state_abbrev = STUSPS
  ) %>%
  mutate(
    state_fips = as.integer(state_fips),
    district_code = as.integer(district_code)
  ) %>%
  # as_tibble() replaces the deprecated as.tibble().
  as_tibble()



# Output ----
# Save the dataset into the package's data/ directory. usethis works on the
# package in the working directory, so the script switches there for the
# write and then restores the directory it was started from.
# NOTE(review): the path is machine-specific; adjust it when running elsewhere.
pkg_dir <- "/home/jtimm/jt_work/GitHub/packages/uspoliticalextras"
old_wd <- setwd(pkg_dir)
usethis::use_data(uspol_dems2018_house, overwrite = TRUE)
setwd(old_wd)
# jaytimm/uspoliticalextras documentation built on March 17, 2020, 3:44 a.m.