# Setup ----
# Show chunk source code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Packages used throughout this document.
library(glptools)
glp_load_packages()
library(arrow)
library(labelled)
This pulls in all of the relevant census variables. A bit of context about each table is included in the comments below.
# Census variable tables (ACS B28 series) ----

# B28002 - internet access at the household level.
# Details what type of internet subscription a household has, whether it has
# internet access without a subscription, or whether it has no internet access.
# Corresponds with CINETHH, CIHISPEED, and CIDIAL in IPUMS.
internet_household <- build_census_var_df("acs5", "B28002")

# B28001 - computer in household.
compdevice_household <- build_census_var_df("acs5", "B28001")

# B28003 - computer and internet access at the household level.
# Details whether a household with a computer has an internet subscription
# (and what type) or not, or whether it has no computer.
compinternet_household <- build_census_var_df("acs5", "B28003")

# B28008 - computer and internet access at the individual level.
# Details whether a person with a computer has an internet subscription
# (and what type) or not, or whether they have no computer.
compinternet_individual <- build_census_var_df("acs5", "B28008")

# B28004 - internet presence and type by income.
# Use ACS 1-year for FIPS, ACS 5-year for tracts.
incomeinternet_household <- build_census_var_df("acs1", "B28004")

# B28005 - age by computer and internet (individual).
ageinternetcomp_individual <- build_census_var_df("acs5", "B28005")

# B28006 - computer and internet by education.
eduinternetcomp_individual <- build_census_var_df("acs5", "B28006")

# B28009 - computer and internet by race.
raceinternetcomp_individual <- build_census_var_df("acs5", "B28009")
Next we're going to work with the tract-level tables (overall access, access by age, and access by income): pull down the data, remove total columns, and process it to make map files:
# Overall Digital Access (no disagg) (individual and household) ----
access_house <- get_census(compinternet_household, "tract")
access_indiv <- get_census(compinternet_individual, "tract")

# Individual digital access ----
# Build the category column that process_census() is given as cat_var.
# B28008_005 is the row that describes having a computer with a fixed
# broadband internet subscription; this excludes people who have a computer
# and only dial-up or cellular data.
# 005 covers both 006 and 007 (it is cumulative of those); 008 is
# dial-up/cellular data.
var_individ <- access_indiv %>%
  mutate(
    digitalaccess = case_when(
      variable %in% c("B28008_001E", "B28008_001M") ~ "total",
      variable %in% c("B28008_005E", "B28008_005M") ~ "hasaccess"
    )
  )

# Process census
clean_individ <- var_individ %>%
  process_census(cat_var = "digitalaccess", output_name = "digitalaccess")

# Process map: the result is unpacked into the global environment by
# list2env(); the return value kept in map_access is then discarded.
map_access <- clean_individ %>%
  process_map(
    "hasaccess",
    return_name = "digitalaccess_nodisagg",
    maps = c("tract", "nh", "muw")
  ) %>%
  list2env(.GlobalEnv)
rm(map_access)

# Household digital access comparison ----
# The percentages here are going to be slightly higher because there is no
# differentiation between types of broadband like there is for individuals.
# This makes me feel more confident about individual; will run this by
# Harrison though. Here's the process for household just in case it's wanted.
# B28003_004E is the "computer with a broadband internet subscription" row.
# There are no subcategories; the only options with a computer are dial-up,
# broadband, or without internet. Another argument could be made that this is
# better because it is household and the IPUMS data is also household?
# Looking at the totals for the whole data (here:
# https://data.census.gov/cedsci/table?q=B28&d=ACS%201-Year%20Estimates%20Detailed%20Tables&tid=ACSDT1Y2019.B28008)
# the percentage comes out to 86% for household and 77% for individual - I feel
# individual is a better descriptor of what is going on and people's
# experiences.
var_house <- access_house %>%
  mutate(
    digitalaccess = case_when(
      variable %in% c("B28003_001E", "B28003_001M") ~ "total",
      variable %in% c("B28003_004E", "B28003_004M") ~ "hasaccess"
    )
  )

# Process census
clean_house <- var_house %>%
  process_census(cat_var = "digitalaccess", output_name = "digitalaccess")

# Process map: list2env() unpacks the result into the global environment.
# The value it returns is not needed afterwards, so it is removed just like
# map_access and map_age are.
map_access_house <- clean_house %>%
  process_map(
    "hasaccess",
    return_name = "digitalaccess_house",
    maps = c("tract", "nh", "muw")
  ) %>%
  list2env(.GlobalEnv)
rm(map_access_house)

# By Age ----
age <- get_census(ageinternetcomp_individual, "tract")

# Clear out totals: rows 001, 002, 003, 008, 009, 014 and 015 of B28005,
# each as an estimate (E) and a margin of error (M).
total_rows <- c("001", "002", "003", "008", "009", "014", "015")
totals <- paste0("B28005_", rep(total_rows, each = 2), c("E", "M"))

age_nototal <- age %>%
  subset(variable %not_in% totals)

# Make categorical variable: TRUE when the label mentions a broadband
# subscription. str_detect() already returns a logical vector, so no
# if_else() wrapper is needed.
age_nototal %<>%
  mutate(compinternet = str_detect(label, "broadband Internet subscription"))

# Process census (one option)
clean_age <- age_nototal %>%
  process_census(
    cat_var = "compinternet",
    output_name = "digitalaccess",
    age_groups = c("under_18", "18_64", "65_plus"),
    age_group_wide = TRUE
  )

# Process map
map_age <- clean_age %>%
  process_map(
    "under_18", "18_64", "65_plus",
    return_name = "digitalaccess_by_age",
    maps = c("tract", "nh", "muw")
  ) %>%
  list2env(.GlobalEnv)
rm(map_age)

# By Income ----
# By income we only have internet access, not internet and computer access.
income <- get_census(incomeinternet_household, "FIPS")

# Labels the B28004 rows belonging to one income band.
#   variable:    vector of census variable codes (e.g. "B28004_002E")
#   total_code:  three-digit row number used as the band's "total"
#   access_code: three-digit row number labelled with the band name
#   band:        label written for the access_code rows
# Returns a character vector: "total", `band`, or NA for every other row.
# Both the estimate (E) and margin-of-error (M) codes are matched.
flag_income_band <- function(variable, total_code, access_code, band) {
  case_when(
    variable %in% paste0("B28004_", total_code, c("E", "M")) ~ "total",
    variable %in% paste0("B28004_", access_code, c("E", "M")) ~ band
  )
}

# Create a variable column for each income class.
income_access <- income %>%
  mutate(
    less10  = flag_income_band(variable, "002", "004", "less10"),
    "10_19" = flag_income_band(variable, "006", "008", "10_19"),
    "20_34" = flag_income_band(variable, "010", "012", "20_34"),
    "35_49" = flag_income_band(variable, "014", "016", "35_49"),
    "50_74" = flag_income_band(variable, "018", "020", "50_74"),
    plus_75 = flag_income_band(variable, "022", "024", "plus_75")
  )

# Run process_census() once per income class and join the results column-wise.
# NOTE(review): income_internet is not created before the loop; this relies on
# assign_col_join() coping with a first argument that does not exist yet -
# confirm against glptools.
variables <- list("less10", "10_19", "20_34", "35_49", "50_74", "plus_75")

for (x in variables) {
  df <- income_access %>%
    process_census(cat_var = x)

  income_internet <- assign_col_join(income_internet, df)

  rm(df)
}
The data we're calling in here has already been run through microdata.Rmd.
# Read data - change this to wherever this is stored locally on your computer.
acs_micro <- feather::read_feather("/Users/mraisle/Documents/OneDrive - University of North Carolina at Chapel Hill/glpdata/data-raw/microdata/acs_micro_repwts.feather")

# Make indicator variables for the metrics we care about.
# The method taken here is a bit long-winded: case_when() worked best to get
# the 0/1/NA coding wanted for some variables, but survey_by_demog() should
# evaluate TRUE/FALSE, so the 0/1 columns are converted to logical at the end.
acs_micro_internet <- acs_micro %>%
  filter(year > 2012) %>%
  filter(PERNUM == 1) %>% # only measure each household once
  mutate(
    MSA = as.character(MSA),

    # Any internet access: CINETHH 1 or 2 -> 1, 3 -> 0, 0 -> NA.
    int_acc = case_when(
      CINETHH == 0 ~ NA_real_,
      CINETHH == 1 | CINETHH == 2 ~ 1,
      CINETHH == 3 ~ 0
    ),

    # High-speed internet: CIHISPEED between 10 and 19 -> 1; no internet
    # (CINETHH == 3), CIHISPEED == 0 or CIHISPEED == 20 -> 0.
    # Original note: doing "or" doesn't remove columns, and "and" does?
    # Something like that.
    hspd_int = case_when(
      CINETHH == 0 ~ NA_real_,
      CINETHH == 3 | CIHISPEED == 0 ~ 0,
      CIHISPEED > 9 & CIHISPEED < 20 ~ 1,
      CIHISPEED == 20 ~ 0
    ),

    # Internet but not high speed. A first attempt did not work (wrong order):
    # regular_internet = case_when(
    #   int_acc == T ~ 1,
    #   CINETHH == 3 & CIHISPEED == 0 ~ 0,
    #   CIHISPEED > 9 & CIHISPEED < 20 ~ 0,
    # ),
    # so a different configuration is used instead:
    regular_internet = case_when(
      (hspd_int == 0 | is.na(hspd_int)) & int_acc == 1 ~ 1,
      hspd_int == 1 ~ 0,
      int_acc == 0 ~ 0
    ),

    # Device flags: code 1 -> TRUE, code 0 -> NA, anything else -> FALSE.
    computer = if_else(CILAPTOP == 1, TRUE, FALSE),
    computer = replace(computer, CILAPTOP == 0, NA),
    tablet = if_else(CITABLET == 1, TRUE, FALSE),
    tablet = replace(tablet, CITABLET == 0, NA),
    smrtphone = if_else(CISMRTPHN == 1, TRUE, FALSE),
    smrtphone = replace(smrtphone, CISMRTPHN == 0, NA),

    # Computer or tablet.
    comp_tab = case_when(
      computer == 1 | tablet == 1 ~ 1,
      computer == 0 & tablet == 0 ~ 0,
      computer == 0 & is.na(tablet) ~ 0,
      TRUE ~ NA_real_
    ),

    # Computer, tablet, or smartphone.
    comp_tab_smrt = case_when(
      computer == 1 | tablet == 1 | smrtphone == 1 ~ 1,
      computer == 0 & tablet == 0 & smrtphone == 0 ~ 0,
      computer == 0 & is.na(tablet) & is.na(smrtphone) ~ 0,
      TRUE ~ NA_real_
    ),

    # Computer or tablet and high-speed internet (#1).
    internet_and_device1 = case_when(
      comp_tab == 1 & hspd_int == 1 ~ 1,
      comp_tab == 0 | hspd_int == 0 ~ 0,
      TRUE ~ NA_real_
    ),

    # High-speed internet and computer.
    internet_and_computer = case_when(
      computer == 1 & hspd_int == 1 ~ 1,
      computer == 0 | hspd_int == 0 ~ 0,
      TRUE ~ NA_real_
    ),

    # Computer or tablet and non-high-speed internet (#2).
    nohiinternet_and_device2 = case_when(
      comp_tab == 1 & regular_internet == 1 ~ 1,
      comp_tab == 0 | regular_internet == 0 ~ 0,
      TRUE ~ NA_real_
    ),

    # Device but no internet or smartphone (#3).
    only_device3 = case_when(
      comp_tab == 1 & int_acc == FALSE &
        (smrtphone == FALSE | is.na(smrtphone)) ~ 1,
      comp_tab == 0 | int_acc == TRUE | smrtphone == TRUE ~ 0,
      TRUE ~ NA_real_
    ),

    # Smartphone and internet but no device (#4).
    smrt_and_internet4 = case_when(
      comp_tab == 0 & int_acc == TRUE & smrtphone == TRUE ~ 1,
      comp_tab == 1 | int_acc == FALSE |
        (smrtphone == FALSE | is.na(smrtphone)) ~ 0,
      TRUE ~ NA_real_
    ),

    # Internet and no device or phone (#5).
    only_internet5 = case_when(
      int_acc == TRUE & comp_tab_smrt == 0 ~ 1,
      int_acc == FALSE | comp_tab_smrt == 1 ~ 0,
      TRUE ~ NA_real_
    ),

    # No device or internet but yes smartphone (#6).
    only_smartphone6 = case_when(
      int_acc == FALSE & comp_tab == 0 & smrtphone == TRUE ~ 1,
      int_acc == TRUE | comp_tab == 1 |
        (smrtphone == FALSE | is.na(smrtphone)) ~ 0,
      TRUE ~ NA_real_
    ),

    # Nothing - no device, no internet, no phone (#7).
    nothing7 = case_when(
      int_acc == FALSE & comp_tab_smrt == 0 ~ 1,
      int_acc == TRUE | comp_tab_smrt == 1 ~ 0,
      TRUE ~ NA_real_
    ),

    # No internet but a device and a phone (#8).
    device_smrt_only8 = case_when(
      int_acc == FALSE & comp_tab == 1 & smrtphone == TRUE ~ 1,
      int_acc == TRUE | comp_tab == 0 |
        (smrtphone == FALSE | is.na(smrtphone)) ~ 0,
      TRUE ~ NA_real_
    ),

    # Sanity-check column: sum of the eight numbered categories.
    total = internet_and_device1 + nohiinternet_and_device2 + only_device3 +
      smrt_and_internet4 + only_internet5 + only_smartphone6 + nothing7 +
      device_smrt_only8
  )

# Check that all the calculations turned out right by running:
#   unique(acs_micro_internet$total)
# The only values should be 1 or NA.
# The total column is no longer necessary after that check, so drop it.
acs_micro_internet %<>% mutate(total = NULL)

# Make a column that has the name of the group in the actual cell.
# acs_micro_internet %<>% mutate(
#   access_type = case_when(
#     internet_and_device1 == 1 ~ "internet_and_device1",
#     nohiinternet_and_device2 == 1 ~ "nohiinternet_and_device2",
#     only_device3 == 1 ~ "only_device3",
#     smrt_and_internet4 == 1 ~ "smrt_and_internet4",
#     only_internet5 == 1 ~ "only_internet5",
#     only_smartphone6 == 1 ~ "only_smartphone6",
#     nothing7 == 1 ~ "nothing7",
#     device_smrt_only8 == 1 ~ "device_smrt_only8",
#     TRUE ~ NA_character_))

# Convert the 0/1 indicator columns to TRUE/FALSE in one pass (1 -> TRUE,
# 0 -> FALSE, NA stays NA).
logical_cols <- c(
  "int_acc", "internet_and_device1", "comp_tab_smrt", "comp_tab",
  "hspd_int", "regular_internet", "nohiinternet_and_device2",
  "only_device3", "smrt_and_internet4", "only_internet5",
  "only_smartphone6", "nothing7", "device_smrt_only8",
  "internet_and_computer"
)
acs_micro_internet[logical_cols] <-
  lapply(acs_micro_internet[logical_cols], as.logical)

rm(acs_micro)
Now we're going to process this using survey by demographic, make dataframes for county and MSA level, and merge all the variables back into one dataframe for saving.
# Need to remember to make NA everything for 2013-2015 that includes tablet
# or smartphone.

# Names of the indicator columns to run through survey_by_demog().
variables <- list(
  "int_acc", "hspd_int", "regular_internet", "comp_tab", "comp_tab_smrt",
  "internet_and_device1", "internet_and_computer",
  "nohiinternet_and_device2", "only_device3", "smrt_and_internet4",
  "only_internet5", "only_smartphone6", "nothing7", "device_smrt_only8"
)

# County data frame with all the variables ----
for (metric in variables) {
  metric_df <- survey_by_demog(acs_micro_internet, metric, weight_var = "HHWT")
  digitalaccess_county <- assign_col_join(digitalaccess_county, metric_df)
  rm(metric_df)
}

# Make the categories related to smartphones null for all years < 2015
# (county):
# variables2016 <- list("device_smrt_only8", "nothing7", "only_device3",
#                       "only_internet5", "only_smartphone6",
#                       "smrt_and_internet4")
# for (x in variables2016) {
#   digitalaccess_county[[x]] <- ifelse(digitalaccess_county$year > 2015,
#                                       digitalaccess_county[[x]], NA)
# }

# Percent-only data frame - easier to work with for graphs.
percentdigitalaccess_county <- digitalaccess_county %>%
  filter(var_type == "percent") %>%
  select(-var_type)

# MSA data frame with all the variables ----
for (metric in variables) {
  metric_df <- survey_by_demog(
    acs_micro_internet,
    metric,
    geog = "MSA",
    weight_var = "HHWT"
  )
  digitalaccess_MSA5yr <- assign_col_join(digitalaccess_MSA5yr, metric_df)
  rm(metric_df)
}

# Make the categories related to smartphones null for all years < 2015 (MSA):
# for (x in variables2016) {
#   digitalaccess_MSA5yr[[x]] <- ifelse(digitalaccess_MSA5yr$year > 2015,
#                                       digitalaccess_MSA5yr[[x]], NA)
# }
Finally, we're going to save all of these files to glpdata.
``` {r Saving_Data, eval=FALSE}
# Save every output data frame into the package data, replacing any existing
# copies.
usethis::use_data(
  digitalaccess_county,
  percentdigitalaccess_county,
  digitalaccess_MSA5yr,
  digitalaccess_by_age_muw,
  digitalaccess_by_age_nh,
  digitalaccess_by_age_tract,
  digitalaccess_nodisagg_muw,
  digitalaccess_nodisagg_nh,
  digitalaccess_nodisagg_tract,
  income_internet,
  overwrite = TRUE
)
```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.