# Global knitr chunk options for this document: echo the code, suppress
# messages, and do not evaluate the chunks.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  eval = FALSE
)
The ACS 1-year survey provides the most current data for areas with a population over 65000. The data is less accurate than that in the ACS 5-year survey. For states, metropolitan areas, large counties, and large cities, however, accuracy is less of a concern because the population is very large.
The state files include common summary levels of state, county, county subdivision, and place, as well as others such as congressional district and school district. Run search_summarylevels("acs")
to view the complete list of summary levels.
The US files include common summary levels of metropolitan statistical areas and combined statistical areas.
Example 1 - use argument areas
: The easiest way to get the data of cities, counties, metros, and towns is to specify argument areas
in function read_acs1year()
. Run search_tablecontents("acs", "detailed race")
to find the table content references in table C02003 (NOT B02003, for which no data is provided).
# Packages used by the examples in this document.
library(totalcensus)
library(data.table)
library(dplyr)
library(ggplot2)
library(ggmap)

# Register the Google API key read from the GOOGLE_API_KEY environment
# variable.
goog_api <- Sys.getenv("GOOGLE_API_KEY")
register_google(goog_api)

# in selected counties
county_race <- read_acs1year(
  year = 2016,
  states = c("MA", "RI"),
  table_contents = c(
    "white = C02003_003",
    "black = C02003_004",
    "asian = C02003_006"
  ),
  areas = c(
    "Providence county, RI",
    "Bristol county, MA",
    "Kent county, RI"
  ),
  summary_level = "county"
)
# area GEOID lon lat state population white black asian GEOCOMP SUMLEV NAME
# 1: Providence County, RI 05000US44007 -71.57824 41.87049 RI 633673 460377 58806 26713 all 050 Providence County, Rhode Island
# 2: Bristol County, MA 05000US25005 -71.08706 41.74858 MA 558324 469087 23337 13414 all 050 Bristol County, Massachusetts
# 3: Kent County, RI 05000US44003 -71.57631 41.67775 RI 164614 151589 2738 3675 all 050 Kent County, Rhode Island

# in selected cities
city_race <- read_acs1year(
  year = 2016,
  states = c("MA", "RI"),
  table_contents = c(
    "white = C02003_003",
    "black = C02003_004",
    "asian = C02003_006"
  ),
  areas = c("Boston city, MA", "Providence city, RI"),
  summary_level = "place"
)
# area GEOID lon lat state population white black asian GEOCOMP SUMLEV NAME
# 1: Boston city, MA 16000US2507000 -71.02017 42.33196 MA 672840 358210 173312 65982 all 160 Boston city, Massachusetts
# 2: Providence city, RI 16000US4459000 -71.41878 41.82306 RI 179214 94085 23938 10651 all 160 Providence city, Rhode Island

# in selected metro. ACS 1-year survey metro data is in national files
metro_race <- read_acs1year(
  year = 2016,
  states = "US",
  table_contents = c(
    "white = C02003_003",
    "black = C02003_004",
    "asian = C02003_006"
  ),
  areas = c("Boston metro", "Providence metro", "new york metro"),
  summary_level = "310"
)
# area GEOID lon lat state population white black asian GEOCOMP SUMLEV NAME
# 1: Boston metro 31000US14460 -71.02310 42.51750 4794447 3671796 391465 373970 all 310 Boston-Cambridge-Newton, MA-NH Metro Area
# 2: Providence metro 31000US39300 -71.28505 41.70527 1614750 1319192 90863 50289 all 310 Providence-Warwick, RI-MA Metro Area
# 3: New York metro 31000US35620 -73.87450 40.77432 20153634 11704438 3430610 2215765 all 310 New York-Newark-Jersey City, NY-NJ-PA Metro Area
Example 2 - use argument geo_headers
: while areas
is only available for metros, counties, cities, and towns, geo_headers
is applicable to all geographic headers. The workflow for using geo_headers
is to read the data file into a data.table and then select what we want.
# White, black, and asian population in selected states. At summary level of
# state, there is no need to select state
state_race <- read_acs1year(
  year = 2016,
  states = c("MA", "RI", "NY", "AL", "LA", "GA", "CA"),
  table_contents = c(
    "white = C02003_003",
    "black = C02003_004",
    "asian = C02003_006"
  ),
  geo_headers = "STATE",
  summary_level = "state",
  # state level has geocomponent of all, urban, rural and more
  geo_comp = "all"
)
# area GEOID lon lat STATE state population white black asian GEOCOMP SUMLEV NAME
# 1: MA 04000US25 -71.48959 42.15652 25 MA 6811779 5350175 506677 440845 all 040 Massachusetts
# 2: RI 04000US44 -71.52529 41.59784 44 RI 1056426 850105 67526 36875 all 040 Rhode Island
# 3: NY 04000US36 -75.59627 42.91340 36 NY 19745289 12530463 3080534 1668036 all 040 New York
# 4: AL 04000US01 -86.84346 32.73963 01 AL 4863300 3316384 1301102 65611 all 040 Alabama
# 5: LA 04000US22 -91.80327 30.85777 22 LA 4681666 2901495 1514755 77921 all 040 Louisiana
# 6: GA 04000US13 -83.42321 32.62938 13 GA 10310371 6054861 3254495 398897 all 040 Georgia
# 7: CA 04000US06 -119.54065 37.14857 06 CA 39250017 23420234 2265280 5602074 all 040 California

# all large cities in CT
city_race <- read_acs1year(
  year = 2016,
  states = "CT",
  table_contents = c(
    "white = C02003_003",
    "black = C02003_004",
    "asian = C02003_006"
  ),
  # geographic header of cities
  geo_headers = "PLACE",
  summary_level = "place"
)
# area GEOID lon lat PLACE state population white black asian GEOCOMP SUMLEV NAME
# 1: Bridgeport city 16000US0908000 -73.19573 41.18739 08000 CT 145934 63324 51009 3536 all 160 Bridgeport city, Connecticut
# 2: Danbury city 16000US0918430 -73.47228 41.40184 18430 CT 85008 54757 8452 6202 all 160 Danbury city, Connecticut
# 3: Hartford city 16000US0937000 -72.68334 41.76605 37000 CT 123287 42525 42736 2764 all 160 Hartford city, Connecticut
# 4: New Britain city 16000US0950370 -72.78616 41.67655 50370 CT 72570 NA NA NA all 160 New Britain city, Connecticut
# 5: New Haven city 16000US0952000 -72.92495 41.31081 52000 CT 129939 60133 40573 6652 all 160 New Haven city, Connecticut
# 6: Norwalk city 16000US0955990 -73.41980 41.09274 55990 CT 88440 NA NA NA all 160 Norwalk city, Connecticut
# 7: Stamford city 16000US0973000 -73.54603 41.07986 73000 CT 129105 80424 22345 10017 all 160 Stamford city, Connecticut
# 8: Waterbury city 16000US0980000 -73.03668 41.55850 80000 CT 108269 68233 21850 3188 all 160 Waterbury city, Connecticut
Example 3: Get demographics in AIANHH (American Indian Area/Alaska Native Area/Hawaiian Home Land) with population over 65000. We are not familiar with AIANHH and we do not know how to select data. The strategy is to first read all rows of data, include "AIANHH" as a column, and then examine what we have for "AIANHH". Not surprisingly, most rows of "AIANHH" are empty; these should be removed so that we only keep those with a valid AIANHH code.
# Read all rows of data at summary level "250" from the national file,
# including "AIANHH" as a column.
aianhh <- read_acs1year(
  year = 2016,
  # spelled out as `states`, matching the other read_acs1year() calls in this
  # document, instead of relying on partial argument matching of `state`
  states = "US",
  # population of American Indian or Alaska Native; only C02003_005 is
  # requested here.
  # NOTE(review): the earlier comment also mentioned Native Hawaiian and Other
  # Pacific Islander, but no such table content is requested -- add it to
  # `table_contents` if it is wanted.
  table_contents = "Indian_Alaska_Native = C02003_005",
  geo_headers = "AIANHH",
  summary_level = "250"
)
# GEOID lon lat AIANHH state population Indian_Alaska_Native GEOCOMP SUMLEV NAME
# 1: 25000US2430 -109.76256 36.18428 2430 NA 175108 165794 all 250 Navajo Nation Reservation and Off-Reservation Trust Land, AZ--NM--UT
# 2: 25000US5550 -95.18478 36.22364 5550 NA 515743 86570 all 250 Cherokee OTSA, OK
# 3: 25000US5560 -98.95383 35.68827 5560 NA 186316 6469 all 250 Cheyenne-Arapaho OTSA, OK
# 4: 25000US5580 -97.23257 34.46833 5580 NA 313580 21231 all 250 Chickasaw OTSA, OK
# 5: 25000US5590 -95.44542 34.50746 5590 NA 230449 25223 all 250 Choctaw OTSA, OK
# 6: 25000US5600 -97.08529 35.19939 5600 NA 124662 8020 all 250 Citizen Potawatomi Nation-Absentee Shawnee OTSA, OK
# 7: 25000US5620 -96.01192 35.66597 5620 NA 794369 61742 all 250 Creek OTSA, OK
# 8: 25000US5720 -98.89653 34.65708 5720 NA 192411 10419 all 250 Kiowa-Comanche-Apache-Fort Sill Apache OTSA, OK
# 9: 25000US6785 -150.33555 62.05490 6785 NA 76958 4036 all 250 Knik ANVSA, AK
# 10: 25000US9550 -85.28327 31.16645 9550 NA 87387 443 all 250 Cher-O-Creek SDTSA, AL
# 11: 25000US9815 -79.16228 34.79173 9815 NA 505724 65611 all 250 Lumbee SDTSA, NC
# 12: 25000US9960 -90.72665 29.59996 9960 NA 206588 7179 all 250 United Houma Nation SDTSA, LA
# Areas to read at the "place" summary level. Most entries are written as
# "<name>, <state abbreviation>"; a few use the form "PLACE = <code>".
# NOTE(review): the "PLACE = ..." entries presumably select a place by state
# abbreviation plus PLACE code -- confirm against the `areas` documentation.
largest_100 <- c(
  "New York city, NY", "Los Angeles city, CA", "Chicago city, IL",
  "Houston city, TX", "Philadelphia city, PA", "Phoenix city, AZ",
  "San Antonio city, TX", "San Diego city, CA", "Dallas city, TX",
  "San Jose city, CA", "Austin city, TX", "Jacksonville city, FL",
  "San Francisco city, CA", "PLACE = IN36003", "Columbus city, OH",
  "Fort Worth city, TX", "Charlotte city, NC", "Seattle city, WA",
  "Denver city, CO", "El Paso city, TX", "Detroit city, MI",
  "Washington city, DC", "Boston city, MA", "Memphis city, TN",
  "PLACE = TN52006", "Portland city, OR", "Oklahoma City city, OK",
  "Las Vegas city, NV", "Baltimore city, MD", "PLACE = KY48006",
  "Milwaukee city, WI", "Albuquerque city, NM", "Tucson city, AZ",
  "Fresno city, CA", "Sacramento city, CA", "Kansas City city, MO",
  "Long Beach city, CA", "Mesa city, AZ", "Atlanta city, GA",
  "Colorado Springs city, CO", "Virginia Beach city, VA", "Raleigh city, NC",
  "Omaha city, NE", "Miami city, FL", "Oakland city, CA",
  "Minneapolis city, MN", "Tulsa city, OK", "Wichita city, KS",
  "New Orleans city, LA", "Arlington city, TX", "Cleveland city, OH",
  "Bakersfield city, CA", "Tampa city, FL", "Aurora city, CO",
  "Urban Honolulu CDP, HI", "Anaheim city, CA", "Santa Ana city, CA",
  "Corpus Christi city, TX", "Riverside city, CA", "St. Louis city, MO",
  "PLACE = KY46027", "Stockton city, CA", "Pittsburgh city, PA",
  "St. Paul city, MN", "PLACE = AK03000", "Cincinnati city, OH",
  "Henderson city, NV", "Greensboro city, NC", "Plano city, TX",
  "Newark city, NJ", "Toledo city, OH", "Lincoln city, NE",
  "Orlando city, FL", "Chula Vista city, CA", "Jersey City city, NJ",
  "Chandler city, AZ", "Fort Wayne city, IN", "Buffalo city, NY",
  "Durham city, NC",
  # kept on a single line: a line break inside this literal would stop it
  # from matching the area name
  "St. Petersburg city, FL",
  "Irvine city, CA", "Laredo city, TX", "Lubbock city, TX",
  "Madison city, WI", "PLACE = AZ27400", "Norfolk city, VA",
  "Reno city, NV", "Winston-Salem city, NC", "Glendale city, AZ",
  "Hialeah city, FL", "Garland city, TX", "Scottsdale city, AZ",
  "Irving city, TX", "Chesapeake city, VA", "North Las Vegas city, NV",
  "Fremont city, CA", "Baton Rouge city, LA", "Spokane city, WA",
  "Rochester city, NY", "San Bernardino city, CA"
)

# White, black, and asian population of the areas listed above, read from the
# 2016 ACS 1-year survey. `states_DC` is not defined in this document; it is
# expected to come from an attached package (totalcensus is loaded earlier).
race_100 <- read_acs1year(
  year = 2016,
  states = states_DC,
  table_contents = c(
    "white = C02003_003",
    "black = C02003_004",
    "asian = C02003_006"
  ),
  areas = largest_100,
  summary_level = "place"
)
# area GEOID lon lat state population white black asian GEOCOMP SUMLEV NAME
# 1: New York city, NY 16000US3651000 -73.93850 40.66427 NY 8537673 3627375 2073102 1205911 all 160 New York city, New York
# 2: Los Angeles city, CA 16000US0644000 -118.41082 34.01939 CA 3976324 2079698 350543 452848 all 160 Los Angeles city, California
# 3: Chicago city, IL 16000US1714000 -87.68184 41.83755 IL 2704965 1309585 802457 172298 all 160 Chicago city, Illinois
# 4: Houston city, TX 16000US4835000 -95.38634 29.78047 TX 2304388 1318837 520127 171312 all 160 Houston city, Texas
# 5: Philadelphia city, PA 16000US4260000 -75.13335 40.00938 PA 1567872 634111 661032 110733 all 160 Philadelphia city, Pennsylvania
# using 2016 ACS 5-year survey
zip_acs5 <- read_acs5year(
  year = 2016,
  states = "US",
  geo_headers = "ZCTA5",
  table_contents = c(
    "white = B02001_002",
    "black = B02001_003",
    "asian = B02001_005"
  ),
  summary_level = "860"
)

# using decennial census 2010
zip_2010 <- read_decennial(
  year = 2010,
  states = "US",
  geo_headers = "ZCTA5",
  table_contents = c(
    "white = P0030002",
    "black = P0030003",
    "asian = P0030005"
  ),
  summary_level = "860"
)
Enrollment of college students in each school district.
# Three B14004 table contents for Massachusetts from the 2015 ACS 5-year
# survey at summary level "970".
# NOTE(review): "970" is presumably a school-district summary level -- confirm
# with search_summarylevels("acs").
school_district <- read_acs5year(
  year = 2015,
  states = "MA",
  table_contents = c(
    "male_population_15up = B14004_002",
    "male_public = B14004_003",
    "male_private = B14004_008"
  ),
  summary_level = "970"
)
CDCURR
# The same three B14004 table contents for Massachusetts from the 2015 ACS
# 5-year survey, this time at summary level "500".
# NOTE(review): "500" is presumably the congressional-district summary level
# -- confirm with search_summarylevels("acs").
congress <- read_acs5year(
  year = 2015,
  states = "MA",
  table_contents = c(
    "male_population_15up = B14004_002",
    "male_public = B14004_003",
    "male_private = B14004_008"
  ),
  summary_level = "500"
)
library(totalcensus)
library(dplyr)

# http://www.city-data.com/zipmaps/Chicago-Illinois.html to find a list of zip
# code for Chicago
zips <- c(
  60007, 60018, 60068, 60106, 60131, 60176, 60601, 60602, 60603, 60604,
  60605, 60606, 60607, 60608, 60609, 60610, 60611, 60612, 60613, 60614,
  60615, 60616, 60617, 60618, 60619, 60620, 60621, 60622, 60623, 60624,
  60625, 60626, 60628, 60629, 60630, 60631, 60632, 60633, 60634, 60636,
  60637, 60638, 60639, 60640, 60641, 60642, 60643, 60644, 60645, 60646,
  60647, 60649, 60651, 60652, 60653, 60654, 60655, 60656, 60657, 60659,
  60660, 60661, 60706, 60707, 60714, 60804, 60827
)

# read from 2016 ACS 5-year survey
all_zip_acs5 <- read_acs5year(
  year = 2016,
  states = "US",
  # random example
  table_contents = "below_poverty = B06012_006",
  geo_headers = "ZCTA5",
  summary_level = "860"
)

# Keep only the rows whose ZCTA5 is one of the Chicago zip codes.
chicago_zip_acs5 <- filter(all_zip_acs5, ZCTA5 %in% zips)
# GEOID lon lat ZCTA5 state population below_poverty GEOCOMP SUMLEV NAME
# 1 86000US60007 -87.99736 42.00865 60007 <NA> 33733 1064 all 860 ZCTA5 60007
# 2 86000US60018 -87.91176 41.97939 60018 <NA> 30519 2123 all 860 ZCTA5 60018
# 3 86000US60068 -87.84343 42.01176 60068 <NA> 37567 1107 all 860 ZCTA5 60068
# 4 86000US60106 -87.94183 41.95970 60106 <NA> 20215 1147 all 860 ZCTA5 60106
# 5 86000US60131 -87.88426 41.93876 60131 <NA> 18072 943 all 860 ZCTA5 60131

# read from decennial census 2010
all_zip_2010 <- read_decennial(
  year = 2010,
  states = "IL",
  geo_headers = "ZCTA5",
  # random example
  table_contents = c("male = P0120002", "female = P0120026"),
  summary_level = "block"
)

# Rows in the Chicago zip codes, and the mean lon/lat of the rows in each
# ZCTA5 (used below to place the zip code labels).
chicago_zip_2010 <- all_zip_2010[ZCTA5 %in% zips]
zip_coord <- chicago_zip_2010[
  ,
  .(lon_zip = mean(lon), lat_zip = mean(lat)),
  by = .(ZCTA5)
]
# lon lat ZCTA5 state population male female GEOCOMP SUMLEV
# 1 -87.99695 42.00576 60007 IL 33820 16322 17498 all 871
# 2 -87.89337 42.00787 60018 IL 30099 15031 15068 all 871
# 3 -87.84100 42.01182 60068 IL 37475 17935 19540 all 871
# 4 -87.94604 41.95420 60106 IL 20309 10473 9836 all 871
# 5 -87.87489 41.93570 60131 IL 18097 9099 8998 all 871

# Plot the rows with non-zero population on a black-and-white base map,
# colored by ZCTA5, with each zip code labelled at its mean coordinate.
chicago_map <- get_map("chicago", zoom = 10, color = "bw")
ggmap(chicago_map) +
  geom_point(
    data = chicago_zip_2010[population != 0],
    aes(lon, lat, size = population, color = ZCTA5)
  ) +
  geom_text(
    data = zip_coord,
    aes(lon_zip, lat_zip, label = ZCTA5),
    size = 3
  ) +
  scale_size_area(max_size = 1) +
  ylim(41.6, 42.05) +
  guides(color = "none", size = "none")
# boston zip code
boston_zip_2010 <- read_decennial(
  year = 2010,
  states = "MA",
  geo_headers = c("ZCTA5", "PLACE"),
  # random example
  table_contents = c("male = P0120002", "female = P0120026"),
  summary_level = "block"
)

# Keep the rows with PLACE code "07000" and compute the mean lon/lat of the
# rows in each ZCTA5 for labelling.
boston_zip <- boston_zip_2010[PLACE == "07000"]
zip_coord <- boston_zip[
  ,
  .(lon = mean(lon), lat = mean(lat)),
  by = .(ZCTA5)
]

boston_map <- get_map("roxbury, MA", zoom = 12, color = "bw")

# Nine base colors, repeated ten times for the manual color scale below.
set1 <- c(
  "#E41A1C", "#377EB8", "#4DAF4A",
  "#984EA3", "#FF7F00", "#FFFF33",
  "#A65628", "#F781BF", "#999999"
)
color_palette <- rep(set1, 10)

ggmap(boston_map) +
  geom_point(
    data = boston_zip[population != 0],
    aes(lon, lat, size = population, color = ZCTA5)
  ) +
  # ZCTA5 "99999" is left out of the labels
  geom_text(
    data = zip_coord[ZCTA5 != "99999"],
    aes(lon, lat, label = ZCTA5),
    size = 3
  ) +
  scale_size_area(max_size = 3, breaks = c(10, 100, 500, 1000, 2000)) +
  scale_color_manual(values = color_palette) +
  # ylim(41.6, 42.05) +
  guides(color = "none") +
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0)
  )
So far the data is only available in the ACS 1-year survey. Let's find the household computer ownership in major cities in 2016.
# Read county-level table contents from the 2016 ACS 1-year survey for the
# states in `states_DC`. Each entry is "<column name> = <table reference>".
internet <- read_acs1year(
  year = 2016,
  states = states_DC,
  table_contents = c(
    "no_school = B15003_002",
    "nursery = B15003_003",
    "kindergarten = B15003_004",
    "g1 = B15003_005",
    "g2 = B15003_006",
    "g3 = B15003_007",
    "g4 = B15003_008",
    "g5 = B15003_009",
    "g6 = B15003_010",
    "g7 = B15003_011",
    "g8 = B15003_012",
    "g9 = B15003_013",
    "g10 = B15003_014",
    "g11 = B15003_015",
    "g12 = B15003_016",
    "high_school = B15003_017",
    "ged_high_school = B15003_018",
    "college_1_year = B15003_019",
    "college_more_year = B15003_020",
    "associate = B15003_021",
    "bachelor = B15003_022",
    "master = B15003_023",
    "professional = B15003_024",
    "doctor = B15003_025",
    "median_age = B01002_001",
    "white = C02003_003",
    "black = C02003_004",
    "native = C02003_005",
    "asian = C02003_006",
    "hawaiian = C02003_007",
    "others = C02003_008",
    "below_powerty_50 = B17002_002",
    "below_powerty_50_75 = B17002_003",
    "below_powerty_75_100 = B17002_004",
    "gini_index = B19083_001",
    "total_household = B28001_001",
    "median_household_income = B19019_001",
    "median_house_hold_income_1_person = B19019_002",
    "median_house_hold_income_2_person = B19019_003",
    "median_house_hold_income_3_person = B19019_004",
    "median_house_hold_income_4_person = B19019_005",
    "median_house_hold_income_5_person = B19019_006",
    "median_house_hold_income_6_person = B19019_007",
    "median_house_hold_income_7_or_more_person = B19019_008",
    "median_rent_per_income = B25071_001",
    "no_internet = B28011_008"
  ),
  geo_headers = "COUNTY",
  summary_level = "county"
)

# Work on a copy so the `:=` updates below do not change `internet`.
kaggle_internet <- copy(internet)

# no_internet as a percentage of total_household.
kaggle_internet[, ratio_no_internet := 100 * (no_internet / total_household)]

# Sum of the no_school ... g8 columns. The whole sum must stay inside the `j`
# expression: a comma before `+ g7 + g8` would pass those terms as `by`.
kaggle_internet[
  ,
  below_middle_school := no_school + nursery + kindergarten +
    g1 + g2 + g3 + g4 + g5 + g6 + g7 + g8
]

# Keep the identifying columns and the derived columns. ratio_no_internet and
# GEOCOMP are retained because the map below colors and facets on them.
# NOTE(review): assumes the returned table has an `area` column at county
# level, as the sample outputs elsewhere in this document show -- confirm.
kaggle_internet <- kaggle_internet[, .(
  county = area, state, GEOID, lon, lat, population, median_age,
  ratio_no_internet, below_middle_school, GEOCOMP
)]

# Map each county as a point sized by population and colored by the share of
# households without internet.
us_map <- get_map("united states", zoom = 4, color = "bw")
ggmap(us_map) +
  geom_point(
    data = kaggle_internet,
    aes(lon, lat, size = population, color = ratio_no_internet)
  ) +
  facet_wrap(~GEOCOMP) +
  ylim(20, 50) +
  scale_color_continuous(low = "green", high = "red")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.