knitr::opts_chunk$set(echo = TRUE) library(MazamaSpatialUtils) setSpatialDataDir("~/Data/Spatial") library(sp) library(tmap) library(tidyverse) library(rgdal) library(stringr) # Loading the datasets #convertUSCensusCounties() #convertUSCensusStates() loadSpatialData("USCensusCounties") loadSpatialData("USCensusStates")
Merging with dataset: California-Focused
#Note to myself: First time making a choropleth plot! What is it? Thematic maps that are used to represent statistical data through various shading patterns. # Testing base maps # head(USCensusStates@data) USCensusStates %>% subset(stateCode == "CA") %>% plot() USCensusCounties %>% subset(stateCode == "CA") %>% plot() # Would be more interesting if I just merge the data! california <- USCensusCounties %>% subset(stateCode == "CA") #let's use countyname # How many counties are there in CA? There should be 58 unique(california@data$countyName) #checks out # Let's highlight Los Angeles County and see USCensusCounties %>% subset(countyName == "Los Angeles") %>% plot() # Fun fact: There are two islands in LA County: Catalina Island (great for diving) and San Clemente Island. # Simplified map (1%) should make the islands disappear USCensusCounties_01 %>% subset(countyName == "Los Angeles") %>% plot()
COVID19 in California Based on Counties
# https://mgimond.github.io/Spatial/mapping-data-in-r.html dataDir <- getSpatialDataDir() url <- 'https://data.chhs.ca.gov/dataset/6882c390-b2d7-4b9a-aefa-2068cee63e47/resource/6cd8d424-dfaa-4bdd-9410-a3d656e1176e/download/covid19data.csv' filePath <- file.path(dataDir,basename(url)) utils::download.file(url,filePath) #cool, this is just a csv file covid19data <- read.csv(filePath, header = TRUE) # colnames(covid19data) # names(california@data) # Turn to dates first covid19data$Most.Recent.Date <- as.Date(covid19data$Most.Recent.Date, format = "%m/%d/%y") covid19data <- dplyr::select( .data = covid19data, countyName = .data$County.Name, date = .data$Most.Recent.Date, totalCountConfirmed = .data$Total.Count.Confirmed, #cumulative so let's select most recent totalCountDeaths = .data$Total.Count.Deaths) %>% filter(date == as.Date("2020-05-30")) # There is one unassigned column # Note: Line graphs would be great, especially for the total count confirmed and total count deaths! # Use dplyr left_join to merge data onto SFDF. Create a choropleth plot. Update: When using that, it turned it into a data frame. Let's try merge. data_projected <- sp::merge(california, covid19data, by = "countyName") # head(data_projected@data) #unassigned should be gone # Trying to make this map https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/ # Let's create a vector with the labels of counties/"cities" we want lacities_highlight <- c("Los Angeles", "San Diego", "San Francisco", "Sacramento") # Subset the SFDF data_projected %>% subset(countyName %in% lacities_highlight) %>% plot() # Wow, didn't realize how small San Francisco County was! lacities_highlight_map <- data_projected %>% subset(countyName %in% lacities_highlight)
Based on LA Time's Map: https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/ (June 3, 2020)
# I tried my best to make them as similar as possible... I even made my own color palettes. The only big stylistic thing I changed was omitting Redding, CA. It would've printed its county name. Perhaps I can do more once I actually join it with city data. cases_palette <- c('#ebebeb','#b3e4f1','#7ec6da','#51a9c9', '#2f83a9','#0c5c7e','#05344a') # Cumulative tm_shape(data_projected) + tm_borders(col = "white", lwd = 0.5) + tm_polygons("totalCountConfirmed", style = "fixed", palette = cases_palette, title = "Confirmed cases", breaks = c(0, 1, 480, 1420, 2320, 5240, 6470, 55000), labels = c("0", "500", "1460", "2410", "5650", "7670", "58260"), interval.closure = "right", text.size = 0.75, legend.is.portrait = FALSE) + tm_shape(lacities_highlight_map) + tm_dots("countyName", size = 0.2, col = "black", shape = 16) + tm_text("countyName", just = "right", xmod = -.25, size = 1, col = "black", shadow = TRUE) + tm_layout( legend.title.size = 0.75, legend.format = list(text.align = "right"), legend.position = c("right", "top"), frame = FALSE) + tm_legend(outside = FALSE) # Death death_palette <- c('#ebebeb', '#d1d7da', '#b5bec9', '#93a7bc', '#708ba0', '#4b6373','#37404d') # Cumulative Deaths tm_shape(data_projected) + tm_borders(col = "white", lwd = 0.5) + tm_polygons("totalCountDeaths", style = "fixed", palette = death_palette, title = "Deaths", breaks = c(0, 1, 20, 80, 140, 200, 260, 2360), labels = c("0", "20", "80", "140", "200", "270", "2480"), interval.closure = "right", text.size = 1.5, legend.is.portrait = FALSE) + tm_shape(lacities_highlight_map) + tm_dots("countyName", size = 0.2, col = "black", shape = 16) + tm_text("countyName", just = "right", xmod = -.25, size = 1, col = "black", shadow = TRUE) + tm_layout( legend.title.size = 0.75, legend.format = list(text.align = "right"), legend.position = c("right", "top"), frame = FALSE) + tm_legend(outside = FALSE)
City Data
# Scrape the web for the list of cities in LA County # Download table here: http://dashboard.publichealth.lacounty.gov/covid19_surveillance_dashboard/ url_city <- 'http://publichealth.lacounty.gov/media/Coronavirus/locations.htm' raw <-rvest::html(url_city) tables <- rvest::html_nodes(raw, "table") # list of tables on the site test <- rvest::html_table(tables[[1]]) #brute force to clean it quickly... a <-test[-(1:48),] #a<-a[-(88:390),] colnames(a) <- as.character(unlist(a[1,])) cities <- a[-1,] rm(a, raw, tables) colnames(cities)[1] <- "city" cities$city <- gsub(pattern = "City of |[*]", replacement = "", cities$city) # All los angeles county cities$countyName <- "Los Angeles" # Download the boundaries shape file # Let's find a LA base map url_boundary<- 'https://opendata.arcgis.com/datasets/7b0998f4e2ea42bda0068afc8eeaf904_19.zip' filePath3 <- file.path(dataDir,basename(url_boundary)) utils::download.file(url_boundary,filePath3) utils::unzip(filePath3,exdir=file.path(dataDir, 'lacities')) dsnPath <- file.path(dataDir, 'lacities') shpName <- 'LA%20County%20City%20Boundaries' la_SFDF <- convertLayer( dsn = dsnPath, layer = shpName, encoding = 'UTF-8' ) la_SFDF <- dplyr::select( .data = la_SFDF, city = .data$CITY_LABEL, city_number = .data$CITY_NO, shapearea = .data$ShapeSTAre, shapelen = .data$ShapeSTLen ) # What does it look like? la_SFDF %>% plot() #niiiiice #unique(la_SFDF$city) la_SFDF<- la_SFDF %>% #filter(city_number !="0") %>% distinct(city_number, .keep_all = TRUE) %>% mutate(city = gsub(pattern = "City of ", replacement = "", city)) %>% mutate(city =gsub(pattern = "ñ", replacement = "n", city))
#require(sp) #la_cities_map <- sp::merge(la_SFDF, cities) la_SFDF <- full_join(la_SFDF, cities, by = c('city')) # Where is Pasadena? Long Beach? I see, they were not included in the url. la_SFDF %>% plot() tm_shape(la_SFDF) + tm_polygons("city") # Last time I checked, there are 88 cities in LA County. Must be duplicates. Odd, why are there missing cities? Ahh, turns out when I filtered by "LA County," some of the places were NA. Let's filter it by removing all the cities without a city number "0" and then remove the duplicates! # Some of the cities are "missing" <- let's fill them in tomorrow and figure out why they're not! # Map # http://dashboard.publichealth.lacounty.gov/covid19_surveillance_dashboard/ Should be similar # Note: Going to have to stop now. When I used sp:merge, I am left with the specific cities in the LA County (la_SFDF, cities). When printed, you don't see the "LA County" shape. Messed around a little bit with the la_SFDF filter and when I plotted, it looked like what I want. Full_joined with the cities df but dimensions were not matching. Maybe I need to fill the NA's? Will come back to this.
Work in progress, not even remotely done. Definitely really like tmap but I still have a lot to embrace when it cmoes to shape files and the different layers! It's kind of similar to ggplot though, so that's nice! Definitely could have gotten more done but I had many questions along the way. This is so so fun! -Interested in understanding county population vulnerability (maybe food affordability?) -Crop yields by region -Glacial melting
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.