knitr::opts_chunk$set(echo = TRUE)

library(MazamaSpatialUtils)
setSpatialDataDir("~/Data/Spatial")

library(sp)
library(tmap)
library(tidyverse)
library(rgdal)
library(stringr)

# Loading the datasets 
#convertUSCensusCounties()
#convertUSCensusStates()

loadSpatialData("USCensusCounties")
loadSpatialData("USCensusStates")

Merging with dataset: California-Focused

#Note to myself: First time making a choropleth plot! What is it? Thematic maps that are used to represent statistical data through various shading patterns. 

# Testing base maps

# head(USCensusStates@data)

USCensusStates %>% 
  subset(stateCode == "CA") %>% 
  plot()

USCensusCounties %>% 
  subset(stateCode == "CA") %>% 
  plot()

# Would be more interesting if I just merge the data!
california <- USCensusCounties %>% 
  subset(stateCode == "CA") #let's use countyname

# How many counties are there in CA? There should be 58
unique(california@data$countyName) #checks out 

# Let's highlight Los Angeles County and see
USCensusCounties %>% 
  subset(countyName == "Los Angeles") %>% 
  plot()

# Fun fact: There are two islands in LA County: Catalina Island (great for diving) and San Clemente Island.

# Simplified map (1%) should make the islands disappear
USCensusCounties_01 %>% 
  subset(countyName == "Los Angeles") %>% 
  plot() 

COVID19 in California Based on Counties

# https://mgimond.github.io/Spatial/mapping-data-in-r.html

dataDir <- getSpatialDataDir()

url <- 'https://data.chhs.ca.gov/dataset/6882c390-b2d7-4b9a-aefa-2068cee63e47/resource/6cd8d424-dfaa-4bdd-9410-a3d656e1176e/download/covid19data.csv'

filePath <- file.path(dataDir,basename(url))
utils::download.file(url,filePath) #cool, this is just a csv file 

covid19data <- read.csv(filePath, header = TRUE) 

# colnames(covid19data)
# names(california@data)

# Turn to dates first
covid19data$Most.Recent.Date <- as.Date(covid19data$Most.Recent.Date, format = "%m/%d/%y")

covid19data <- dplyr::select(
  .data = covid19data,
    countyName = .data$County.Name,
    date = .data$Most.Recent.Date, 
    totalCountConfirmed = .data$Total.Count.Confirmed, #cumulative so let's select most recent 
    totalCountDeaths = .data$Total.Count.Deaths) %>% 
  filter(date == as.Date("2020-05-30")) # There is one unassigned column 

# Note: Line graphs would be great, especially for the total count confirmed and total count deaths! 

# Use dplyr left_join to merge data onto SFDF. Create a choropleth plot. Update: When using that, it turned it into a data frame. Let's try merge.

data_projected <- sp::merge(california, covid19data, by = "countyName")

# head(data_projected@data)  #unassigned should be gone

# Trying to make this map https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/

# Let's create a vector with the labels of counties/"cities" we want

lacities_highlight <- c("Los Angeles", "San Diego", "San Francisco", "Sacramento")

# Subset the SFDF
data_projected %>% 
  subset(countyName %in% lacities_highlight) %>%  
  plot()

# Wow, didn't realize how small San Francisco County was!

lacities_highlight_map <- data_projected %>% 
  subset(countyName %in% lacities_highlight) 

Based on LA Time's Map: https://www.latimes.com/projects/california-coronavirus-cases-tracking-outbreak/ (June 3, 2020)

# I tried my best to make them as similar as possible... I even made my own color palettes. The only big stylistic thing I changed was omitting Redding, CA. It would've printed its county name. Perhaps I can do more once I actually join it with city data. 

cases_palette <- c('#ebebeb','#b3e4f1','#7ec6da','#51a9c9', '#2f83a9','#0c5c7e','#05344a')

# Cumulative 
tm_shape(data_projected) +
  tm_borders(col = "white", lwd = 0.5) +
  tm_polygons("totalCountConfirmed", 
    style = "fixed", 
    palette = cases_palette, 
    title = "Confirmed cases",
    breaks = c(0, 1, 480, 1420, 2320, 5240, 6470, 55000),
    labels = c("0", "500", "1460", "2410", "5650", "7670", "58260"),
    interval.closure = "right",
    text.size = 0.75, 
    legend.is.portrait = FALSE) +
tm_shape(lacities_highlight_map) +
  tm_dots("countyName", size = 0.2, col = "black", shape = 16) +
  tm_text("countyName", just = "right", xmod = -.25, size = 1, col = "black", shadow = TRUE) +
  tm_layout(
    legend.title.size = 0.75,
    legend.format = list(text.align = "right"),
    legend.position = c("right", "top"),
    frame = FALSE) +
  tm_legend(outside = FALSE)

# Death
death_palette <- c('#ebebeb', '#d1d7da', '#b5bec9', '#93a7bc', '#708ba0', '#4b6373','#37404d')

# Cumulative Deaths
tm_shape(data_projected) +
  tm_borders(col = "white", lwd = 0.5) +
  tm_polygons("totalCountDeaths", 
    style = "fixed", 
    palette = death_palette, 
    title = "Deaths",
    breaks = c(0, 1, 20, 80, 140, 200, 260, 2360),
    labels = c("0", "20", "80", "140", "200", "270", "2480"),
    interval.closure = "right",
    text.size = 1.5, 
    legend.is.portrait = FALSE) +
  tm_shape(lacities_highlight_map) +
  tm_dots("countyName", size = 0.2, col = "black", shape = 16) +
  tm_text("countyName", just = "right", xmod = -.25, size = 1, col = "black", shadow = TRUE) +
 tm_layout(
    legend.title.size = 0.75,
    legend.format = list(text.align = "right"),
    legend.position = c("right", "top"),
    frame = FALSE) +
  tm_legend(outside = FALSE) 

City Data

# Scrape the web for the list of cities in LA County 

# Download table here: http://dashboard.publichealth.lacounty.gov/covid19_surveillance_dashboard/

url_city <- 'http://publichealth.lacounty.gov/media/Coronavirus/locations.htm'
raw <-rvest::html(url_city)
tables <- rvest::html_nodes(raw, "table") # list of tables on the site
test <- rvest::html_table(tables[[1]])

#brute force to clean it quickly...

a <-test[-(1:48),]
#a<-a[-(88:390),]
colnames(a) <- as.character(unlist(a[1,]))
cities <- a[-1,]
rm(a, raw, tables)

colnames(cities)[1] <- "city"
cities$city <- gsub(pattern = "City of |[*]", replacement = "", cities$city)

# All los angeles county 
cities$countyName <- "Los Angeles"

# Download the boundaries shape file 
# Let's find a LA base map 

url_boundary<- 'https://opendata.arcgis.com/datasets/7b0998f4e2ea42bda0068afc8eeaf904_19.zip'

filePath3 <- file.path(dataDir,basename(url_boundary))
utils::download.file(url_boundary,filePath3) 
utils::unzip(filePath3,exdir=file.path(dataDir, 'lacities'))

dsnPath <- file.path(dataDir, 'lacities')
shpName <- 'LA%20County%20City%20Boundaries'

la_SFDF <- convertLayer(
    dsn = dsnPath, 
    layer = shpName, 
    encoding = 'UTF-8'
  )

la_SFDF <- dplyr::select(
      .data = la_SFDF,
      city = .data$CITY_LABEL,
      city_number = .data$CITY_NO,
      shapearea = .data$ShapeSTAre,
      shapelen = .data$ShapeSTLen
  )

# What does it look like? 
la_SFDF %>% 
  plot()  #niiiiice

#unique(la_SFDF$city) 

la_SFDF<- la_SFDF %>% 
  #filter(city_number !="0") %>% 
  distinct(city_number, .keep_all = TRUE) %>% 
  mutate(city = gsub(pattern = "City of ", replacement = "", city)) %>% 
  mutate(city =gsub(pattern = "ñ", replacement = "n", city))
#require(sp)
#la_cities_map <- sp::merge(la_SFDF, cities) 

la_SFDF <- full_join(la_SFDF, cities, by = c('city'))

# Where is Pasadena? Long Beach? I see, they were not included in the url.

la_SFDF %>% plot()

tm_shape(la_SFDF) +
  tm_polygons("city") 



# Last time I checked, there are 88 cities in LA County. Must be duplicates. Odd, why are there missing cities? Ahh, turns out when I filtered by "LA County," some of the places were NA. Let's filter it by removing all the cities without a city number "0" and then remove the duplicates!

# Some of the cities are "missing" <- let's fill them in tomorrow and figure out why they're not! 

# Map 

# http://dashboard.publichealth.lacounty.gov/covid19_surveillance_dashboard/ Should be similar



# Note: Going to have to stop now. When I used sp:merge, I am left with the specific cities in the LA County (la_SFDF, cities). When printed, you don't see the "LA County" shape. Messed around a little bit with the la_SFDF filter and when I plotted, it looked like what I want. Full_joined with the cities df but dimensions were not matching. Maybe I need to fill the NA's? Will come back to this. 

Work in progress, not even remotely done. Definitely really like tmap but I still have a lot to embrace when it cmoes to shape files and the different layers! It's kind of similar to ggplot though, so that's nice! Definitely could have gotten more done but I had many questions along the way. This is so so fun! -Interested in understanding county population vulnerability (maybe food affordability?) -Crop yields by region -Glacial melting



MazamaScience/MazamaSpatialUtils documentation built on Sept. 14, 2023, 6 p.m.