knitr::opts_chunk$set(dev = "png", dpi = 200)
# attaching necessary packages
library(tidyverse)
library(magrittr)
library(sf)
library(spData)
library(spDataLarge)
library(maps)
library(wesanderson)
library(gridExtra)

# Locate the "tiny" example datasets stored in the vignettes folder.
# `pattern` is a regular expression, not a shell glob: match file names that
# contain "tiny" and end in ".rds" (dot escaped, end anchored), so look-alike
# files such as "x_tiny.rds.bak" are not picked up.
files <- list.files(
  file.path(here::here(), "vignettes"),
  pattern = "tiny.*\\.rds$",
  full.names = TRUE
) %>%
  sort()

# Read one .rds file and label its rows with a tag taken from the file name.
#
# x: path to a single .rds file. The file name (without directories) is split
#    on "_" and the second piece is used as the tag.
# Returns the object read from `x` with an added `tag_col` column moved to the
# first position. Stops with an error if the file name has no second
# "_"-separated piece.
readRDS_n_tag <- function(x) {
  # basename() isolates the file name, so a "_" or ".rds" occurring in a
  # directory name cannot interfere with the tag extraction.
  file_name <- basename(x)
  name_parts <- strsplit(file_name, "_", fixed = TRUE)[[1]]
  if (length(name_parts) < 2) {
    stop("Cannot extract a tag from file name: ", file_name, call. = FALSE)
  }
  tag_string <- name_parts[[2]]

  tagged <- tibble::add_column(readRDS(x), tag_col = tag_string)
  dplyr::select(tagged, tag_col, dplyr::everything())
}



# Read every tiny dataset and row-bind the results into one data frame.
slf_tiny <- map_dfr(files, readRDS)

There are many sources contributing data on the presence of spotted lanternfly (SLF) in the US. These data must be collected and organized coherently before proceeding to visualization and analysis. All of these steps can then be bundled together, enhancing collaboration and reproducibility. To do so, we use an R package approach. R is one of the most widely used software environments for data science, and its popularity is largely due to its being open source. R packages provide a way to organize resources such as data and the tools to manipulate and analyze them. In addition, packages contain useful documentation describing how to use such resources.

We built an R package to be shared internally among collaborators in an effort to improve reproducibility and consistency of the analyses. Collaborators can work independently on separate phases of the analyses within this package, and keep each step appropriately documented for others to consult and follow up on. We are currently in the early stages of package development, and are compiling a single dataset on spotted lanternfly presence in the US from several data sources. There are three major sources of data that are combined in the package:

We are currently looking to expand the number of sources of data, to have a full representation of SLF presence.

Data Visualization

Once all data are combined into a single dataset, we can move on to data visualization. Here we show a very simple map (Figure 1) of SLF presence in the US, generated from the data described above. The grey crosses represent survey records that were negative for SLF. Colored dots represent positive records, and are color coded by the year of the survey.

Figure 2 instead shows the same data broken down by year and color coded by the source of the records (PDA, neighboring states, or USDA). The negative records are also color coded by source. The map provides a good way to show the improved coverage achieved in 2018 and 2019, as the effort to monitor SLF ramped up.

# US state polygons from the maps package, converted to an sf object.
# maps::map is spelled out because purrr (attached via tidyverse) also
# exports a function called map().
states <- maps::map("state", plot = FALSE, fill = TRUE) %>%
  sf::st_as_sf()
# Append the centroid coordinates of each state (columns X and Y); they are
# used further down to position the state labels.
states <- states %>%
  st_centroid() %>%
  st_coordinates() %>%
  cbind(states, .)
states$ID
# Lookup of 2-letter abbreviations, one row per entry of states$ID and in the
# same order; IDs without a match in state.name get an empty string.
state_abbr <- tibble(ID = states$ID) %>%
  left_join(
    tibble(state.name = str_to_lower(state.name), state.abb),
    by = c(ID = "state.name")
  ) %>%
  mutate(state.abb = replace_na(state.abb, ""))
# Attach the 2-letter codes to the sf object.
states$code <- state_abbr$state.abb

# Figure 1: map of all SLF survey records, written to figures/SLF_spread.png.
# Negative surveys are drawn as faint grey crosses; positive surveys are drawn
# as dots coloured by survey year.
# `units` is spelled out in full rather than relying on partial argument
# matching of `unit`.
png(file.path(here::here(), "figures", "SLF_spread.png"),
    width = 7, height = 7, units = "in", res = 300)
# print() is explicit so the plot is also drawn when the script is source()d,
# where top-level auto-printing does not happen.
print(
  ggplot(data = world) +
    geom_sf() +
    geom_sf(data = states, fill = "white") +
    #coord_sf(xlim = c(-85, -71), ylim = c(33, 45.5), expand = FALSE) +
    coord_sf(xlim = c(-84, -71), ylim = c(34, 45), expand = FALSE) +
    geom_point(data = slf_tiny %>% filter(!slf_present),
               aes(x = Longitude, y = Latitude),
               col = "grey", alpha = 0.3, shape = 4, size = .5) +
    # sorted by descending year, so the most recent records are drawn first
    # and earlier years end up on top
    geom_point(data = slf_tiny %>% filter(slf_present) %>% arrange(desc(Year)),
               aes(x = Longitude, y = Latitude, col = Year),
               shape = 19, size = 0.8) +
    geom_text(data = states, aes(X, Y, label = code), size = 4.5) +
    scale_color_gradientn(
      colours = wesanderson::wes_palette("Zissou1", 100, type = "continuous")
    )
)
dev.off()
# Named simulation sites with their longitude and latitude.
sim <- tibble(
  sites = c("Allegheny Forest", "Berks County", "Northern VA", "South Carolina"),
  longitude = c(-78.5, -75.5, -78.2, -79.5),
  latitude = c(41.5, 40.5, 39.2, 34.5)
)

# Map of the simulation sites (orange diamonds with labels) drawn over the
# positive SLF records (grey dots), written to figures/Simulation_sites.jpeg.
# `units` is spelled out in full rather than relying on partial argument
# matching of `unit`.
jpeg(file.path(here::here(), "figures", "Simulation_sites.jpeg"),
     width = 6, height = 8, units = "in", res = 300)
# print() is explicit so the plot is also drawn when the script is source()d.
print(
  ggplot(data = world) +
    geom_sf() +
    geom_sf(data = states, fill = "white") +
    coord_sf(xlim = c(-82, -71), ylim = c(33, 44), expand = FALSE) +
    geom_point(data = slf_tiny %>% filter(slf_present) %>% arrange(desc(Year)),
               aes(x = Longitude, y = Latitude), shape = 19, col = "grey60") +
    geom_point(data = sim, aes(x = longitude, y = latitude),
               col = "chocolate1", fill = "chocolate1", size = 5, shape = 23) +
    # geom_label_repel() lives in ggrepel, which this script never attaches;
    # the namespace-qualified call makes it resolvable.
    ggrepel::geom_label_repel(data = sim,
                              aes(x = longitude, y = latitude, label = sites),
                              box.padding   = 0.5,
                              point.padding = 1,
                              segment.color = 'black')
)

dev.off()
# Number of records per data source, before recoding.
table(slf_tiny$Source)

# Relabel the "PDA" source as "State"; every other value is kept as is.
slf_tiny <- slf_tiny %>%
  mutate(Source = ifelse(Source == "PDA", "State", Source))

# Number of records per survey year (rows) and state (columns).
table(slf_tiny$Year, slf_tiny$State)

# Build one map per survey year and store it as g<year> (e.g. g2014), the
# names used by the multi-panel figure further down. In each panel negative
# records are crosses and positive records are dots, both coloured by Source.

# Pin the Source -> colour mapping across panels. With unnamed manual values
# and no limits, each panel assigns colours to whichever sources occur in that
# year, so the same source could change colour between panels.
source_colors <-
  wesanderson::wes_palette("IsleofDogs1", 3, type = "discrete")[c(1, 3)]
source_levels <- sort(unique(as.character(slf_tiny$Source)))
# Only pin the mapping when there is a colour for every source; otherwise
# keep ggplot2's default per-panel level order (the previous behaviour).
source_limits <- if (length(source_levels) <= length(source_colors)) {
  source_levels
} else {
  NULL
}

for (i in sort(unique(slf_tiny$Year))) {

  g <- ggplot(data = world) +
    geom_sf() +
    geom_sf(data = states, fill = "white") +
    #coord_sf(xlim = c(-81, -71), ylim = c(38, 44), expand = FALSE) +
    coord_sf(xlim = c(-85, -71), ylim = c(33, 45.5), expand = FALSE) +
    geom_point(data = slf_tiny %>% filter(!slf_present, Year == i),
               aes(x = Longitude, y = Latitude, col = Source),
               alpha = 0.2, shape = 4, size = .8) +
    # no sorting by year needed here: the data hold a single year
    geom_point(data = slf_tiny %>% filter(slf_present, Year == i),
               aes(x = Longitude, y = Latitude, col = Source),
               shape = 19) +
    # year label in the lower-right corner of the panel
    annotate("text", x = -73, y = 34, label = paste(i), size = 6) +
    # annotate("text", x = -73, y = 38.5, label = paste("Year", i), size = 6) +
    scale_color_manual(values = source_colors, limits = source_limits) +
    # panels carry no legend of their own
    theme(legend.position = "none")

  assign(paste0("g", i), g)

}

# Extract the legend (the grob named "guide-box") from a ggplot object so it
# can be drawn on its own, e.g. as a shared legend in a multi-panel layout.
#
# myggplot: a ggplot object that displays a legend.
# Returns the legend grob. Stops with an informative error when the built
# plot contains no "guide-box" grob (for example legend.position = "none").
get_legend <- function(myggplot) {
  tmp <- ggplot2::ggplot_gtable(ggplot2::ggplot_build(myggplot))
  # vapply() guarantees a character vector even for an empty or unnamed grob
  grob_names <- vapply(
    tmp$grobs,
    function(x) if (is.null(x$name)) NA_character_ else as.character(x$name)[1],
    character(1)
  )
  leg <- which(grob_names == "guide-box")
  if (length(leg) == 0) {
    stop("No legend ('guide-box' grob) found in the supplied plot.",
         call. = FALSE)
  }
  # if several legend boxes are present, use the first one
  tmp$grobs[[leg[1]]]
}

# Shared colour legend for the yearly panels: plot the positive 2019 records
# coloured by Source with the legend on top, then keep only the legend grob.
legend <- (
  ggplot() +
    geom_point(
      data = slf_tiny %>%
        filter(slf_present, Year == 2019) %>%
        arrange(desc(Year)),
      aes(x = Longitude, y = Latitude, col = Source),
      shape = 19
    ) +
    scale_color_manual(
      values = wesanderson::wes_palette("IsleofDogs1", 3, type = "discrete")[c(1,3)]
    ) +
    theme(legend.position = "top")
) %>%
  get_legend()

# Figure 2: shared legend on the top row, followed by the six yearly maps
# (2014-2019) in a 2 x 3 grid, written to figures/SLF_map.png.
# `units` is spelled out in full rather than relying on partial argument
# matching of `unit`.
#pdf(file.path(here::here(), "figures", "SLF_map.pdf"), width = 12, height = 9)
png(file.path(here::here(), "figures", "SLF_map.png"),
    width = 12, height = 9, units = "in", res = 200)
grid.arrange(legend, g2014, g2015, g2016, g2017, g2018, g2019,
             ncol = 3, nrow = 3,
             # row 1: legend spanning all columns; rows 2-3: the yearly maps
             layout_matrix = rbind(c(1, 1, 1), c(2, 3, 4), c(5, 6, 7)),
             widths = c(5, 5, 5), heights = c(1, 5, 5))
dev.off()
# Render the introduction vignette to a PDF placed next to its .Rmd source.
rmarkdown::render(
  input = here::here("vignettes", "100_Introduction_to_lycormap.Rmd"),
  output_file = here::here("vignettes", "100_Introduction_to_lycormap.pdf")
)


victoria-ramirez/expanded_slf documentation built on July 1, 2020, 12:15 a.m.