In RobWHickman/sf.chlorodot: Easy Dot Density Maps using Simple Features

sf.dotdensity

Simple Dot Density Maps using sf

A packaged version of Paul Campbell's blogpost found here. Hopefully should allow for faster generation of dot density maps for people wanting to emulate the post. Also contains the data from the blogpost as an example.

Functions

Package contains two functions in ./R - random_round() straight from Paul Campbell's blogpost and Jens von Bergmann's package for dot densitys. Rounds numbes up or down randomly - calc_dots() modified Paul Campbell's blogpost pipeline into a function in base R to calculate the random position of dots within a shapefile

Examples

Two examples are presented below. The first using the data from the original blogpost. A second one scraping South African census 2011 data from Adrian Frith's site to plot the distribution of primary languages within the country. Both are plotted using custom theme elements from the original blog.

#load libraries
library(sf)
#devtools::install_github("RobWHickman/sf.dotdensity")
library(sf.dotdensity)

#load the data
london_shapefile <- sf.dotdensity::london_shapefile
london_election_data <- sf.dotdensity::london_election_data

#get the data to plot
#merge a shapefile with the population data
london_sf_data <- merge(london_shapefile, london_election_data, by = "ons_id")

#the columns we want to select and plot
parties <- names(london_sf_data)[4:8]
#set up a colour scale for these if so inclined
colours = c("deepskyblue", "red", "gold", "purple", "green")
names(colours) = parties

#how many people should lead to one dot
people_per_dots = 1000

#calculate the dot positions for each column
london_dots <- calc_dots(df = london_sf_data,
                         col_names = parties,
                         n_per_dot = people_per_dots)

#plot the results
library(ggplot2)
london_plot <- ggplot() +
  #first add the shape as a background
  geom_sf(data = london_sf_data, fill = "transparent",colour = "white") +
  #add the dots
  geom_point(data = london_dots, aes(lon, lat, colour = variable), size = 0.5) +
  #colour based on the scale already defined
  scale_colour_manual(name = "Party", values = colours) +
  #title
  ggtitle("Dot Density Map of London in the 2017 General Election",
          subtitle = paste("one dot equals", people_per_dots, "people")) +
  theme_dotdensity() +
  #make the legend shapes bigger so it's possible to see them clearly
  guides(colour = guide_legend(override.aes = list(size = 10)))

#plot
london_plot

Another good example using Bosnian ethnicity data (which is used in the package hex sticker) can be found here. The data needed to reproduce this figure is contained within the package and the code is provided below.

bosnia_sf_data <- merge(bosnia_shapefile, bosnia_ethnicity_data, by = "district")
bosnia_sf_data[is.na(bosnia_sf_data)] <- 0

#we'll only focus on the three most prominent ethnicities
ethnicities <- c("Bosniak", "Serb", "Croat")
bosnia_palette <- c("goldenrod", "mediumblue", "white")
names(bosnia_palette) <- ethnicities

bosnia_dots <- calc_dots(bosnia_sf_data, col_names = ethnicities, n_per_dot = 100)

bosnia_plot <- ggplot() +
  geom_sf(data = bosnia_shapefile, fill = NA, colour = "white") +
  geom_point(data = bosnia_dots, aes(lon, lat, colour = variable), 
             size = 0.5, alpha = 0.7, shape = 19) +
  scale_colour_manual(name = "Party", values = bosnia_palette) +
  theme_dotdensity(legend = FALSE) +
  guides(colour = guide_legend(override.aes = list(size = 10)))

bosnia_plot

#old South African languages example
#now deprecated- needs tweaking to rescrape language data
#libraries to download and munge data
library(rvest)
library(tidyverse)

#download the South African shapefile fom gadm
admin_url <- "https://biogeo.ucdavis.edu/data/gadm3.6/Rsf/gadm36_ZAF_3_sf.rds"
download.file(admin_url, destfile = "shapefiles.rds", mode = "wb")
south_africa <- readRDS("shapefiles.rds") %>%
  #convert to sf
  st_as_sf() %>%
  select(region = NAME_3) %>%
  #merge geometries that have two rows
  group_by(region) %>%
  summarise()

#get the links to the data from Adrian Frith's site
sa_data_url <- "https://census2011.adrianfrith.com"
south_africa_data <- sa_data_url %>%
  read_html() %>% 
  html_nodes(".namecell a") %>%
  html_attr("href") %>%
  paste0(sa_data_url, .) %>%
  lapply(., function(x) read_html(x) %>% 
           html_nodes(".namecell a") %>%
           html_attr("href") %>%
           paste0(sa_data_url, .)) %>%
  unlist() %>%
   lapply(., function(x) read_html(x) %>% html_nodes(".namecell a") %>% html_attr("href") %>% paste0(sa_data_url, .)) %>% unlist()

#scrape the data on primary language from the 2011 South African census
language_data <- rbindlist(lapply(south_africa_data, function(x) {
  read <- read_html(x)
  language_nos <- read %>% html_nodes(".datacell") %>% html_text()
  start <- grep("Percentage", language_nos)[3] + 1
  stop <- grep("Population", language_nos) - 1
  #some areas have no data
  if(!is.na(start) & !is.na(stop)) {
    language_nos <- language_nos[start:stop]
    language_nos <- language_nos[seq(1, length(language_nos), 2)]
  } else {
    language_nos <- NA
  }

  languages <- read %>% html_nodes("tr > :nth-child(1)") %>% html_text()
  start <- grep("First language", languages) + 1
  stop <- grep("Name", languages) - 1
  if(length(start) > 0 & !is.na(stop)) {
    languages <- languages[start:stop]
  } else {
    languages <- NA
  }

  region_names <- read %>% html_nodes(".topname") %>% html_text()

  #combine into a df
  df <- data.frame(language = languages, primary_speakers = language_nos, region = region_names)
  return(df)
})) %>%
  #convert number of speakers to numeric
  mutate(primary_speakers = as.numeric(as.character(primary_speakers))) %>%
  #matching of area names with South African shapefile
  mutate(region = gsub(" NU", "", region)) %>%
  mutate(region = gsub("Tshwane", "City of Tshwane", region)) %>%
  #filter only the data we want to merge
  filter(region %in% south_africa$region) %>%
  filter(!is.na(language)) %>%
  filter(language != "Not applicable") %>%
  #spread the data
  dcast(., region ~ language, value.var = "primary_speakers", fun.aggregate = sum) %>%
  #join in the spatial geometry
  left_join(., south_africa) %>%
  #convert to sf
  st_as_sf()

#calculate the dot locations using the package
sf_dots <- calc_dots(df = language_data, col_names = names(language_data)[2:14], n_per_dot = 1000)

#plot it
#stolen the background colour scheme from Paul Campbell's blog
#original inspiration for this package
p <- ggplot() +
  geom_sf(data = south_africa, fill = "transparent",colour = "white") +
  geom_point(data = sf_dots, aes(lon, lat, colour = variable), size = 0.5) +
  scale_colour_discrete(name = "Primary Language") +
  ggtitle("Language Diversity in South Africa") +
  theme_void() +
  guides(colour = guide_legend(override.aes = list(size = 10))) +
  theme(plot.background = element_rect(fill = "#212121", color = NA), 
        panel.background = element_rect(fill = "#212121", color = NA),
        legend.background = element_rect(fill = "#212121", color = NA),
        text =  element_text(color = "white"),
        title =  element_text(color = "white"),
        legend.text=element_text(size=12))

p