library(readr) library(dplyr) get_eq_data <- function(url = "https://www.ngdc.noaa.gov/nndc/struts/results?type_0=Exact&query_0=$ID&t=101650&s=13&d=189&dfn=signif.txt"){ readr::read_delim(url,delim="\t", col_types = readr::cols()) } eq_clean_data <- function(data){ data <- data %>% dplyr::mutate(date = lubridate::make_date(year = YEAR, month = ifelse(is.na(MONTH),1,MONTH), day = ifelse(is.na(DAY),1,DAY)), LATITUDE = as.numeric(LATITUDE), LONGITUDE = as.numeric(LONGITUDE), EQ_PRIMARY = as.numeric(EQ_PRIMARY) ) } eq_location_clean <- function(data){ data <- data %>% dplyr::mutate(LOCATION_NAME = trimws(stringr::str_replace(LOCATION_NAME, pattern = "^.*:[:space:]{1}", replacement = ""))) %>% mutate(LOCATION_NAME = tools::toTitleCase(tolower(LOCATION_NAME))) # calling mutate on LOCATION_NAME twice to avoid excessive nesting }
library(ggplot2) library(dplyr) library(readr) library(stringr) library(lubridate) library(grid) library(scales) GeomTimeline <- ggplot2::ggproto("GeomTimeline", Geom, required_aes = c("x"), default_aes = aes(shape = 19, colour = "black", size = 1.5, alpha = NA, stroke = 0.5, fill="black", y = 0.25), draw_key = draw_key_point, draw_panel = function(data, panel_scales, coord) { coords <- coord$transform(data, panel_scales) # creates a line on which the earthquakes are plotted y_values <- unique(coords$y) no_y <- length(y_values) eq_line <- grid::polylineGrob(x = rep(c(0,1),no_y), y = sort(rep(y_values,2)), id=sort(rep(c(1:no_y),2)), gp=grid::gpar(col = "grey", lwd = 3)) #creates the points on the timeline eq_points <- grid::pointsGrob( coords$x, coords$y, pch = coords$shape, size = unit(coords$size*.pt, "mm"), gp = gpar(col = scales::alpha(coords$colour, coords$alpha), fill = scales::alpha(coords$fill, coords$alpha), fontsize = coords$size * .pt + coords$stroke * .stroke/2, lwd = coords$stroke * .stroke/2) ) plot <- grid::gTree(children = grid::gList(eq_line, eq_points)) } ) geom_timeline <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...) { theme_light() layer( geom = GeomTimeline, mapping = mapping, data = data, stat = stat, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(na.rm = na.rm, ...) ) } GeomTimelineLabel <- ggplot2::ggproto("GeomTimelineLabel", Geom, required_aes = c("x", "label"), default_aes = aes(shape = 19, colour = "black", size = 1.5, alpha = NA, stroke = 0.5, fill="black", y = 0.25, magnitude = NA), draw_key = draw_key_point, draw_panel = function(data, panel_scales, coord, n_max) { if(!is.na(n_max)){data <- data %>% dplyr::arrange(-magnitude) %>% head(n_max)} coords <- coord$transform(data, panel_scales) #str(coords) # creates the label lines #y_values <- unique(coords$y) no_y <- length(unique(coords$y)) label_line <- grid::polylineGrob(x = c(rbind(coords$x,coords$x)), y = c(rbind(coords$y,coords$y + (1/(no_y+4)))), id=sort(rep(c(1:length(coords$x)),2)), gp=grid::gpar(col = "grey", lwd = 2) ) #creates the labels label_text <- grid::textGrob(label = coords$label, x = coords$x, y = coords$y + (1/(no_y+3.8)), rot = 45, just = "left" ) plot <- grid::gTree(children = grid::gList(label_line, label_text)) } ) geom_timeline_label <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, n_max = NA, ...){ theme_light() layer( geom = GeomTimelineLabel, mapping = mapping, data = data, stat = stat, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(n_max = n_max, na.rm = na.rm, ...) ) }
eq_map <- function(data, annot_col=NA){ leaflet::leaflet(data) %>% leaflet::addTiles() %>% leaflet::addCircleMarkers(lng = ~LONGITUDE, lat = ~LATITUDE, radius = ~EQ_PRIMARY, weight = 2, popup = data[[annot_col]] ) } eq_create_label <- function(data){ data %>% dplyr::mutate(popup_text = paste0(ifelse(is.na(LOCATION_NAME), "", paste0("<b>Location: </b>", LOCATION_NAME)), ifelse(is.na(EQ_PRIMARY), "", paste0("</br><b>Magnitude: </b>",EQ_PRIMARY)), ifelse(is.na(TOTAL_DEATHS), "", paste0("</br><b>Total deaths: </b>", TOTAL_DEATHS)) )) }
This package provides a number of functions that can be used to retrieve, clean and visualize data from the NOAA Significant Earthquake Database.
The Significant Earthquake Database contains information on destructive earthquakes from 2150 B.C. to the present that meet at least one of the following criteria:
The database should be referenced to as: National Geophysical Data Center / World Data Service (NGDC/WDS): Significant Earthquake Database. National Geophysical Data Center, NOAA. doi:10.7289/V5TD9V7K
More information can be found at the NOAA website
This vignette will describe how to use this package. The package can be divided in three major parts:
The NOAA Significant Earthquake Database can be downloaded as a tab-delimited file from the NOAA website. get_eq_data
takes care of this and loads the data into a dataframe.
raw_earthquake_data <- get_eq_data()
The package has two functions for cleaning the data, eq_clean_data
and eq_location_clean
.
The eq_clean_data
function changes the datatypes of the columns LATITUDE
, LONGITUDE
and EQ_PRIMARY
to numeric. It also adds a column date
which has the date. The date column is constructed from the columns YEAR, MONTH and DAY. In some cases the DAY or MONTH is missing. In those cases, DAY or MONTH is set to 1.
The eq_location_clean
function cleans the description of the location from the column LOCATION_NAME
. The column LOCATION_NAME
has redundant country names which are removed by this function. Please note that some earthquakes have multiple locations associated with them. In these cases only the last location is preserved.
clean_earthquake_data <- eq_clean_data(raw_earthquake_data ) clean_location_earthquake_data <- eq_location_clean(clean_earthquake_data)
The functions for retrieving and cleaning earthquake data are suited to use in a pipe.
library(dplyr) clean_earthquake_data <- get_eq_data() %>% eq_clean_data() %>% eq_location_clean()
The package holds two two geoms that can be used in accord with the ggplot2-package to create a timeline with earthquakes and to label them.
The geom_timeline
can create a timeline. There is also an option to create stratified timelines for a factor y, e.g. country. The geom_timeline
requires an x-aesthetic with the date. Optional aesthetics include y (for multiple timelines), shape, colour, alpha, size, stroke and fill.
library(dplyr) library(ggplot2) data <- get_eq_data() %>% eq_clean_data() %>% eq_location_clean() subset <- dplyr::filter(data, COUNTRY %in% c("CHINA","USA","JAPAN"),date > as.POSIXlt.date("2016-01-01")) # basic use ggplot(subset, aes(x = date)) + geom_timeline() + theme_light() ggplot(subset, aes(x = date)) + geom_timeline(aes(size = EQ_PRIMARY, colour = COUNTRY), alpha = 0.3) + theme_light() #multiple timelines by setting the y aesthetic ggplot(subset, aes(x = date, y = COUNTRY)) + geom_timeline(aes(size = EQ_PRIMARY, colour = COUNTRY), alpha = 0.3) + theme_light()
The geom_timeline_label
can be used to annotate the timeline created with labels. The geom needs an x and label aesthetic. Optional aesthetics include y, and magnitude. The geom has an option to label only the earthquakes with the largest magnitudes. This option can be set with the n_max parameter. Note that the magnitude must be provided as an aesthetic to use the n_max parameter.
ggplot(subset, aes(x = date)) + geom_timeline(aes(size = EQ_PRIMARY, colour = COUNTRY), alpha = 0.3) + theme_light() + geom_timeline_label(aes(label = LOCATION_NAME)) # label multiple timelines ggplot(subset, aes(x = date, y = COUNTRY)) + geom_timeline(aes(size = EQ_PRIMARY, colour = COUNTRY), alpha = 0.3) + theme_light() + geom_timeline_label(aes(label = LOCATION_NAME)) # label only the largest earthquakes ggplot(subset, aes(x = date)) + geom_timeline(aes(size = EQ_PRIMARY, colour = COUNTRY), alpha = 0.3) + theme_light() + geom_timeline_label(aes(label = LOCATION_NAME, magnitude = EQ_PRIMARY), n_max = 5)
The package has also a function eq_map
to plot earthquake locations on an interactive map (using leaflet) and a function eq_create_label
to create informative popups for these locations.
The function eq_map
takes a dataframe with cleaned earthquake data and has an optional annot_col
-argument to create popups. The eq_create_label
function takes a dataframe and adss a column popup_text
to it which has the Location name, Magnitude and the number of deaths in HTML-format.
library(dplyr) data <- get_eq_data() %>% eq_clean_data() %>% eq_location_clean() data_mex <- dplyr::filter(data, COUNTRY == "MEXICO" & lubridate::year(date) >= 2000) # without popups eq_map(data_mex) # date as popup eq_map(data_mex, annot_col = "date") # popups with eq_create_label data_mex_popup <- eq_create_label(data_mex) eq_map(data_mex_popup, annot_col = "popup_text")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.