R/NOAAviz.R

Defines functions eq_datemaker eq_location_clean eq_clean_data geom_timeline geom_timeline_label eq_map eq_create_label

Documented in eq_clean_data eq_create_label eq_datemaker eq_location_clean eq_map geom_timeline geom_timeline_label

#' Make proper dates from NOOA data
#'
#' The eq_datemaker helper function takes in YEAR, MONTH, and DAY number from
#' [NOOA data](https://www.ngdc.noaa.gov/nndc/struts/form?t=101650&s=1&d=1)
#' controls for BCE dates and missing values, and returns a date in posixCt
#' format.
#'
#' @import lubridate
#' @importFrom stringr str_pad
#'
#' @param yr year from YEAR column of NOAA data
#' @param mnth month from MONTH column of NOAA data (sometimes NA)
#' @param dy day from DAY column of NOAA data (sometimes NA)
#'
#' @return A posixCt object from coresponding inputs controlling
#'         for missing and BCE dates.
#'
#' @examples
#' \dontrun{
#'     eq_datemaker(-1142, 11, NA)
#' }
#'


eq_datemaker <- function(yr, mnth, dy){
    yr_str <- stringr::str_pad(abs(yr), 4, "left", pad = "0") #processed year
    mnth_str <- ifelse(is.na(mnth), 1, mnth) #processed month
    dy_str <- ifelse(is.na(dy), 1, dy) #processed day

    date_str <- paste(yr_str, mnth_str, dy_str, sep = "-")
    # black magic for when the year is BCE
    if(yr<0){
        lubridate::ymd("0000-01-01") -
            lubridate::years(abs(yr) - 1) -
            months(12 - as.numeric(mnth_str)) -
            (lubridate::days_in_month(
                as.numeric(mnth_str)) + 1 - as.numeric(dy_str))
    }else lubridate::ymd(date_str)
}


#' Clean location names from NOAA data
#'
#' Given a LOCATION from NOAA data strips out the country name and ':', leaving
#' all characters following and converting to title case. Non-exported helper
#' function.
#'
#' @importFrom stringr str_to_title
#'
#' @param loc_name NOAA dataset style name, with country preceeding ':'
#'                 and further details.
#'
#' @return A cleaned location string.
#'
#' @examples
#' \dontrun{
#'     eq_location_clean("Canada: Land of the free")
#' }
#'


eq_location_clean <- function(loc_name){
    loc_name <- gsub(".*:","",loc_name)
    loc_name <- stringr::str_to_title(
        trimws(loc_name)
    )
    return(loc_name)
}


#' Earthquake data cleaner
#'
#' This function takes a NOAA data frame, and cleans it for analysis.
#'
#' @importFrom dplyr mutate rowwise ungroup
#' @importFrom magrittr %>%
#'
#' @export
#'
#' @param raw_NOOA_data Unlceaned NOAA dataframe.
#'
#' @return A dataframe containing the cleaned data set.
#'
#' @examples
#' \dontrun{
#' paste0("https://www.ngdc.noaa.gov/",
#'        "nndc/struts/results?type_0=Exact&",
#'        "signif.txtquery_0=$ID&t=101650&s=13&",
#'        "d=189&dfn=signif.txt") %>%
#'     readr::read_delim(delim = "\t") %>%
#'     eq_clean_data()
#' }
#'


eq_clean_data <- function(raw_NOOA_data){
    dplyr::mutate(dplyr::rowwise(raw_NOOA_data),
                  DATE = eq_datemaker(YEAR, MONTH, DAY),
                  LATITUDE = as.numeric(LATITUDE),
                  LONGITUDE = as.numeric(LONGITUDE),
                  LOCATION_NAME = eq_location_clean(LOCATION_NAME)) %>%
        dplyr::ungroup()
}



#' Dates timeline geom
#'
#' @describeIn  geom_timeline
#' @import grid
#' @import ggplot2
#'
#'
GeomTimeline <- ggplot2::ggproto("GeomTimeline", Geom,
                        required_aes = c("x"),
                        default_aes = ggplot2::aes(y = 0.1, colour = "grey", fill = "grey",
                                          size = 3, # summary(data$EQ_PRIMARY)
                                          alpha = 0.3, shape = 21, stroke = 0.5),
                        draw_key = draw_key_point, # GeomPoint$draw_key; GeomPoint$default_aes
                        draw_panel = function(data, panel_scales, coord) {

                            # transformation
                            coords <- coord$transform(data, panel_scales)

                            # grob objects
                            # segment
                            g_segs <- grid::segmentsGrob(
                                x0 = min(coords$x), x1 = max(coords$x),
                                y0 = coords$y, y1 = coords$y,
                                gp = grid::gpar(
                                    col = "grey",
                                    lwd = 1 * .pt
                                )
                            )

                            # points
                            g_pts <- grid::pointsGrob(
                                x = coords$x, y = coords$y,
                                pch = coords$shape,
                                size = unit(coords$size / 3, "char"),
                                gp = grid::gpar(
                                    col = scales::alpha(coords$colour, coords$alpha),
                                    fill = scales::alpha(coords$fill, coords$alpha)
                                )
                            )

                            # tree for multiple lines
                            grid::gTree(children = grid::gList(g_segs, g_pts))
                        }
)


#' Timeline geom to visualize NOAA earthquakes data
#'
# geom_timeline is used for "plotting a time line of earthquakes ranging
# from xmin to xmaxdates with a point for each earthquake. Optional
# aesthetics include color, size, and alpha (for transparency). The
# xaesthetic is a date and an optional y aesthetic is a factor
# indicating some stratification in which case multiple time lines
# will be plotted for each level of the factor (e.g. country)."
# (Mastering Software Development in R Capstone, Week 2)
#'
#' @import grid
#'
#' @export
#'
#' @param mapping aesthetic mappings
#' @param data The datafram to create a timeline for
#' @param stat The stat to transform the data with, by default "identity"
#' @param position Adjusts the position of the layer in the grob
#' @param na.rm If FALSE, the default, missing values are removed with
#'              a warning. If TRUE, missing values are silently removed.
#' @param show.legend Display legends
#' @param inherit.aes Should the aes be inherited
#' @param ... Other args
#'
#' @return a ggplot timeline object for plotting a timeline
#'
#' @examples
#' \dontrun{
#' paste0("https://www.ngdc.noaa.gov/",
#'        "nndc/struts/results?type_0=Exact&",
#'        "signif.txtquery_0=$ID&t=101650&s=13&",
#'        "d=189&dfn=signif.txt") %>%
#'     readr::read_delim(delim = "\t") %>%
#'     eq_clean_data() %>%
#'     dplyr::filter(lubridate::year(DATE) >= 2000,
#'                   COUNTRY == "MEXICO") %>%
#'     ggplot2::ggplot(aes(x = DATE)) +
#'     geom_timeline()
#' }
#'

geom_timeline <- function(mapping = NULL, data = NULL, stat = "identity",
                          position = "identity", na.rm = FALSE,
                          show.legend = NA, inherit.aes = TRUE, ...) {
    layer(
        geom = GeomTimeline, mapping = mapping,
        data = data, stat = stat, position = position,
        show.legend = show.legend, inherit.aes = inherit.aes,
        params = list(na.rm = na.rm, ...)
    )
}

#' Label geom for use with timeline geom
#'
#' @describeIn  geom_timeline_label
#' @import grid
#' @import ggplot2
#'

GeomTimelineLabel <- ggplot2::ggproto("GeomTimelineLabel", Geom,
                             required_aes = c("x", "label"),
                             default_aes = ggplot2::aes(n_max = NA, y = 0.1, colour = "grey"),
                             draw_key = draw_key_label, # GeomPoint$draw_key; GeomPoint$default_aes
                             draw_panel = function(data, panel_scales, coord) {

                                 # transformation
                                 coords <- coord$transform(data, panel_scales)

                                 # subset
                                 if (!is.na(coords$n_max[1])) {
                                     coords <- coords %>%
                                         dplyr::group_by(y) %>%
                                         dplyr::top_n(coords$n_max[1])
                                 }

                                 # grob objects
                                 # vertical lines
                                 gL <- grid::polylineGrob(
                                     x = rep(coords$x, 2),
                                     y = c(coords$y, coords$y + 0.05),
                                     id = rep(1:nrow(coords), 2),
                                     gp = grid::gpar(
                                         col = coords$colour
                                     )
                                 )

                                 # labels
                                 gT <- grid::textGrob(
                                     label = coords$label,
                                     x = coords$x, y = coords$y + 0.06,
                                     just = "left", rot = 45
                                 )

                                 # tree
                                 grid::gTree(children = grid::gList(gL, gT))
                             }
)

#' Label timeline geom for NOAA earthquakes data
#'
#' geom_timeline_label is used for adding annotations to NOAA earthquake data.
#' "This geom adds a vertical line to each data point with a text
#' annotation (e.g. the location of the earthquake) attached to each line.
#' There should be an option to subset to n_max number of earthquakes,
#' where we take the n_max largest (by magnitude) earthquakes.
#' Aesthetics are x, which is the date of the earthquake and label
#' which takes the column name from which annotations will be obtained."
#' (Mastering Software Development in R Capstone, Week 2)
#'
#' @import grid
#'
#' @export
#'
#' @param mapping aesthetic mappings
#' @param data The datafram to create a timeline for
#' @param stat The stat to transform the data with, by default "identity"
#' @param position Adjusts the position of the layer in the grob
#' @param na.rm If FALSE, the default, missing values are removed with
#'              a warning. If TRUE, missing values are silently removed.
#' @param show.legend Display legends
#' @param inherit.aes Should the aes be inherited
#' @param ... Other args
#'
#' @return a ggplot timeline object with labels
#'
#' @examples
#' \dontrun{
#' paste0("https://www.ngdc.noaa.gov/",
#'        "nndc/struts/results?type_0=Exact&",
#'        "signif.txtquery_0=$ID&t=101650&s=13&",
#'        "d=189&dfn=signif.txt") %>%
#'     readr::read_delim(delim = "\t") %>%
#'     eq_clean_data() %>%
#'     dplyr::filter(COUNTRY = "ANTARTICA") %>%
#'     ggplot2::ggplot(aes(x = DATE, Y = COUNTRY)) +
#'     geom_timeline() +
#'     geom_timeline_label(aes(label = LOCATION_NAME),
#'                         n_max = 5)
#' }
#'

geom_timeline_label <- function(mapping = NULL, data = NULL, stat = "identity",
                                position = "identity", na.rm = FALSE,
                                show.legend = NA, inherit.aes = TRUE,
                                ...) {
    layer(
        geom = GeomTimelineLabel, mapping = mapping,
        data = data, stat = stat, position = position,
        show.legend = show.legend, inherit.aes = inherit.aes,
        params = list(na.rm = na.rm, ...)
    )
}

#' Interactive map visualization for earthquakes
#'
#' eq_map maps the epicenters (LATITUDE/LONGITUDE) with a radius proportional to
#' the earthquake's magnitude (EQ_PRIMARY) "and annotates each point with in pop
#' up window containing annotation data stored in a column of the data frame.
#' The user should be able to choose which column is used for the annotation in
#' the pop-up with a function argument named annot_col."
#' (Mastering Software Development in R Capstone, Week 3)
#'
#' @importFrom leaflet leaflet addTiles addCircleMarkers
#' @importFrom htmltools htmlEscape
#'
#' @export
#'
#' @param eq_df a cleaned earthquake data frame to map
#' @param annot_col the string name of a column to use for annotation
#'                  of the earthquake locations in eq_df (optional)
#'
#' @return a leaflet map object
#'
#' @examples
#' \dontrun{
#'     clean_NOAA_data %>%
#'          dplyr::filter(COUNTRY == "JAPAN" &
#'                        lubridate::year(DATE) >= 2000) %>%
#'          eq_map(annot_col = "DATE")
#' }
#'


eq_map <- function(eq_df, annot_col = NA){
    leaflet::leaflet(eq_df) %>%
        leaflet::addTiles() %>%
        leaflet::addCircleMarkers(
            radius = ~as.numeric(EQ_PRIMARY),
            fillOpacity = .5,
            stroke = FALSE,
            popup = stats::as.formula(paste0("~","htmltools::htmlEscape(",
                                             annot_col, ")"))
        )
}



#' HTML labeler for earthquakes leaflet map
#'
#' eq_create_label takes a row of the dataset as an argument and creates an HTML label
#' that can be used as the annotation text in the leaflet map." It puts together
#' "a character string for each earthquake that will show the cleaned location,
#' the magnitude (EQ_PRIMARY), and the total number of deaths (TOTAL_DEATHS),
#' with boldface labels for each ("Location", "Total deaths", and "Magnitude").
#' If an earthquake is missing values for any of these, both the label and the
#' value is skipped for that element of the tag."
#' (Mastering Software Development in R Capstone, Week 3)
#'
#' @export
#'
#' @param row a row of cleaned NOAA earthquake data to create a label for
#'
#' @return an html label string to add to an earthquake leaflet map object
#'
#' @examples
#' \dontrun{
#'     clean_NOAA_data %>%
#'          dplyr::filter(COUNTRY == "JAPAN" &
#'                        lubridate::year(DATE) >= 2000) %>%
#'          dplyr::mutate(popup_text = eq_create_label(.)) %>%
#'          eq_map(annot_col = "popup_text")
#' }
#'

eq_create_label <- function(row) {
    text <- ""
    text <- ifelse(!is.na(row$LOCATION_NAME),
                   paste(text, "<b>Location:</b>", row$LOCATION_NAME),
                   text)
    text <- ifelse(!is.na(row$EQ_PRIMARY),
                   paste(text, "<br/> <b>Magnitude:</b>", row$EQ_PRIMARY),
                   text)
    text <- ifelse(!is.na(row$TOTAL_DEATHS),
                   paste(text, "<br/> <b>Total deaths:</b>", row$TOTAL_DEATHS),
                   text)
    text
}
jdallmann/NOAAviz documentation built on Nov. 4, 2019, 2:35 p.m.