R/capcode.R

Defines functions eq_create_label eq_map geom_timeline_label stat_timeline_label geom_timeline stat_time eq_location_clean eq_clean_data

Documented in eq_clean_data eq_create_label eq_location_clean eq_map geom_timeline geom_timeline_label stat_time stat_timeline_label

#----------------------Clean the data------------------------
#---Create two functions that clean the data for the geoms---
#------------------------------------------------------------

utils::globalVariables(c("YEAR", "MONTH", "DAY",
                         "LONGITUDE", "LATITUDE",
                         "LOCATION_NAME", "popup_text",
                         "EQ_PRIMARY", "DEATHS",
                         "StatTimeline", "y"))


#' @title DATE Column Creation
#'
#' @description This function takes a data frame, ideally from the NOAA Significant Earthquakes dataset, and creates a DATE column. It also coerses LATITUDE and LONGITUDE into numerics. This function is meant to be used in the geoms of this package, so it does not need to be exported.
#'
#' @param data A data frame with YEAR, MONTH, DAY, LONGITUDE and LATITUDE columns.
#'
#' @return A data frame with a new column named DATE, combining the three columns into "%Y %m %d" format, and the LONGITUDE, LATITUDE columns coersed into numerics.
#' @import dplyr
#' @import magrittr
#' @export
#'
#' @examples \dontrun{load("data/data.rda")
#' data<-eq_clean_data(data)}
eq_clean_data<-function(data){

  data<-data %>%
    # Create a Date column
    mutate(DATE=as.Date(paste(YEAR, MONTH, DAY),
                        format="%Y %m %d")) %>%
    # Turn Long and Lat into numeric
    mutate(LONGITUDE=as.numeric(LONGITUDE)) %>%
    mutate(LATITUDE=as.numeric(LATITUDE))

    return(data)
}


#' @title LOCATION_NAME Column cleanup
#'
#' @description This function takes a data frame, ideally from the NOAA Significant Earthquakes dataset, and cleans the LOCATION_NAME column. Specifically, it deletes the name of the country, if there is one included in the string, leaving only the region name. It also changes the string into title case. This function is meant to be used in the geoms of this package, so it does not need to be exported.
#'
#' @param data A data frame with a LOCATION_NAME column.
#'
#' @return A data frame with the LOCATION_NAME column changed. Specifically, it deletes the name of the country, if there is one included in the string, leaving only the region name. It also changes the string into title case.
#' @import dplyr
#' @import magrittr
#' @import stringr
#' @export
#'
#' @examples
#' \dontrun{load("data/data.rda")
#' data<-eq_location_clean(data)}
eq_location_clean<-function(data){

 data<-data %>%
   # If the name has a colon, delete what comes before that
   mutate(LOCATION_NAME=ifelse(str_detect(LOCATION_NAME, ":"),
                               str_split(LOCATION_NAME,
                                         pattern=":  ",
                                         simplify=T)[,2],
                               as.character(LOCATION_NAME))) %>%
   # Turn the name to title case
   mutate(LOCATION_NAME=str_to_title(LOCATION_NAME))

 return(data)
}




#---------------------GEOM 1: timeline-----------------------
#------Create a geom to plot the earthquake timelines--------
#------------------------------------------------------------

# Create a stat that filters the dates and countries
StatTime <- ggproto("StatTime", Stat,

          required_aes = c("x"),

          compute_group = function(data, scales,
                                   xmin=-Inf, xmax=+Inf,
                                   y=NULL) {

            data<-data %>%
              dplyr::filter(!is.na(x)) %>%
              dplyr::filter(x>=xmin & x<=xmax)

            if (!is.null(y)){
              data<-data %>%
                dplyr::filter(COUNTRY %in% y)
            }

            data
         }
)


#' @title Timeline Stat
#'
#' @description This stat is used in conjuction with the timeline geom. It filters the requested dates and countries, before passing the data to geom_timeline().
#'
#' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
#' @param data The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. All objects will be fortified to produce a data frame. See fortify() for which variables will be created. A function will be called with a single argument, the plot data. The return value must be a data.frame, and will be used as the layer data. A function can be created from a formula (e.g. ~ head(.x, 10)).
#' @param geom Use to override the default connection with geom_timeline()
#' @param position Position adjustment, either as a string, or the result of a call to a position adjustment function.
#' @param show.legend 	logical. Should this layer be included in the legends? NA, the default, includes if any aesthetics are mapped. FALSE never includes, and TRUE always includes. It can also be a named logical vector to finely select the aesthetics to display.
#' @param inherit.aes If FALSE, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. borders().
#' @param na.rm If FALSE, the default, missing values are removed with a warning. If TRUE, missing values are silently removed.
#' @param xmin Date: The minimum date to be concluded in the plot
#' @param xmax Date: The maximum date to be concluded in the plot
#' @param ... Other arguments passed on to layer(). These are often aesthetics, used to set an aesthetic to a fixed value, like colour = "red" or size = 3. They may also be parameters to the paired geom/stat.
#'
#' @import ggplot2
#' @import dplyr
#' @import magrittr
#' @export
#'
#' @examples \dontrun{load("data/data.rda")
#' ggplot(data) +
#' stat_time(geom="timeline")}
stat_time<-function(mapping=NULL, data=NULL,
                      geom = "timeline",
                      position="identity",
                      show.legend = NA,
                      inherit.aes = TRUE,
                      na.rm=TRUE, xmin=-Inf, xmax=+Inf, ...) {
  ggplot2::layer(
    stat = StatTimeline,
    data = data,
    mapping = mapping,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(na.rm=na.rm,
                  xmin=xmin, xmax=xmax, ...)
  )
}

# Create a geom that plots segments and points
GeomTimeline<-ggproto("GeomTimeline", Geom,

    required_aes = c("x"),

    default_aes = aes(y=0.5,
                      shape = 19,
                      colour = "black",
                      alpha=1,
                      fill="black",
                      size=1,
                      lty=1),

    draw_key = draw_key_point,

    draw_panel=function(data, panel_params, coord) {
      n <- nrow(data)
      if (n < 2) return(grid::nullGrob())

      coords<-coord$transform(data, panel_params)

      first_row<-coords[1, , drop=FALSE]

      line<-grid::segmentsGrob(
            x0=min(coords$x),
            x1=max(coords$x),
            y0=coords$y,
            y1=coords$y,
            default.units = "native",
            gp = grid::gpar(
              col = first_row$colour,
              lty = first_row$linetype
            )
          )

      point<-grid::pointsGrob(
              x=coords$x,
              y=coords$y,
              pch=coords$shape,
              gp=grid::gpar(col=coords$colour,
                            size=coords$size,
                            alpha=coords$alpha)
            )

    grid::gTree(children = grid::gList(line, point))
    }
)

#' @title Timeline Geom
#'
#' @description This geom depicts the earthquake data from the NOAA Significant Earthquakes dataset as a timeline with points for each earthquake. It works best if the user chooses to group the data by country.
#'
#' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
#' @param data The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. All objects will be fortified to produce a data frame. See fortify() for which variables will be created. A function will be called with a single argument, the plot data. The return value must be a data.frame, and will be used as the layer data. A function can be created from a formula (e.g. ~ head(.x, 10)).
#' @param stat The statistical transformation to use on the data for this layer, as a string.
#' @param position Position adjustment, either as a string, or the result of a call to a position adjustment function.
#' @param na.rm If FALSE, the default, missing values are removed with a warning. If TRUE, missing values are silently removed.
#' @param show.legend 	logical. Should this layer be included in the legends? NA, the default, includes if any aesthetics are mapped. FALSE never includes, and TRUE always includes. It can also be a named logical vector to finely select the aesthetics to display.
#' @param inherit.aes 	If FALSE, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. borders().
#' @param ... 	Other arguments passed on to layer(). These are often aesthetics, used to set an aesthetic to a fixed value, like colour = "red" or size = 3. They may also be parameters to the paired geom/stat.
#'
#' @import ggplot2
#' @import magrittr
#' @import grid
#' @export
#'
#' @examples \dontrun{
#' # Read the data
#' load("data/data.rda")
#' # Subset the data for plotting examples
#' data2<-data %>%
#'  filter(COUNTRY %in% c("USA", "CHINA"))
#' ggplot(data2, aes(x=DATE, y=COUNTRY, size=EQ_PRIMARY, color=DEATHS)) +
#'  geom_timeline(alpha=0.3,
#'               xmin=as.Date("2012-01-01"),
#'               xmax=as.Date("2015-01-01")) +
#' theme(axis.line.x = element_line(colour="black", size=1),
#'       axis.title.y=element_blank(),
#'        legend.position="bottom",
#'        panel.background=element_blank())+
#'  labs(size="Richter scale value",
#'       color="# deaths")}
geom_timeline<-function(mapping = NULL, data = NULL,
                        stat = "time", position = "identity",
                        na.rm = TRUE, show.legend = NA,
                        inherit.aes = TRUE, ...) {

  layer(
    geom=GeomTimeline, mapping=mapping, data=data,
    stat=stat, position=position, show.legend=show.legend,
    inherit.aes=inherit.aes,
    params= list(na.rm = na.rm, ...)
  )
}


#------------------GEOM 2: timeline_label--------------------
#------Create a geom to plot the location name labels--------
#------------------------------------------------------------

StatTimelineLabel <- ggproto("StatTimelineLabel", Stat,

     required_aes = c("x"),

     compute_group = function(data, scales,
                              xmin=-Inf, xmax=+Inf,
                              y=NULL, n_max=NULL) {
       data<-data %>%
         dplyr::filter(!is.na(x)) %>%
         dplyr::filter(x>=xmin & x<=xmax) %>%
         dplyr::filter(label!="")

       if (!is.null(y)){
         data<-data %>%
           dplyr::filter(COUNTRY %in% y)
       }

       if (!is.null(n_max)){
         data<-data %>%
           dplyr::arrange(desc(magnitude))
         data<-data[1:n_max,]
       }

       data
     }
)


#' @title Timeline Label Stat
#'
#' @description This stat is used in conjuction with the timeline_label geom. It filters the requested dates and countries, as well as keeps the n_max earthquakes with the highest magnitude, before passing the data to geom_timeline().
#'
#' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
#' @param data The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. All objects will be fortified to produce a data frame. See fortify() for which variables will be created. A function will be called with a single argument, the plot data. The return value must be a data.frame, and will be used as the layer data. A function can be created from a formula (e.g. ~ head(.x, 10)).
#' @param geom Use to override the default connection with geom_timeline()
#' @param position Position adjustment, either as a string, or the result of a call to a position adjustment function.
#' @param show.legend 	logical. Should this layer be included in the legends? NA, the default, includes if any aesthetics are mapped. FALSE never includes, and TRUE always includes. It can also be a named logical vector to finely select the aesthetics to display.
#' @param inherit.aes If FALSE, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. borders().
#' @param na.rm If FALSE, the default, missing values are removed with a warning. If TRUE, missing values are silently removed.
#' @param xmin Date: The minimum date to be concluded in the plot
#' @param xmax Date: The maximum date to be concluded in the plot
#' @param ... Other arguments passed on to layer(). These are often aesthetics, used to set an aesthetic to a fixed value, like colour = "red" or size = 3. They may also be parameters to the paired geom/stat.
#' @param n_max Numeric: The number of the earthquakes with the highest magnitude, whose labels will be shown in the plot.
#'
#' @import dplyr
#' @import magrittr
#' @import ggplot2
#' @export
#'
#' @examples \dontrun{# Read the data
#' load("data/data.rda")
#' # Subset the data for plotting examples
#' data2<-data %>%
#'  filter(COUNTRY %in% c("USA", "CHINA"))
#' ggplot(data2) +
#' stat_timeline_label(geom="timeline_label")}
stat_timeline_label<-function(mapping=NULL, data=NULL,
                              geom = "timeline_label",
                              position="identity",
                              show.legend = NA,
                              inherit.aes = TRUE,
                              na.rm=FALSE,
                              xmin=-Inf, xmax=+Inf,
                              n_max=NULL, ...) {
  ggplot2::layer(
    stat = StatTimelineLabel,
    data = data,
    mapping = mapping,
    geom = geom,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(na.rm=na.rm,
                  xmin=xmin, xmax=xmax, n_max=n_max, y=y, ...)
  )
}

GeomTimelineLabel<-ggproto("GeomTimelineLabel", Geom,

     required_aes = c("x"),

     default_aes = aes(y=0.5,
                       shape = 19,
                       colour = "black",
                       alpha=1,
                       fill="black",
                       size=1,
                       lty=1),

     draw_key = draw_key_point,

     draw_panel=function(data, panel_params, coord) {
       n <- nrow(data)
       if (n < 2) return(grid::nullGrob())

       coords<-coord$transform(data, panel_params)

       first_row<-coords[1, , drop=FALSE]

       line<-grid::segmentsGrob(
         x0=coords$x,
         x1=coords$x,
         y0=coords$y,
         y1=coords$y+0.05,
         default.units = "native"
       )

       point<-grid::textGrob(
         label=coords$label,
         x=coords$x,
         y=coords$y+0.05,
         rot=45,
         just="left"
       )

       grid::gTree(children = grid::gList(line, point))
     }
)

#' @title Timeline Label Geom
#'
#' @description This geom adds the earthquake location name, from the NOAA Significant Earthquakes dataset, The user can choose the number of the earthquake labels to appear in the plot. It works best in conjuction with the geom_timeline().
#'
#' @param mapping Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
#' @param data The data to be displayed in this layer. There are three options: If NULL, the default, the data is inherited from the plot data as specified in the call to ggplot(). A data.frame, or other object, will override the plot data. All objects will be fortified to produce a data frame. See fortify() for which variables will be created. A function will be called with a single argument, the plot data. The return value must be a data.frame, and will be used as the layer data. A function can be created from a formula (e.g. ~ head(.x, 10)).
#' @param stat The statistical transformation to use on the data for this layer, as a string.
#' @param position Position adjustment, either as a string, or the result of a call to a position adjustment function.
#' @param na.rm If FALSE, the default, missing values are removed with a warning. If TRUE, missing values are silently removed.
#' @param show.legend 	logical. Should this layer be included in the legends? NA, the default, includes if any aesthetics are mapped. FALSE never includes, and TRUE always includes. It can also be a named logical vector to finely select the aesthetics to display.
#' @param inherit.aes 	If FALSE, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. borders().
#' @param ... 	Other arguments passed on to layer(). These are often aesthetics, used to set an aesthetic to a fixed value, like colour = "red" or size = 3. They may also be parameters to the paired geom/stat.
#'
#' @import ggplot2
#' @import magrittr
#' @import grid
#' @export
#'
#' @examples \dontrun{
#' # Read the data
#' load("data/data.rda")
#' # Subset the data for plotting examples
#' data2<-data %>%
#'  filter(COUNTRY %in% c("USA", "CHINA"))
#' ggplot(data2, aes(x=DATE, y=COUNTRY, size=EQ_PRIMARY,
#'                   color=DEATHS, label=LOCATION_NAME,
#'                   magnitude=EQ_PRIMARY)) +
#'  geom_timeline(alpha=0.3,
#'                xmin=as.Date("2000-01-01"),
#'                xmax=as.Date("2017-01-01")) +
#'  geom_timeline_label(xmin=as.Date("2000-01-01"),
#'                      xmax=as.Date("2015-01-01"),
#'                      n_max=5)+
#'  theme(axis.line.x = element_line(colour="black", size=1),
#'        axis.title.y=element_blank(),
#'        legend.position="bottom",
#'        panel.background=element_blank())+
#'  labs(size="Richter scale value",
#'       color="# deaths")}
geom_timeline_label<-function(mapping = NULL, data = NULL,
                              stat = "timeline_label",
                              position = "identity",
                              na.rm = FALSE, show.legend = NA,
                              inherit.aes = TRUE, ...) {

  layer(
    geom=GeomTimelineLabel, mapping=mapping, data=data,
    stat=stat, position=position, show.legend=show.legend,
    inherit.aes=inherit.aes,
    params= list(na.rm = na.rm, ...)
  )
}





#--------------------------Leaflet---------------------------
#----Create an interactive map to depict the earthquakes-----
#------------------------------------------------------------


#' @title Interactive Pop-Up Map
#'
#' @description This function creates an interactive map that shows the location of the earthquakes in the NOAA Significant Earthquakes dataset. The locations are depicted in circular markers which pop-up more information about the earthquakes, if selected.
#'
#' @param data A data frame from the NOAA Significant Earthquakes dataset.
#' @param annot_col The column of the data frame to display informations from when a marker is poped-up.
#'
#' @return An object of class "leaflet" (An interactive map)
#'
#' @import leaflet
#' @import dplyr
#' @import magrittr
#' @export
#'
#' @examples \dontrun{load("data/data.rda")
#' data %>%
#'eq_clean_data() %>%
#'  dplyr::filter(COUNTRY == "MEXICO" & lubridate::year(DATE) >= 2000) %>%
#'  eq_map(annot_col = "DATE")}
eq_map<-function(data, annot_col){

  # Prepare a pop-up column in the data
  popup_info=data[,annot_col]
  data<-data.frame(data, popup_info)

  # Create the leaflet map
  data %>%
    leaflet() %>%
    addTiles() %>%
    addCircleMarkers(radius = ~ EQ_PRIMARY,
                     lng = ~ LONGITUDE,
                     lat = ~ LATITUDE,
                     popup = ~ popup_info)


}




#' @title Labels for Interactive Map
#'
#' @description This function creates detailed labels for the interactive map, so that the pop-up markers include more information (Location name, magnitude of the earthquake and number of victims) about the earthquakes.
#'
#' @param data A data frame from the NOAA Significant Earthquakes dataset.
#'
#' @return Character: A vector with the label text
#'
#' @import leaflet
#' @import dplyr
#' @import magrittr
#'
#' @export
#'
#' @examples \dontrun{load("data/data.rda")
#'data %>%
#'  eq_clean_data() %>%
#'  dplyr::filter(COUNTRY == "MEXICO" & lubridate::year(DATE) >= 2000) %>%
#'  dplyr::mutate(popup_text = eq_create_label(.)) %>%
#'  eq_map(annot_col = "popup_text")}
eq_create_label<-function(data){

  data<-eq_location_clean(data)

  data<-data.frame(data,
                   popup_text=rep("", times=dim(data)[1]))

  data<-data %>%
    mutate(popup_text=ifelse(is.na(LOCATION_NAME),
                             popup_text,
                             paste(popup_text,
                                   "<b>Location:</b>", LOCATION_NAME, "<br />")),
           popup_text=ifelse(is.na(EQ_PRIMARY),
                             popup_text,
                             paste(popup_text,
                                   "<b>Magnitude:</b>", EQ_PRIMARY, "<br />")),
           popup_text=ifelse(is.na(DEATHS),
                             popup_text,
                             paste(popup_text,
                                   "<b>Total Deaths:</b>", DEATHS, "<br />")))
  data$popup_text
}
thechibo/earthquakes documentation built on April 15, 2020, 4:56 p.m.