# R/fars_functions.R

#' Import data
#'
#' @param filename name of a dataset as character string
#'
#' @return data.frame of class: data.frame and tbl_df
#' @importFrom readr read_csv
#' @importFrom dplyr tbl_df
#'
#' @export fars_read
#'
#' @description
#' \code{fars_read} imports data into R session as tbl_df.
#'
#' @details
#' This is a thin wrapper function that only reads data from disk. It stops
#' with an error message if the file is not found. If it finds the file on
#' disk, it imports it, suppressing the messages generated by the
#' \code{readr::read_csv} function. In the end, the function converts the data
#' to a \code{tbl_df} even though that conversion is already done by the
#' \code{readr::read_csv} function.
#'
#' @examples
#' my_filename <- here::here("data/accident_2013.csv.bz2")
#' my_df <- fars_read(filename = my_filename)
#' my_df

fars_read <- function(filename) {
  # Fail early with a clear message instead of letting readr raise its own
  # error for a missing path.
  if (!file.exists(filename)) {
    stop("file '", filename, "' does not exist")
  }
  # read_csv() reports the guessed column specification via message();
  # silence it so callers get clean output.
  data <- suppressMessages({
    readr::read_csv(filename, progress = FALSE)
  })
  # tbl_df() is deprecated since dplyr 1.0.0; as_tibble() is its replacement
  # and yields the same tbl_df / tbl / data.frame classes.
  dplyr::as_tibble(data)
}

####----------------------------------------------------------------------------

#' Construct file name for a given year
#'
#' @param year integer year value
#'
#' @return filename string of class character
#'
#' @importFrom here here
#' @export make_filename
#'
#' @description
#' \code{make_filename} constructs filename string of class character using
#' argument year that is provided by user.
#'
#' @details
#' This function constructs a string that is a filename of dataset that one
#' would like to import into R session. By providing value for year, it will
#' construct the name using \code{sprintf} function in an appropriate manner
#' for dataset to be imported.
#'
#' @examples
#' my_year <- 2013
#' my_filename <- make_filename(year = my_year)
#' my_filename

make_filename <- function(year) {
  # Coerce to integer so that "%d" accepts character or double input.
  file_name <- sprintf("accident_%d.csv.bz2", as.integer(year))
  # Resolve the file inside the project's "data" directory.
  here::here("data", file_name)
}

####----------------------------------------------------------------------------

#' Import dataset for multiple years
#'
#' @param years vector of years as integers
#'
#' @return a list where each element of a list is an object of class tbl_df
#' and data.frame
#'
#' @export fars_read_years
#'
#' @import  dplyr
#'
#' @description
#' \code{fars_read_years} imports a set of datasets into the R session at once,
#' returning a list of data frames of class tbl_df, tbl and data.frame.
#' After importing each dataset, it adds a \code{year} column filled with the
#' respective year, then selects the columns \code{MONTH} and \code{year}.
#'
#' If the dataset for a requested year cannot be read, the corresponding
#' element of the returned list is \code{NULL} and a warning is issued.
#'
#' @examples
#'
#' my_years <- 2013:2015
#' my_dataset_list <- fars_read_years(my_years)
#' my_dataset_list
#'

fars_read_years <- function(years) {
  # Worker for a single year. The parameter must stay named `year`: it is
  # both the value written into the new column and the value reported in the
  # warning.
  read_one_year <- function(year) {
    file <- make_filename(year)
    tryCatch(
      {
        yearly <- fars_read(file)
        with_year <- dplyr::mutate(yearly, year = year)
        dplyr::select(with_year, MONTH, year)
      },
      # Any failure while reading or reshaping becomes a warning plus a NULL
      # placeholder, so the remaining years are still processed.
      error = function(e) {
        warning("invalid year: ", year)
        NULL
      }
    )
  }
  lapply(years, read_one_year)
}

####----------------------------------------------------------------------------

#' Count observations by year and month
#'
#' @param years vector of years as integers
#'
#' @return object of class data.frame, tbl_df, tbl
#'
#' @import dplyr tidyr
#' @export fars_summarize_years
#'
#' @description
#' \code{fars_summarize_years} imports into R session a set of datasets at once,
#' binds rows of elements of list into a data.frame object,
#' counts observations by year and month, spreads result by year and value pair,
#' returning a data.frame object of class tbl_df, data.frame and tbl.
#'
#' @examples
#'
#' my_years <- 2013:2015
#' my_count_by_year_and_month_df <- fars_summarize_years(my_years)
#' my_count_by_year_and_month_df

fars_summarize_years <- function(years) {
  # One (MONTH, year) table per requested year; years that could not be read
  # come back as NULL and are dropped by bind_rows().
  dat_list <- fars_read_years(years)
  combined <- dplyr::bind_rows(dat_list)

  # Count rows per year/month. Every dplyr call is namespace-qualified
  # (including n()) so the function does not depend on dplyr being attached
  # or on the magrittr pipe being in scope.
  by_year_month <- dplyr::group_by(combined, year, MONTH)
  counts <- dplyr::summarize(by_year_month, n = dplyr::n())

  # Wide layout: one row per MONTH, one column per year.
  tidyr::spread(counts, year, n)
}

####----------------------------------------------------------------------------

#' Plot accidents for chosen state on a map
#'
#' @param state.num state ID number
#' @param year value for year
#'
#' @return map plot
#'
#' @import dplyr
#' @importFrom maps map
#' @importFrom graphics points
#'
#' @export fars_map_state
#'
#' @description
#' \code{fars_map_state} plots geographical distribution of accidents on a map
#' of respective state chosen by user using R's base graphics system. Function
#' stops with a message if state number is not found in a dataset or returns
#' NULL if there are no accidents in a given state.
#'
#' @examples
#'
#' my_state_id <- 1
#' fars_map_state(my_state_id, 2015)
#'

fars_map_state <- function(state.num, year) {
  accidents <- fars_read(make_filename(year))
  state.num <- as.integer(state.num)

  # Reject state codes that do not occur in this year's data.
  known_states <- unique(accidents$STATE)
  if (!(state.num %in% known_states)) {
    stop("invalid STATE number: ", state.num)
  }

  state_rows <- dplyr::filter(accidents, STATE == state.num)
  if (nrow(state_rows) == 0L) {
    message("no accidents to plot")
    return(invisible(NULL))
  }

  # Coordinates above these thresholds are treated as missing so they do not
  # distort the plotting range.
  state_rows$LONGITUD[state_rows$LONGITUD > 900] <- NA
  state_rows$LATITUDE[state_rows$LATITUDE > 90] <- NA

  lon <- state_rows$LONGITUD
  lat <- state_rows$LATITUDE

  # Draw the state outlines cropped to the accident extent, then overlay one
  # small dot (pch = 46) per accident.
  maps::map("state",
            ylim = range(lat, na.rm = TRUE),
            xlim = range(lon, na.rm = TRUE))
  graphics::points(lon, lat, pch = 46)
}
# Struya/mtrostR documentation built on May 15, 2019, 4:18 a.m.