# R/fars_functions.R

#' Read a data file
#'
#' Data loading tool that reads a comma separated file after checking
#' that it exists. Messages emitted while reading are suppressed and no
#' progress bar is shown.
#'
#' @param filename The path to the file to be read, relative to
#' the current working directory.
#'
#' @return The contents of the file as a tibble. If \code{filename}
#' does not point to an existing file, an error is raised with the
#' message \code{"file '<filename>' does not exist"}.
#'
#' @importFrom readr read_csv
#' @importFrom dplyr as_tibble
#'
#' @examples
#' \dontrun{
#' fars_read("accident_2013.csv.bz2")
#' fars_read("accident_2015.csv.bz2")
#' }
#' @export
fars_read <- function(filename) {
        # Fail early with a clear message instead of a reader-specific error.
        if(!file.exists(filename))
                stop("file '", filename, "' does not exist")
        data <- suppressMessages({
                readr::read_csv(filename, progress = FALSE)
        })
        # dplyr::tbl_df() is deprecated; as_tibble() is its replacement.
        dplyr::as_tibble(data)
}

#' Build the data file name for a year
#'
#' Creates the name of the compressed comma separated file that holds
#' the data for the given year.
#'
#' @param year The year of interest. It is coerced with
#' \code{as.integer} before being inserted into the name.
#'
#' @return A character string of the form
#' \code{"accident_<year>.csv.bz2"}.
#'
#' @examples
#' make_filename(2015)
#'
#' @export
make_filename <- function(year) {
        sprintf("accident_%d.csv.bz2", as.integer(year))
}

#' Month and year index
#'
#' Reads the data file of every requested year and reduces it to two
#' columns: the \code{MONTH} of each accident and the requested
#' \code{year}. A year whose data cannot be read or processed does not
#' stop the call: a warning \code{"invalid year: <year>"} is issued and
#' the entry for that year is \code{NULL}.
#'
#' @param years A vector of years specifying what data is to be retrieved.
#'
#' @return A list with one element per entry of \code{years}. Each
#' element is a tibble with the columns \code{MONTH} and \code{year}, or
#' \code{NULL} for a year for which the data could not be obtained.
#'
#' @importFrom dplyr mutate select
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' fars_read_years(2013)
#' fars_read_years(c(2012, 2013, 2015))
#' }
#'
#' @export
fars_read_years <- function(years) {
        lapply(years, function(year) {
                file <- make_filename(year)
                tryCatch({
                        dat <- fars_read(file)
                        dplyr::mutate(dat, year = year) %>%
                                dplyr::select(MONTH, year)
                }, error = function(e) {
                        # Any failure while reading or selecting is reported
                        # as an invalid year. call. = FALSE keeps the internal
                        # handler call out of the printed warning.
                        warning("invalid year: ", year, call. = FALSE)
                        NULL
                })
        })
}

#' Monthly accident count
#'
#' This function gives a monthly breakdown of accident counts for the
#' specified years. The per-year data is obtained with
#' \code{fars_read_years}, which issues a warning for (and then skips)
#' every year whose data is not available.
#'
#' @param years A vector of years for which a summary is to be done.
#'
#' @return A tibble with a \code{MONTH} column and one column per year
#' for which data was found, holding the number of accidents recorded
#' for that month and year.
#'
#' @importFrom dplyr bind_rows group_by summarize
#' @importFrom tidyr spread
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' fars_summarize_years(2013)
#' fars_summarize_years(c(2012, 2013, 2015))
#' }
#' @export
fars_summarize_years <- function(years) {
        dat_list <- fars_read_years(years)
        dplyr::bind_rows(dat_list) %>%
                dplyr::group_by(year, MONTH) %>%
                # n() must be namespace-qualified: it is not imported, so a
                # bare n() is not found unless dplyr happens to be attached.
                dplyr::summarize(n = dplyr::n()) %>%
                tidyr::spread(year, n)
}

#' Map accidents in a state in a year
#'
#' Draws the state map and marks the geographical location of every
#' accident recorded for the given state in the given year.
#'
#' An error is raised if no data file exists for \code{year}, or if
#' \code{state.num} does not occur in the \code{STATE} column of that
#' year's data. If the state has no rows to plot, the message
#' \code{"no accidents to plot"} is shown and nothing is drawn.
#'
#' @param state.num The number code of the state of interest. It is
#' coerced with \code{as.integer}.
#'
#' @param year The year of interest.
#'
#' @return Called for its side effect: a map with one dot per accident.
#' When there is nothing to plot, \code{NULL} is returned invisibly.
#'
#' @importFrom dplyr filter
#' @importFrom maps map
#' @importFrom graphics points
#'
#' @examples
#' \dontrun{
#' fars_map_state(1, 2015)
#' }
#' @export
fars_map_state <- function(state.num, year) {
        data <- fars_read(make_filename(year))
        state.num <- as.integer(state.num)

        # The state code must be present in this year's data.
        if(!(state.num %in% data$STATE)) {
                stop("invalid STATE number: ", state.num)
        }

        accidents <- dplyr::filter(data, STATE == state.num)
        if(nrow(accidents) == 0L) {
                message("no accidents to plot")
                return(invisible(NULL))
        }

        # Out-of-range coordinates are set to NA so they are neither plotted
        # nor used for the map limits (presumably codes for unknown
        # positions -- confirm against the data documentation).
        is.na(accidents$LONGITUD) <- accidents$LONGITUD > 900
        is.na(accidents$LATITUDE) <- accidents$LATITUDE > 90

        with(accidents, {
                maps::map("state",
                          ylim = range(LATITUDE, na.rm = TRUE),
                          xlim = range(LONGITUD, na.rm = TRUE))
                graphics::points(LONGITUD, LATITUDE, pch = 46)
        })
}
# Moosquibe/BuildingRPackagesFinalProject documentation built on May 27, 2019, 11:43 a.m.