R/stations.R

#' Extract station matrix from SQLite3 database
#'
#' @param bikedb A string containing the path to the SQLite3 database.
#' If no directory specified, it is presumed to be in \code{tempdir()}.
#' @param city Optional city (or vector of cities) for which stations are to be
#' extracted
#'
#' @return Matrix containing data for each station
#'
#' @export
#'
#' @examples
#' \dontrun{
#' data_dir <- tempdir ()
#' bike_write_test_data (data_dir = data_dir)
#' # or download some real data!
#' # dl_bikedata (city = 'la', data_dir = data_dir)
#' bikedb <- file.path (data_dir, 'testdb')
#' store_bikedata (data_dir = data_dir, bikedb = bikedb)
#' # create database indexes for quicker access:
#' index_bikedata_db (bikedb = bikedb)
#'
#' stations <- bike_stations (bikedb)
#' head (stations)
#'
#' bike_rm_test_data (data_dir = data_dir)
#' bike_rm_db (bikedb)
#' # don't forget to remove real data!
#' # file.remove (list.files (data_dir, pattern = '.zip'))
#' }
bike_stations <- function (bikedb, city) {

    if (missing (bikedb))
        stop ("Can't get station data if bikedb isn't provided")

    bikedb <- check_db_arg (bikedb)

    db <- DBI::dbConnect (RSQLite::SQLite(), bikedb, create = FALSE)
    st <- tibble::as_tibble (DBI::dbReadTable (db, "stations"))
    DBI::dbDisconnect (db)

    if (!missing (city))
        st <- st [which (st$city %in% convert_city_names (city)), ]

    st$longitude <- as.numeric (st$longitude)
    st$latitude <- as.numeric (st$latitude)
    # some token/test stns don't have lat-lons, and these become NAs
    indx <- which (!is.na (st$longitude) & !is.na (st$latitude))
    st <- st [indx, ]
    # and some have lat-lons of zero, so remove these too
    indx <- which (abs (st$longitude) > 1e-6 &
                   abs (st$latitude) > 1e-6)
    st <- st [indx, ]

    return (st)
}

#' Get London station data from Transport for London (TfL)
#'
#' @param external If \code{TRUE}, download latest list of stations from
#' external TfL site, otherwise use potentially obsolete internal version.
#' @param quiet If \code{FALSE}, just declare getting stations (coz it can take
#' a while).
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in London's
#' Santander Cycles system
#'
#' @noRd
bike_get_london_stations <- function (external = TRUE, quiet = TRUE) {

    if (external)
        res <- get_london_stns_external (quiet = quiet)
    else
        res <- get_london_stns_internal ()

    return (res)
}

get_london_stns_internal <- function () {
    env <- new.env ()
    utils::data ("lo_stns", envir = env)
    return (env$lo_stns)
}

get_london_stns_external <- function (quiet = TRUE) {
    if (!quiet)
        message ("getting london stations ...", appendLF = FALSE)
    tfl_url <- "https://api.tfl.gov.uk/BikePoint"
    resp <- httr::GET (tfl_url)
    res <- NULL
    if (resp$status_code == 200) {

        doc <- httr::content (resp, encoding  =  "UTF-8")
        id <- unlist (lapply (doc, function (i)
                              strsplit (i$id, "BikePoints_") [[1]] [2]))
        name <- unlist (lapply (doc, function (i)
                                gsub ("'", "", i$commonName))) #nolint
        lon <- unlist (lapply (doc, function (i) i$lon))
        lat <- unlist (lapply (doc, function (i) i$lat))
        res <- data.frame (id = id, name = name, lon = lon, lat = lat,
                           stringsAsFactors = FALSE)
    }
    if (!quiet)
        message (" done")

    return (res)
}

#' Get Chicago station data
#'
#' @param flists List of files returned from bike_unzip_files_chicago
#'
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in
#' Chicago's Divvybikes system
#'
#' @noRd
bike_get_chicago_stations <- function (flists) {


    id <- name <- lon <- lat <- NULL
    for (f in flists$flist_csv_stns) {

        fi <- utils::read.csv (f, header = TRUE)
        id <- c (id, paste0 (fi$id))
        name <- c (name, paste0 (fi$name))
        lon <- c (lon, paste0 (fi$longitude))
        lat <- c (lat, paste0 (fi$latitude))
    }
    res <- data.frame (id = id, name = name, lon = lon, lat = lat,
                       stringsAsFactors = FALSE)
    res <- res [which (!duplicated (res)), ]

    return (res)
}

#' Get Boston station data
#'
#' @param flists List of files returned from bike_unzip_files, which includes
#' entries in \code{$flist_csv_stns}
#'
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in Boston's
#' Hubway system
#'
#' @noRd
bike_get_bo_stations <- function (flists, data_dir) {

    if (is.null (flists$flist_csv_stns)) {

        # then download station data ...
        dl_files <- get_bike_files (city = "bo")
        dl_files <- dl_files [which (grepl ("Stations", dl_files))]
        for (f in dl_files) {

            furl <- gsub (" ", "%20", f)
            f <- gsub (" ", "", f)
            destfile <- file.path (data_dir, basename(f))
            resp <- httr::GET (furl,
                               httr::write_disk (destfile, overwrite = TRUE))
            if (resp$status_code != 200) {

                count <- 0
                while (!file.exists (destfile) & count < 5) {

                    resp <- httr::GET (furl,
                                       httr::write_disk (destfile,
                                                         overwrite = TRUE))
                    count <- count + 1
                }
                if (!file.exists (destfile))
                    stop ("Download request failed")
            }
        }
        flists$flist_csv_stns <- file.path (data_dir, basename (dl_files))
    }

    id <- name <- lon <- lat <- NULL
    for (f in flists$flist_csv_stns) {

        f <- flists$flist_csv_stns [2]
        fi <- utils::read.csv (f, header = TRUE)
        if ("Station.ID" %in% names (fi)) {
            id <- c (id, paste0 (fi$Station.ID))
            name <- c (name, paste0 (fi$Station))
        } else {
            id <- c (id, paste0 (fi$Number))
            name <- c (name, paste0 (fi$Name))
        }
        lon <- c (lon, paste0 (fi$Longitude))
        lat <- c (lat, paste0 (fi$Latitude))
    }
    # Remove apostrophes from names coz they muck up sqlite fields:
    name <- gsub ("\'", "", name)
    res <- data.frame (id = id, name = name, lon = lon, lat = lat,
                       stringsAsFactors = FALSE)
    res <- res [which (!duplicated (res)), ]

    return (res)
}

#' Get Minneapolis/Minnesota station data
#'
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in Boston's
#' Hubway system
#'
#' @noRd
bike_get_mn_stations <- function (flists) {

    if (is.null (flists$flist_csv_stns))
        stop ("Station files must be in nominated data_dir")

    id <- name <- lon <- lat <- NULL
    for (f in flists$flist_csv_stns) {

        fi <- utils::read.csv (f, header = TRUE)
        idcol <- grep ("terminal|number", names (fi), ignore.case = TRUE)
        nmcol <- grep ("station|name", names (fi), ignore.case = TRUE)
        loncol <- grep ("lon", names (fi), ignore.case = TRUE)
        latcol <- grep ("lat", names (fi), ignore.case = TRUE)
        id <- c (id, paste0 (fi [[idcol]]))
        name <- c (name, paste0 (fi [[nmcol]]))
        lon <- c (lon, paste0 (fi [[loncol]]))
        lat <- c (lat, paste0 (fi [[latcol]]))
    }
    # Remove apostrophes from names coz they muck up sqlite fields:
    name <- gsub ("\'", "", name)
    res <- data.frame (id = id, name = name, lon = lon, lat = lat,
                       stringsAsFactors = FALSE)
    res <- res [which (!duplicated (res)), ]

    indx <- which (res$lon != "N/A" & res$lon != "NA" &
                   res$lat != "N/A" & res$lat != "NA")
    return (res [indx, ])
}

#' Get Montreal station data
#'
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in
#' Montreal's Bixi system
#'
#' @noRd
bike_get_mo_stations <- function (flists) {

    if (is.null (flists$flist_csv_stns))
        stop ("Station files must be in nominated data_dir")

    id <- name <- lon <- lat <- NULL
    for (f in flists$flist_csv_stns) {

        fi <- utils::read.csv (f, header = TRUE)
        idcol <- grep ("code", names (fi), ignore.case = TRUE)
        nmcol <- grep ("name", names (fi), ignore.case = TRUE)
        loncol <- grep ("longitude", names (fi), ignore.case = TRUE)
        latcol <- grep ("latitude", names (fi), ignore.case = TRUE)
        id <- c (id, paste0 (fi [[idcol]]))
        name <- c (name, paste0 (fi [[nmcol]]))
        lon <- c (lon, paste0 (fi [[loncol]]))
        lat <- c (lat, paste0 (fi [[latcol]]))
    }
    # Remove apostrophes from names coz they muck up sqlite fields:
    name <- gsub ("\'", "", name)
    res <- data.frame (id = id, name = name, lon = lon, lat = lat,
                       stringsAsFactors = FALSE)
    res <- res [which (!duplicated (res)), ]

    indx <- which (res$lon != "N/A" & res$lon != "NA" &
                   res$lat != "N/A" & res$lat != "NA")
    return (res [indx, ])
}

#' Get Washington DC station data
#'
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in
#' Washington DC's Capital Bike Share system
#'
#' @note This data is available online from
#' http://opendata.dc.gov/datasets/capital-bike-share-locations/
#' but this is a wrapper around an opendata.argis.com server that is not
#' reliable because it very commonly returns errors and fails to retrieve the
#' data. The relevant data were therefore downloaded and stored in
#' R/sysdata.rda. These data will need updating as the system expands in the
#' future.
#'
#' @noRd
bike_get_dc_stations <- function () {

    # rm apostrophes from names (only "L'Enfant Plaza"):
    # stations_dc is lazy loaded from R/sysdata.rda
    name <- noquote (gsub ("'", "", sysdata$stations_dc$name)) #nolint
    name <- trimws (name, which = "right") # trim terminal white space
    res <- data.frame (id = sysdata$stations_dc$id,
                       name = name,
                       lon = sysdata$stations_dc$lon,
                       lat = sysdata$stations_dc$lat,
                       stringsAsFactors = FALSE)

    return (res)
}

#' Get Guadalajara stations
#'
#' @return \code{data.frame} of (id, name, lon, lat) of all stations in
#' Gaudalajara's mibici system
#'
#' @noRd
bike_get_gu_stations <- function () {

    # the name of this station file changes, but it's always called
    # "nomenclatura" something, and is the only file with this word in its name
    link <- "https://www.mibici.net/en/open-data/"
    hrefs <- httr::content (httr::GET (link), encoding = "UTF-8") %>%
        xml2::xml_children () %>%
        xml2::xml_find_all (".//a") %>%
        xml2::xml_attr ("href")
    link <- paste0 ("https://www.mibici.net",
                    hrefs [grep ("nomenclatura", hrefs, ignore.case = TRUE)])
    if (length (link) > 1)
        link <- link [length (link)] # latest version

    suppressMessages (
                      dat <- httr::GET (link) %>%
                          httr::content (encoding = "UTF-8", as = "parsed")
                      )

    # Remove apostrophes from names coz they muck up sqlite fields:
    nm <- gsub ("\"", "", dat$name)
    nm <- gsub ("\'", "", nm)
    res <- data.frame (id = dat$obcn, name = nm,
                       lon = dat$longitude, lat = dat$latitude,
                       stringsAsFactors = FALSE)
    res <- res [which (!duplicated (res)), ]
    res <- res [which (!res$id == "NULL"), ]

    return (res)
}
ropensci/bikedata documentation built on Feb. 7, 2024, 6:04 a.m.