R/get_matrices.R

#' dd_get_tripdistsmatS
#'
#' Function to calculate station-to-station trip and distance matrices from a
#' database generated with \code{bikedata::store_bikedb}. This function
#' (re-)generates the internally bundled \link{distmats} and \link{tripmats}
#' data. It should only need to be used to generate data that are more recent
#' that these internally bundled versions.
#'
#' @param bikedb Name of database
#'
#' @note The directory containing the database can be retrieveed
#' with \link{dd_get_data_dir}, and set with \link{dd_set_data_dir}. The data
#' generated by this function can replace current internal data by running
#' \code{save(res$tripmats,
#' system.file("data","tripmats.rda",package="distdecay")}, and
#' \code{save(res$distmats,
#' system.file("data","distmats.rda",package="distdecay")}.
#'
#' @export
dd_get_tripdistmats <- function (bikedb)
{
    if (!file.exists (file.path (dd_get_data_dir (), bikedb)))
        stop ("bikedb not found; please set directory with dd_set_data_dir")

    message ("\nExtracting Trip and Distance matrices ...")
    tripmats_in <- distmats_in <- list ()
    cities <- c ('ny', 'bo', 'ch', 'dc', 'la', 'lo', 'ph')
    for (ci in cities)
    {
        message ("----------", ci, "----------")
        tm <- bike_tripmat (bikedb = bikedb, city = ci,
                            standardise = TRUE, quiet = FALSE)
        dm <- bike_distmat (bikedb = bikedb, city = ci, quiet = FALSE)
        tdm <- bike_match_matrices (tm, dm)
        tripmats_in [[ci]] <- tdm$trip
        distmats_in [[ci]] <- tdm$dist
    }
    return (list (tripmats = tripmats_in, distmats = distmats_in))
}


#' dd_get_vecs
#'
#' Get data.frame of distances (\code{d}) and either covariances of mutual
#' information statistics (\code{n}) for a given city
#' @noRd
dd_get_vecs <- function (city, from = TRUE, mi = FALSE)
{
    if (mi)
        n <- dd_mi (city)
    else
        n <- dd_cov (city)
    d <- distmats [[city]]

    if (from)
    {
        d <- d [lower.tri (d)]
        n <- n [lower.tri (n)]
    } else
    {
        d <- d [upper.tri (d)]
        n <- n [upper.tri (n)]
    }

    indx <- which (!is.na (n) & !is.na (d) & n > 0)
    if (length (indx) == 0)
        stop ("No or insufficient data available for ", city)
    data.frame (d = d [indx], n = n [indx])
}

#' Calculate distance matrix between pairs of points using the google API
#'
#' @param xy A two-column matrix of lon-lat coordinates
#' @param dmat If submitted, any missing values not returned in previous queries
#' are filled.
#' @param g_units The units of measurement requested from the google API
#' @param g_mode The mode of transport requested from the google API
#' 
#' @export
#'
#' @note the google server frequently returns no values, so this produces a
#' matrix with lots of missing values.
#'
#' @examples
#' \dontrun{
#' test_fn ()
#' }
distmat_g <-  function (xy, dmat, g_units = "metric", g_mode = "bicycling")
{
    base_url <- paste0 ("https://maps.googleapis.com/maps/api/",
                        "distancematrix/json?units=", g_units)

    nst <- nrow (xy)
    if (missing (dmat))
        dmat <- array (NA, dim = rep (nst, 2))

    nmax <- 30 # maximal number of distances per request; approx <= 37
    gr_indx <- group_index (seq (nst), nmax = nmax)

    # Getting google responses is the slowest bit by miles, so loops okay here
    pb <- txtProgressBar (max = 1, style = 3)
    for (i in seq (nst))
    {
        from <- paste (xy [i, 2], xy [i, 1], sep = ",")
        for (j in seq (gr_indx))
        {
            if (any (is.na (dmat [i, gr_indx [[j]] ])))
            {
                to <- paste (xy [gr_indx [[j]], 2],
                             xy [gr_indx [[j]], 1], sep = ",")
                to <- paste0 (to, collapse = "|")

                url_travel <- paste0 (base_url, "&origins=", from,
                                      "&destinations=", to, "&mode=", g_mode)
                url <- utils::URLencode(url_travel, repeated = FALSE,
                                        reserved = FALSE)
                obj <- jsonlite::fromJSON(url)
                if (obj$status == "OK")
                {
                    dists <- obj$rows$elements [[1]]$distance$value
                    if (length (dists) > 0)
                        dmat [i, gr_indx [[j]] ] <- dists
                }
            }
        }
        setTxtProgressBar(pb, i / nst)
    }
    close (pb)

    return (dmat)
}

group_index <- function (x, nmax = 30)
{
    if (length (x) <= nmax)
        group_index <- seq (x)
    else
    {
        ngr <- ceiling (length (x) / nmax)
        group_index <- by (seq (x), cut (seq (x), ngr), FUN = I)
    }

    return (group_index)
}
mpadge/distdecay documentation built on May 24, 2019, 6:08 a.m.