R/centrality.R

#' Calculate betweenness centrality for a 'dodgr' network.
#'
#' Centrality can be calculated in either vertex- or edge-based form.
#'
#' @param graph 'data.frame' or equivalent object representing the network
#' graph (see Details)
#' @param contract If 'TRUE', centrality is calculated on contracted graph
#' before mapping back on to the original full graph. Note that for street
#' networks, in particular those obtained from the \pkg{osmdata} package, vertex
#' placement is effectively arbitrary except at junctions; centrality for such
#' graphs should only be calculated between the latter points, and thus
#' 'contract' should always be 'TRUE'.
#' @param edges If 'TRUE', centrality is calculated for graph edges, returning
#' the input 'graph' with an additional 'centrality' column; otherwise
#' centrality is calculated for vertices, returning the equivalent of
#' 'dodgr_vertices(graph)', with an additional vertex-based 'centrality' column.
#' @param column Column of graph defining the edge properties used to calculate
#' centrality (see Note).
#' @param vert_wts Optional vector of length equal to number of vertices
#' (`nrow(dodgr_vertices(graph))`), to enable centrality to be calculated in
#' weighted form, such that centrality measured from each vertex will be
#' weighted by the specified amount.
#' @param dist_threshold If not 'NULL', only calculate centrality for each point
#' out to specified threshold. Setting values for this will result in
#' approximate estimates for centrality, yet with considerable gains in
#' computational efficiency. For sufficiently large values, approximations will
#' be accurate to within some constant multiplier. Appropriate values can be
#' established via the \link{estimate_centrality_threshold} function.
#' @param heap Type of heap to use in priority queue. Options include
#' Fibonacci Heap (default; 'FHeap'), Binary Heap ('BHeap'),
#' Trinomial Heap ('TriHeap'), Extended Trinomial Heap
#' ('TriHeapExt', and 2-3 Heap ('Heap23').
#' @param check_graph If `TRUE`, graph is first checked for duplicate edges,
#' which can cause incorrect centrality calculations. If duplicate edges are
#' detected in an interactive session, a prompt will ask whether you want to
#' proceed or rectify edges first. This value may be set to `FALSE` to skip this
#' check and the interactive prompt.
#' @return Modified version of graph with additional 'centrality' column added.
#'
#' @note The `column` parameter is by default `d_weighted`, meaning centrality
#' is calculated by routing according to weighted distances. Other possible
#' values for this parameter are
#' \itemize{
#' \item `d` for unweighted distances
#' \item `time` for unweighted time-based routing
#' \item `time_weighted` for weighted time-based routing
#' }
#'
#' @note Centrality is calculated by default using parallel computation with the
#' maximal number of available cores or threads. This number can be reduced by
#' specifying a value via
#' `RcppParallel::setThreadOptions (numThreads = <desired_number>)`.
#'
#' @family centrality
#' @export
#' @examples
#' graph_full <- weight_streetnet (hampi)
#' graph <- dodgr_contract_graph (graph_full)
#' graph <- dodgr_centrality (graph)
#' # 'graph' is then the contracted graph with an additional 'centrality' column
#' # Same calculation via 'igraph':
#' igr <- dodgr_to_igraph (graph)
#' library (igraph)
#' cent <- edge_betweenness (igr)
#' identical (cent, graph$centrality) # TRUE
#' # Values of centrality between all junctions in the contracted graph can then
#' # be mapped back onto the original full network by "uncontracting":
#' graph_full <- dodgr_uncontract_graph (graph)
#' # For visualisation, it is generally necessary to merge the directed edges to
#' # form an equivalent undirected graph. Conversion to 'sf' format via
#' # 'dodgr_to_sf()' is also useful for many visualisation routines.
#' graph_sf <- merge_directed_graph (graph_full) %>%
#'     dodgr_to_sf ()
#'
#' \dontrun{
#' library (mapview)
#' centrality <- graph_sf$centrality / max (graph_sf$centrality)
#' ncols <- 30
#' cols <- c ("lawngreen", "red")
#' cols <- colorRampPalette (cols) (ncols) [ceiling (ncols * centrality)]
#' mapview (graph_sf, color = cols, lwd = 10 * centrality)
#' }
#'
#' # An example of flow aggregation across a generic (non-OSM) highway,
#' # represented as the 'routes_fast' object of the \pkg{stplanr} package,
#' # which is a SpatialLinesDataFrame containing commuter densities along
#' # components of a street network.
#' \dontrun{
#' library (stplanr)
#' # merge all of the 'routes_fast' lines into a single network
#' r <- overline (routes_fast, attrib = "length", buff_dist = 1)
#' r <- sf::st_as_sf (r)
#' # Convert to a 'dodgr' network, for which we need to specify both a 'type'
#' # and 'id' column.
#' r$type <- 1
#' r$id <- seq (nrow (r))
#' graph_full <- weight_streetnet (
#'     r,
#'     type_col = "type",
#'     id_col = "id",
#'     wt_profile = 1
#' )
#' # convert to contracted form, retaining junction vertices only, and append
#' # 'centrality' column
#' graph <- dodgr_contract_graph (graph_full) %>%
#'     dodgr_centrality ()
#' #' expand back to full graph; merge directed flows; and convert result to
#' # 'sf'-format for plotting
#' graph_sf <- dodgr_uncontract_graph (graph) %>%
#'     merge_directed_graph () %>%
#'     dodgr_to_sf ()
#' plot (graph_sf ["centrality"])
#' }
#'
dodgr_centrality <- function (graph,
                              contract = TRUE,
                              edges = TRUE,
                              column = "d_weighted",
                              vert_wts = NULL,
                              dist_threshold = NULL,
                              heap = "BHeap",
                              check_graph = TRUE) {

    column <- match.arg (column, c (
        "d_weighted",
        "d",
        "time",
        "time_weighted"
    ))

    centrality_input_check (graph, vert_wts)

    if (check_graph) {
        duplicated_edge_check (graph)
    }

    if (get_turn_penalty (graph) > 0.0) {
        res <- create_compound_junctions (graph)
        graph <- res$graph
        compound_junction_map <- res$edge_map
    }

    if (is.null (dist_threshold)) {
        dist_threshold <- .Machine$double.xmax
    }

    hps <- get_heap (heap, graph)
    heap <- hps$heap
    graph <- hps$graph

    gr_cols <- dodgr_graph_cols (graph)

    if (contract && methods::is (graph, "dodgr_contracted")) {
        contract <- FALSE
    }
    graph_full <- edge_map <- NULL
    if (contract && !methods::is (graph, "dodgr_contracted")) {
        graph_full <- graph
        graph <- dodgr_contract_graph (graph)
        hashc <- get_hash (graph, contracted = TRUE)
        fname_c <- fs::path (
            fs::path_temp (),
            paste0 ("dodgr_edge_map_", hashc, ".Rds")
        )
        if (!fs::file_exists (fname_c)) {
            stop ("something went wrong extracting the edge_map ... ")
        } # nocov
        edge_map <- readRDS (fname_c)
    }

    vert_map <- make_vert_map (graph, gr_cols)
    if (!is.null (vert_wts)) {
        vert_map$vert_wts <- vert_wts
    }

    graph2 <- convert_graph (graph, gr_cols)
    if (column != "d_weighted") {
        gr_cols2 <- dodgr_graph_cols (graph2)
        column <- gr_cols2 [[column]]
        graph2$d_weighted <- graph2 [[column]]
    }

    # final '0' is for sampling calculation to estimate speed - non-zero values
    # used only in 'estimate_centrality_time'
    centrality <- rcpp_centrality (
        graph2,
        vert_map,
        heap,
        dist_threshold,
        edges,
        0
    )

    res <- attach_centrality_to_graph (
        graph,
        centrality,
        edge_map,
        graph_full,
        edges,
        contract
    )
    # If 'contract = TRUE`, the return value, `res`, is then uncontracted

    res <- uncompound_junctions (res, "centrality", compound_junction_map)

    if (is_dodgr_cache_on () && edges) {
        # re-cache graph with centrality measure:
        attr (res, "px") <- cache_graph (res, gr_cols$edge_id)
    }

    return (res)
}

centrality_input_check <- function (graph, vert_wts) {

    if ("centrality" %in% names (graph)) {
        warning (
            "graph already has a 'centrality' column; ",
            "this will be overwritten"
        )
    }

    if (!is.null (vert_wts) &&
        (!is.vector (vert_wts) ||
            !is.numeric (vert_wts) ||
            length (vert_wts) != nrow (dodgr_vertices (graph)))) {

        stop (
            "vert_wts must be a vector of same length as ",
            "nrow (dodgr_vertices (graph))"
        )
    }
}

#' Attach centrality measures to initial graph
#'
#' @noRd
attach_centrality_to_graph <- function (graph, centrality, edge_map, graph_full,
                                        edges, contract) {

    if (edges) {
        graph$centrality <- centrality
        if (contract) {
            graph <- uncontract_graph (graph, edge_map, graph_full)
        }
        res <- graph
    } else {
        res <- dodgr_vertices (graph)
        res$centrality <- centrality
    }

    return (res)
}

#' Estimate a value for the 'dist_threshold' parameter of the
#' \link{dodgr_centrality} function.
#'
#' Providing distance thresholds to this
#' function generally provides considerably speed gains, and results in
#' approximations of centrality. This function enables the determination of
#' values of 'dist_threshold' corresponding to specific degrees of accuracy.
#'
#' @inheritParams dodgr_centrality
#' @param tolerance Desired maximal degree of inaccuracy in centrality estimates
#' - values will be accurate to within this amount, subject to a constant
#' scaling factor. Note that threshold values increase non-linearly with
#' decreasing values of 'tolerance'
#' @return A single value for 'dist_threshold' giving the required tolerance.
#'
#' @note This function may take some time to execute. While running, it displays
#' ongoing information on screen of estimated values of 'dist_threshold' and
#' associated errors. Thresholds are progressively increased until the error is
#' reduced below the specified tolerance.
#'
#' @family centrality
#' @export
estimate_centrality_threshold <- function (graph, tolerance = 0.001) {
    # nocov start - can't be tested on any sample data

    # estimate absolute maximum distance in graph
    v <- dodgr_vertices (graph)
    vs <- sample (v$id, 1000)
    dabsmax <- max (dodgr_dists (graph, from = vs, to = vs), na.rm = TRUE)

    nverts <- min (10000, nrow (v))
    if (nverts == 10000) {
        graphs <- dodgr_sample (graph, nverts = nverts)
    } else {
        graphs <- graph
    }

    # estimate max dist
    vs <- dodgr_vertices (graphs)
    vsample <- sample (vs$id, 1000)
    dmax <- max (dodgr_dists (graphs, from = vsample, to = vsample),
        na.rm = TRUE
    )
    if (dmax > (0.75 * dabsmax)) {
        message (
            "dist_threshold approaches size of graph;\n",
            "Recommended value of 'dist_threshold' remains 'NULL',\n",
            "with centrality calculated across entire graph"
        )
        return (NULL)
    }

    d <- 100
    mult <- 1.1
    ss <- 9999
    g_old <- dodgr_centrality (graphs, dist_threshold = d)
    while (ss > tolerance) {
        d <- signif (d * mult, 2)
        g <- dodgr_centrality (graphs, dist_threshold = d)
        mod <- stats::lm (g$centrality ~ g_old$centrality)
        ss <- sum (mod$residuals^2) / sum (g_old$centrality^2)
        message (
            "d = ", round (d), "; error = ",
            formatC (ss, format = "f", digits = 4)
        )
        g_old <- g

        if (d > dmax) {
            message ("re-sampling graph to ", nverts, " vertices")
            nverts <- signif (nverts * mult, 2)
            graphs <- dodgr_sample (graph, nverts)
            if (nverts > nrow (v)) {
                break
            }
        }
    }
    if (nverts > nrow (v)) {
        message (
            "Failed to converge within tolerance;\n",
            "Recommended value of 'dist_threshold' remains 'NULL',\n",
            "with centrality calculated across entire graph"
        )
        d <- NULL
    } else {
        message ("converged on distance threshold of ", d)
    }

    return (d)
    # nocov end
}

#' Estimate time required for a planned centrality calculation.
#'
#' The 'dodgr' centrality functions are designed to be applied to potentially
#' very large graphs, and may take considerable time to execute. This helper
#' function estimates how long a centrality function may take for a given graph
#' and given value of 'dist_threshold' estimated via the
#' \link{estimate_centrality_threshold} function.
#'
#' @inheritParams dodgr_centrality
#' @return An estimated calculation time for calculating centrality for the
#' given value of 'dist_threshold'
#'
#' @note This function may take some time to execute. While running, it displays
#' ongoing information on screen of estimated values of 'dist_threshold' and
#' associated errors. Thresholds are progressively increased until the error is
#' reduced below the specified tolerance.
#'
#' @family centrality
#' @export
estimate_centrality_time <- function (graph,
                                      contract = TRUE,
                                      edges = TRUE,
                                      dist_threshold = NULL,
                                      heap = "BHeap") {

    # copies all code from dodgr_centrality, but uses the otherwise non-exposed
    # 'sample' parameter passed through to C++ routines
    if (is.null (dist_threshold)) {
        dist_threshold <- .Machine$double.xmax
    }

    hps <- get_heap (heap, graph)
    heap <- hps$heap
    graph <- hps$graph

    gr_cols <- dodgr_graph_cols (graph)

    if (contract && methods::is (graph, "dodgr_contracted")) {
        contract <- FALSE
    }
    if (contract && !methods::is (graph, "dodgr_contracted")) {
        graph <- dodgr_contract_graph (graph)
        hashc <- get_hash (graph, contracted = TRUE)
        fname_c <- fs::path (
            fs::path_temp (),
            paste0 ("dodgr_edge_map_", hashc, ".Rds")
        )
        if (!fs::file_exists (fname_c)) {
            stop ("something went wrong extracting the edge_map ... ")
        } # nocov
    }

    vert_map <- make_vert_map (graph, gr_cols)

    graph2 <- convert_graph (graph, gr_cols)

    # final '0' is for sampling calculation to estimate speed - non-zero values
    # used only in 'estimate_centrality_time'
    st <- system.time (
        x <- rcpp_centrality (
            graph2, vert_map, heap,
            dist_threshold, edges, 100
        )
    ) [3]

    # convert to estimated time for full graph
    st <- st * nrow (graph) / 100

    hh <- floor (st / 3600)
    st <- st - 3600 * hh
    mm <- floor (st / 60)
    ss <- round (st - 60 * mm)

    hh <- sprintf ("%02d", hh)
    mm <- sprintf ("%02d", mm)
    ss <- sprintf ("%02d", ss)
    res <- paste0 (hh, ":", mm, ":", ss)
    message (
        "Estimated time to calculate centrality for full graph is ",
        res
    )
    invisible (res)
}

Try the dodgr package in your browser

Any scripts or data that you put into this service are public.

dodgr documentation built on June 7, 2023, 5:44 p.m.