View source: R/linestring_aggregation.R
aggregate_clustered_flows | R Documentation |
This function aggregates flows within clusters and creates a single
representative line for each cluster. The start and end coordinates are
computed as weighted averages (weighted by flow counts or another variable),
or as simple means if no weights are provided. Each cluster is represented
by one LINESTRING.
aggregate_clustered_flows(flows, weight = NULL, crs = sf::st_crs(flows))
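flows | An sf object of clustered flows. As in the examples below, it is expected to carry the coordinate columns added by add_xyuv(), a cluster column, and a size column. |
weight | (optional) Name of a column in flows to use as weights. If NULL (the default), simple means are used. |
crs | Coordinate reference system for the output (default: taken from flows). |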
An sf object with one line per cluster, containing:
count_total: total weight (if provided), otherwise number of flows
size: the cluster size (from the input, not recomputed)
geometry: a LINESTRING representing the aggregated OD flow
# ----- 1. Basic Usage: A quick, runnable example ---
# This demonstrates the function with minimal, fast data preparation.
flows <- flowcluster::flows_leeds

# Fix the RNG seed so the dummy cluster assignment below (and therefore the
# printed output) is reproducible between runs.
set.seed(42)

# Create the required input columns in a single, fast pipeline
flows_clustered <- flows |>
  add_xyuv() |>
  # Manually create 3 dummy clusters for demonstration. dplyr::n() sizes the
  # sample from the data flowing through the pipeline rather than from the
  # outer `flows` object.
  dplyr::mutate(cluster = sample(1:3, size = dplyr::n(), replace = TRUE)) |>
  # The function requires a 'size' column, so we add it
  dplyr::group_by(cluster) |>
  dplyr::add_tally(name = "size") |>
  dplyr::ungroup()

# Demonstrate the function: one aggregated line per cluster, with start/end
# coordinates weighted by the "count" column.
flows_agg_w <- aggregate_clustered_flows(flows_clustered, weight = "count")
print(flows_agg_w)
# ----- 2. Detailed Workflow (not run by default) ---
## Not run:
# This example shows the ideal end-to-end workflow, from raw data
# to clustering and finally aggregation. It is not run during checks
# because the clustering steps are too slow.

# a) Prepare the data by filtering and adding coordinates
flows_prep <- flowcluster::flows_leeds |>
  sf::st_transform(3857) |>
  add_flow_length() |>
  filter_by_length(length_min = 5000, length_max = 12000) |>
  add_xyuv()

# b) Calculate distances and cluster the flows
distances <- flow_distance(flows_prep, alpha = 1.5, beta = 0.5)
dmat <- distance_matrix(distances)
wvec <- weight_vector(dmat, flows_prep, weight_col = "count")
flows_clustered_real <- cluster_flows_dbscan(
  dmat, wvec, flows_prep,
  eps = 8, minPts = 70
)

# c) Filter clusters and add a 'size' column
flows_clustered_real <- flows_clustered_real |>
  dplyr::filter(cluster != 0) |> # Filter out noise points
  dplyr::group_by(cluster) |>
  dplyr::mutate(size = dplyr::n()) |>
  dplyr::ungroup()

# d) Now, use the function on the clustered data
flows_agg_real <- aggregate_clustered_flows(flows_clustered_real, weight = "count")
print(flows_agg_real)

# e) Visualize the results
# tmap is an optional dependency, so only plot when it is installed.
if (requireNamespace("tmap", quietly = TRUE)) {
  library(tmap)
  # This plot uses tmap v4 syntax: `col_alpha` (not `alpha`) for line
  # transparency, tm_facets() for faceting (tm_shape() has no `facet`
  # argument), and tm_title() instead of tm_layout(title = ).
  # NOTE(review): faceting by "cluster" assumes the aggregated layer also
  # carries a `cluster` column -- confirm against the function's output.
  tm_shape(flows_clustered_real) +
    tm_lines(col = "grey50", col_alpha = 0.5) +
    tm_shape(flows_agg_real) +
    tm_lines(col = "red", lwd = 2) +
    tm_facets(by = "cluster") +
    tm_title("Original Flows (Grey) and Aggregated Flows (Red)")
}
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.