#' Square grid over system area
#'
#' Creates a square grid with given cell size over the service area
#' of the dockless bike sharing system.
#'
#' @param area object of class \code{sf} representing the system area.
#' @param ... further arguments passed to \code{st_make_grid}.
#' @return If \code{type} is set to \code{polygons}, it returns an object
#' of class \code{sf} containing the grid cells as square polygons.
#' If \code{type} is set to \code{centers} it returns an object of
#' class \code{sf} containing the grid cell centroids as points.
#' @export
create_grid = function(area, ...) {
# Project the area
area_projected = dockless::project_sf(area)
# Create a grid over the area, with the given cell size
geometry = sf::st_make_grid(area_projected, ...)
grid = sf::st_sf(geometry)
# Clip the grid with the area
grid$intersects = as.vector(
sf::st_intersects(grid, area_projected, sparse = FALSE)
)
grid_clipped = grid[grid$intersects,]
grid_clipped$intersects = NULL
sf::st_transform(grid_clipped, crs = 4326)
}
#' Usage intensity in grid cells
#'
#' Calculates the number of pick-ups in each polygon of an overlaying grid.
#'
#' @param usage pick-ups, as an object of class \code{sf} with point geometry.
#' @param grid grid cells as an object of class \code{sf} with polygon geometry.
#' @return Returns an numeric vector with each element specifying the number
#' of pick-ups in the grid cell with corresponding index.
#' @export
usage_intensity = function(usage, grid) {
# Project the points to State Plane California Zone III (EPSG:26943)
points_projected = dockless::project_sf(usage)
# Project the polygons to State Plane California Zone III (EPSG:26943)
polygons_projected = dockless::project_sf(grid)
# Calculate the number of points in each polygon
f = function(x) {
nrow(sf::st_intersection(x, points_projected))
}
sapply(split(polygons_projected, 1:nrow(polygons_projected)), f)
}
#' Spatially constrained hierarchical clustering of a \code{dockless_dfc} object
#'
#' Clusters the \code{dockless_df} objects in a \code{dockless_dfc} with
#' spatially constrained hierarchical clustering.
#'
#' @param data object of class \code{dockless_dfc}.
#' @param grid grid cells as object of class \code{sf} with polygon geometry, in which
#' each cell refers to the spatial location of each \code{dockless_df} object in
#' the \code{dockless_dfc}. Must contain a column named \code{intensity}, which provides
#' the number of pick-ups per grid cill.
#' @param area object of class \code{sf} representing the system area.
#' @param K vector of integers specifying which values of k, the number of
#' clusters, should be tested.
#' @param omega vector of values specifying which values of alpha, the mixing
#' parameter, should be tested.
#' @return Returns a list of 2 with one element being a numeric vector of cluster
#' indices that specifies for each of the given \code{dockless_df} objects to which
#' cluster it belongs, and the second element being the geographical outines of each
#' cluster, bundled in an object of class \code{sf} with polygon geometry.
#' @export
spatial_cluster = function(data, grid, area, K, omega = seq(0, 1, 0.1)) {
# Create a dissimilarity matrix from the data
data_dis = dockless::dissimilarity_data(data)
# Create a spatial dissimilarity matrix
spatial_dis = dockless::dissimilarity_spatial(grid)
# Calculate Dunn Index for all k in K
validation = suppressWarnings(
clValid::clValid(
obj = as.matrix(data_dis),
nClust = K,
clMethods = 'hierarchical',
validation = 'internal',
method = 'ward'
)
)
dunn_indices = validation@measures[2, , ]
# Choose the optimal value of k
k_star = K[which.max(dunn_indices)]
# Spatially constrained hierarchical clusterings for all alpha in omega
information_criteria = ClustGeo::choicealpha(
data_dis,
spatial_dis,
range.alpha = omega,
K = k_star,
graph = FALSE
)
# Choose the optimal value of alpha
alpha_star = omega[which.max(information_criteria$Qnorm[which(information_criteria$Qnorm[, 1] >= 0.9), 2])]
# Cluster with spatially constrained hierarchical clustering
sch_clust = ClustGeo::hclustgeo(
D0 = data_dis,
D1 = spatial_dis,
alpha = alpha_star
)
# Cut the tree based on the provided number of clusters k
cluster_indices = stats::cutree(sch_clust, k = k_star)
# Add cluster information to grid cells
grid$cluster = cluster_indices
# Split by cluster
cells_per_cluster = split(
x = grid,
f = grid$cluster
)
# Dissolve grid cells per cluster
cells_dissolved = lapply(
cells_per_cluster,
function(x) sf::st_union(x)
)
# If a cluster is a multipolygon, split it into seperate polygons
# Return one sf data frame
f = function(x) {
if (methods::is(x, 'sfc_MULTIPOLYGON')) {
cluster = sf::st_sf(sf::st_cast(x, 'POLYGON'))
names(cluster)[1]= 'geometry'
sf::st_geometry(cluster) = 'geometry'
return(cluster)
} else {
cluster = sf::st_sf(x)
names(cluster)[1]= 'geometry'
sf::st_geometry(cluster) = 'geometry'
return(cluster)
}
}
cluster_outlines = do.call('rbind', lapply(cells_dissolved, f))
# Sort based on Y coordinate of centroid
cluster_centroids = sf::st_coordinates(
sf::st_centroid(dockless::project_sf(cluster_outlines))
)
cluster_outlines = cluster_outlines[order(cluster_centroids[,"Y"]), ]
# Add cluster index
cluster_outlines$cluster = as.factor(c(1:nrow(cluster_outlines)))
# Update cluster information of grid cells
grid$cluster = NULL
grid_updated = sf::st_join(
dockless::project_sf(grid),
dockless::project_sf(cluster_outlines),
join = sf::st_within
)
grid_updated = sf::st_transform(
grid_updated,
crs = 4326
)
# Calculate number of pick-ups per cluster
pickups_per_cluster = stats::aggregate(
grid_updated$intensity,
by = list(grid_updated$cluster),
FUN = sum
)
if (!all(pickups_per_cluster$x >= 56)) {
# Identify the cluster with too low usage intensity
small_cluster = which(pickups_per_cluster$x < 56)
# Calculate the centroids of all clusters
centroids = lapply(
cluster_outlines$geometry,
function(x) sf::st_centroid(x)
)
# For the small_cluster, find cluster with nearest centroid
distances = sapply(
centroids,
function(x) sf::st_distance(x, centroids[[small_cluster]])
)
nearest_cluster = which.min(distances[distances > 0])
# Replace cluster indices of small_cluster with cluster indices...
# ... of nearest cluster
grid_updated[grid_updated$cluster == small_cluster,]$cluster =
nearest_cluster
# Update cluster variable
grid_updated$cluster = factor(grid_updated$cluster)
# Split by cluster
cells_per_cluster = split(
x = grid_updated,
f = grid_updated$cluster
)
# Dissolve grid cells per cluster
cells_dissolved = lapply(
cells_per_cluster,
function(x) sf::st_union(x)
)
cluster_outlines = do.call('rbind', lapply(cells_dissolved, f))
# Sort based on Y coordinate of centroid
cluster_centroids = sf::st_coordinates(
sf::st_centroid(dockless::project_sf(cluster_outlines))
)
cluster_outlines = cluster_outlines[order(cluster_centroids[,"Y"]), ]
# Add cluster index
cluster_outlines$cluster = as.factor(c(1:nrow(cluster_outlines)))
# Update cluster information of grid cells
grid_updated$cluster = NULL
grid_updated = sf::st_join(
dockless::project_sf(grid_updated),
dockless::project_sf(cluster_outlines),
join = sf::st_within
)
grid_updated = sf::st_transform(
grid_updated,
crs = 4326
)
}
# Retrieve cluster indices
cluster_indices_updated = grid_updated$cluster
# Clip cluster outlines by system area
cluster_outlines_updated = sf::st_intersection(
dockless::project_sf(cluster_outlines),
dockless::project_sf(area)
)
cluster_outlines_updated = sf::st_transform(
cluster_outlines_updated,
crs = 4326
)
# Return list of indices and outlines
list(
indices = cluster_indices_updated,
outlines = cluster_outlines_updated
)
}
#' Create model points
#'
#' Creates an object of class \code{sf} containing the geographical locations
#' for which the forecasting models will be build. The locations are calculated
#' by taking, per cluster, the centroid of the grid cell centers , weighted by
#' the usage intensity of the grid cell polygons.
#'
#' @param centroids all grid cell centroids as an \code{sf} object with point geometry,
#' containing at least the attributes \code{cluster}, specifying to which cluster each
#' grid cell centroid belongs, and \code{intensity}, specifying the number of pick-ups
#' in the grid cell that corresponds to the grid cell centroid.
#' @return Returns an object of class \code{sf} with point geometry.
#' @export
create_modelpoints = function(centroids) {
# Split the centroids object by cluster
centroids_per_cluster = split(
x = centroids,
f = centroids$cluster
)
# Calculate weighted centroid per cluster
# Output as sf data frame instead of only sfc geometry
f = function(x) {
geometry = dockless::weighted_centroid(
points = x,
weights = x$intensity
)
sf::st_sf(geometry)
}
modelpoints = do.call('rbind', lapply(centroids_per_cluster, f))
# Add cluster information
modelpoints$cluster = as.factor(seq(1, nrow(modelpoints), 1))
return(modelpoints)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.