#' @importFrom Rdpack reprompt
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#### helper functions ####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title Projection test
#' @description Check if a feature collection is in a projected CRS
#' @param obj A feature collection
#' @return A boolean
#' @importFrom sf st_is_longlat st_crs st_crs<-
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
is_projected <- function(obj) {
!is.na(st_crs(obj)) &
!st_is_longlat(obj) &
is.null(st_crs(obj)$to_meter)
}
#' @title Geometry sanity check
#'
#' @description Function to check if the geometries given by the user are valid.
#'
#' @param lines A feature collection of lines
#' @param samples A feature collection of points (the samples)
#' @param events A feature collection of points (the events)
#' @param study_area A feature collection of polygons (the study_area)
#' @return TRUE if all the checks are passed
#' @importFrom sf st_geometry_type st_is_valid
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
check_geometries <- function(lines,samples,events, study_area){ # nocov start
# checking if geometries are all valid, simple and planar
obj_names <- c("lines","samples","events")
objs <- list(lines,samples,events)
for(i in 1:length(obj_names)){
obj <- objs[[i]]
obj_name <- obj_names[[i]]
types <- st_geometry_type(obj)
test <- grepl("MULTI", types, fixed = TRUE)
if(any(test)){
stop(paste("the ",obj_name," must be simple geometries,
considere using the function sf::st_cast to convert them to simple geometries",sep=""))
}
if(any(st_is_valid(obj) == FALSE)){
stop(paste("the ",obj_name," must be valid geometries",sep=""))
}
if(is_projected(obj)==FALSE){
stop(paste("the ",obj_name," must be projected (planar coordinates)",sep=""))
}
}
# checking if the geometries type are good
if(class(events)[[1]]!="sf" | unique(st_geometry_type(events))!="POINT"){
stop("the events must be given as a feature class of points")
}
if(class(samples)[[1]]!="sf" | unique(st_geometry_type(samples))!="POINT"){
stop("the samples must be given as a feature class of points")
}
if(class(lines)[[1]]!="sf" | unique(st_geometry_type(lines))!="LINESTRING"){
stop("the lines must be given as a feature class of linestrings")
}
# checking if the CRS are good
if(is.null(study_area)){
comp <- c(st_crs(samples) == st_crs(events),
st_crs(lines) == st_crs(events),
st_crs(lines) == st_crs(samples))
}else{
comp <- c(st_crs(study_area) == st_crs(events),
st_crs(study_area) == st_crs(samples),
st_crs(study_area) == st_crs(lines),
st_crs(samples) == st_crs(events),
st_crs(lines) == st_crs(events),
st_crs(lines) == st_crs(samples))
}
if(any(comp==FALSE)){
stop("the lines, events and samples must have the same Coordinates Reference System (crs)")
}
return(TRUE)
}
#defining some global variables (weird felx but ok)
utils::globalVariables(c("spid", "weight", ".", "bws",'X','Y','wid','m_X','m_Y')) # nocov end
#' @title Clean events geometries
#'
#' @description Function to avoid having events at the same location.
#'
#' @param events The feature collection of points to contract (must have a weight column)
#' @param digits The number of digits to keep
#' @param agg A double indicating if the points must be aggregated within a distance.
#' if NULL, then the points are aggregated by rounding the coordinates.
#' @return A new feature collection of points
#' @importFrom data.table tstrsplit setDF
#' @importFrom sf st_coordinates
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
clean_events <- function(events,digits=5,agg=NULL){
base_crs <- st_crs(events)
if(is.null(agg)){
events$spid <- sp_char_index(st_coordinates(events),digits)
if("time" %in% names(events)){
events$spid <- paste0(events$spid,"_",events$time)
}
edf <- st_drop_geometry(events)
new_events <- data.table(edf)
n1 <- names(new_events)[names(new_events) %in% c("weight","spid") == FALSE]
agg_events <- new_events[, lapply(.SD, max) , by = .(spid), .SDcols = n1]
agg_events$weight <- new_events[, .(sum(weight)), by = .(spid)][,2]
if("time" %in% names(events)){
agg_events[, c("X", "Y","time") := tstrsplit(spid, "_", fixed = TRUE)]
agg_events$time <- as.numeric(agg_events$time)
}else{
agg_events[, c("X", "Y") := tstrsplit(spid, "_", fixed = TRUE)]
}
agg_events$X <- as.numeric(agg_events$X)
agg_events$Y <- as.numeric(agg_events$Y)
new_events <- setDF(agg_events)
new_events <- st_as_sf(new_events,
coords = c("X", "Y"),
crs = st_crs(events))
}else{
if("time" %in% names(events)){
XY <- st_coordinates(events)
events$X <- XY[,1]
events$Y <- XY[,2]
events$gp <- aggregate_points(events,agg, return_ids = TRUE)
if(sum(events$gp) == 0){
new_events <- events
new_events$gp <- NULL
new_events$spid <- sp_char_index(st_coordinates(new_events),digits)
}else{
events <- st_drop_geometry(events)
part1 <- subset(events, events$gp == 0)
part2 <- setDT(subset(events, events$gp != 0))
mean_coords <- part2[,.(m_X = mean(X),
m_Y = mean(Y),
m_wid = data.table::first(wid)
), by = "gp"]
events2 <- setDT(merge(part2, mean_coords, by = "gp", all.x = TRUE))
events2$m_X <- ifelse(is.na(events2$m_X), events2$X,events2$m_X)
events2$m_Y <- ifelse(is.na(events2$m_Y), events2$X,events2$m_Y)
new_events <- events2[, .(X = data.table::first(m_X),
Y = data.table::first(m_Y),
wid = data.table::first(wid),
weight = sum(weight)
), by = c("gp","time")]
new_events <- rbind(new_events, part1[names(new_events)])
new_events$gp <- NULL
new_events <- st_as_sf(new_events, coords = c('X','Y'), crs = base_crs)
}
}else{
new_events <- aggregate_points(events,agg)
new_events$spid <- sp_char_index(st_coordinates(new_events),digits)
}
}
# we can now add the goid : unique id of each location
coords <- st_coordinates(new_events)
new_events$goid <- as.numeric(as.factor(sp_char_index(coords,digits)))
return(new_events)
}
#' @title Events aggregation
#'
#' @description Function to aggregate points within a radius.
#'
#' @details This function can be used to aggregate points within a radius. This
#' is done by using the dbscan algorithm. This process is
#' repeated until no more modification is applied.
#'
#' @param points The feature collection of points to contract (must have a weight column)
#' @param maxdist The distance to use
#' @param weight The name of the column to use as weight (default is "weight").
#' The values of the aggregated points for this column will be summed. For all
#' the other columns, only the max value is retained.
#' @param return_ids A boolean (default is FALSE), if TRUE, then an index indicating
#' for each point the group it belongs to is returned. If FALSE, then a spatial
#' point features is returned with the points already aggregated.
#' @return A new feature collection of points
#' @export
#' @examples
#' data(bike_accidents)
#' bike_accidents$weight <- 1
#' agg_points <- aggregate_points(bike_accidents, 5)
aggregate_points <- function(points, maxdist, weight = "weight", return_ids = FALSE){
num_cols <- unlist(lapply(points, is.numeric))
num_names <- names(points)[num_cols]
points <- points[num_names]
## reimplementation avec dbscan
coords <- st_coordinates(points)
points$X <- coords[,1]
points$Y <- coords[,2]
result <- dbscan::dbscan(coords, eps = maxdist, minPts = 2)
if(return_ids){
return(result$cluster)
}
pt_list <- split(points, f = result$cluster)
if(length(pt_list) == 1){
all_pts <- points
}else{
old_pts <- pt_list[[1]]
#mat1 <- cbind(st_coordinates(old_pts), st_drop_geometry(old_pts))
mat1 <- st_drop_geometry(old_pts)
new_pts <- data.frame(t(sapply(2:length(pt_list), function(i){
pts <- pt_list[[i]]
if(nrow(pts) == 1 ){
return(c(st_drop_geometry(pts)))
}else{
coords <- colMeans(st_coordinates(pts))
feat_max <- sapply(num_names, function(name){max(pts[[name]])})
feat_max[names(feat_max) == weight] <- sum(pts[[weight]])
#feat_max <- apply(st_drop_geometry(pts), 2, max)
#feat_max[names(feat_max) == weight] <- sum(pts[[weight]])
#feat_max <- feat_max[names(feat_max) %in% num_names]
return(c(coords, feat_max))
}
})))
new_pts <- rbind(mat1, new_pts)
for (coln in c("X","Y", num_names)){
new_pts[[coln]] <- as.numeric(new_pts[[coln]])
}
all_pts <- st_as_sf(new_pts, coords = c("X","Y"))
all_pts[c("X","Y")] <- st_coordinates(all_pts)
st_crs(all_pts) <- st_crs(points)
}
return(all_pts)
}
#' @title Prior data preparation
#'
#' @description A simple function to prepare data before the NKDE calculation.
#'
#' @param samples A feature collection of points representing the samples points
#' @param lines A feature collection of Linestrings representing the network
#' @param events A feature collection of points representing the events points
#' @param w A numeric vector representing the weight of the events
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @param agg A double indicating if the points must be aggregated within a distance.
#' if NULL, then the points are aggregated by rounding the coordinates.
#' @return the data prepared for the rest of the operations
#' @importFrom data.table tstrsplit setDF
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
prepare_data <- function(samples,lines,events, w ,digits,tol, agg){
## step1 cleaning the events
events$weight <- w
# the goid for the events is added here
# two events at the same location will have the same goid
# but the events with different time stamp must not be aggregated
# geographically
events <- clean_events(events,digits,agg)
## step2 defining the global IDS
#events$goid <- 1:nrow(events)
samples$goid <- 1:nrow(samples)
samples <- samples[c("goid")]
# removing existing X and Y coordinates
if("X" %in% names(samples)){
samples$X <- NULL
}
if("Y" %in% names(samples)){
samples$Y <- NULL
}
if("X" %in% names(events)){
events$X <- NULL
}
if("Y" %in% names(events)){
events$Y <- NULL
}
## step3 remove lines with no length
lines$length <- as.numeric(st_length(lines))
lines <- subset(lines, lines$length>0)
lines$oid <- 1:nrow(lines)
return(list("samples" = samples,
"lines" = lines[c("length","oid")],
"events" = events))
}
#' @title Split data with a grid
#'
#' @description Function to split the dataset according to a grid.
#'
#' @param grid A spatial grid to split the data within
#' @param samples A feature collection of points representing the samples points
#' @param events A feature collection of points representing the events points
#' @param lines A feature collection of linestrings representing the network
#' @param bw The kernel bandwidth (used to avoid edge effect)
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @param split_all A boolean indicating if we must split the lines at each vertex
#' (TRUE) or only at event vertices (FALSE)
#' @return A list with the split dataset
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
split_by_grid <- function(grid,samples,events,lines,bw,tol, digits, split_all = TRUE){
## step1 : creating the spatial trees
tree_samples <- build_quadtree(samples)
tree_events <- build_quadtree(events)
tree_lines <- build_quadtree(lines)
## step2 : split the datasets
selections <- lapply(1:nrow(grid),function(i){
square <- grid[i,]
# selecting the samples in the grid
sel_samples <- spatial_request(square,tree_samples,samples)
# if there is no sampling points in the rectangle, then return NULL
if(nrow(sel_samples)==0){
return(NULL)
}
# selecting the events in a buffer
buff <- st_buffer(square,dist=bw)
buff2 <- st_buffer(square,dist=(bw+0.5*bw))
sel_events <- spatial_request(buff,tree_events,events)
# selecting the lines in a buffer
sel_lines <- spatial_request(buff2,tree_lines,lines)
sel_lines$oid <- 1:nrow(sel_lines)
# snapping the events on the lines
if(nrow(sel_events)==0){
new_lines <- sel_lines
}else{
a <- nrow(sel_events)
b <- nrow(sel_lines)
x <- a*b
# if(is.na(x)){
# stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
# }
# if(x >= 2*10^9){
# stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
# }
sel_events <- snapPointsToLines2(sel_events,sel_lines,idField = "oid", snap_dist = bw)
if(split_all){
new_lines <- add_vertices_lines(sel_lines,sel_events,sel_events$nearest_line_id,tol)
}else{
new_lines <- split_lines_at_vertex(sel_lines,sel_events,sel_events$nearest_line_id,tol)
}
}
# split lines at events
if(split_all){
new_lines <- simple_lines(new_lines)
}
new_lines$length <- as.numeric(st_length(new_lines))
new_lines <- subset(new_lines,new_lines$length>0)
# remove lines that are loops
new_lines <- remove_loop_lines(new_lines,digits)
new_lines$oid <- 1:nrow(new_lines)
new_lines <- new_lines[c("length","oid")]
return(list("samples" = sel_samples,
"events" = sel_events,
"lines" = new_lines))
})
#let us remove all the empty quadra
selections <- selections[lengths(selections) != 0]
for(i in 1:length(selections)){
selections[[i]]$index <- i
}
return(selections)
}
#' @title Split data with a grid for the adaptive bw function
#'
#' @description Function to split the dataset according to a grid for the
#' adaptive bw function.
#'
#' @param grid A spatial grid to split the data within
#' @param events A feature collection of points representing the events
#' @param lines A feature collection of lines representing the network
#' @param bw The kernel bandwidth (used to avoid edge effect)
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @return A list with the split dataset
#' @importFrom sf st_buffer
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
split_by_grid_abw <- function(grid,events,lines,bw,tol,digits){
## step1 : creating the spatial trees
tree_events <- build_quadtree(events)
tree_lines <- build_quadtree(lines)
## step2 : split the datasets
selections <- lapply(1:nrow(grid),function(i){
square <- grid[i,]
# selecting the events in the grid
sel_events_rect <- spatial_request(square,tree_events,events)
# if there is no event points in the rectangle, then return NULL
if(nrow(sel_events_rect)==0){
return(NULL)
}
# selecting the events in a buffer
buff <- st_buffer(square,dist=bw)
buff2 <- st_buffer(square,dist=(bw+0.5*bw))
sel_events <- spatial_request(buff,tree_events,events)
# selecting the lines in a buffer
sel_lines <- spatial_request(buff2,tree_lines,lines)
sel_lines$oid <- 1:nrow(sel_lines)
# snapping the events on the lines
if(nrow(sel_events)==0){
new_lines <- sel_lines
}else{
a <- nrow(sel_events)
b <- nrow(sel_lines)
x <- a*b
if(is.na(x)){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
if(x >= 2*10^9){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
sel_events <- snapPointsToLines2(sel_events,sel_lines,idField = "oid")
new_lines <- add_vertices_lines(sel_lines,sel_events,sel_events$nearest_line_id,tol)
}
# split lines at events
new_lines <- simple_lines(new_lines)
new_lines$length <- as.numeric(st_length(new_lines))
new_lines <- subset(new_lines,new_lines$length>0)
# remove lines that are loops
new_lines <- remove_loop_lines(new_lines,digits)
new_lines$oid <- 1:nrow(new_lines)
new_lines <- new_lines[c("length","oid")]
sel_events_rect <- subset(sel_events, sel_events$goid %in% sel_events_rect$goid)
return(list("samples" = sel_events_rect,
"events" = sel_events,
"lines" = new_lines))
})
#let us remove all the empty quadra
selections <- selections[lengths(selections) != 0]
for(i in 1:length(selections)){
selections[[i]]$index <- i
}
return(selections)
}
#' @title Split data with a grid (multicore)
#'
#' @description Function to split the dataset according to a grid with multicore support.
#'
#' @param grid A spatial grid to split the data within
#' @param samples A feature collection of points representing the samples points
#' @param events A feature collection of points representing the events points
#' @param lines A feautre collection of linestrings representing the network
#' @param bw The kernel bandwidth (used to avoid edge effect)
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @param split_all A boolean indicating if we must split the lines at each vertex
#' (TRUE) or only at event vertices (FALSE)
#' @return A list with the split dataset
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
split_by_grid.mc <- function(grid,samples,events,lines,bw,tol,digits, split_all = TRUE){
## step1 creating the spatial trees
tree_samples <- build_quadtree(samples)
tree_events <- build_quadtree(events)
tree_lines <- build_quadtree(lines)
## step2 split the datasets
#NB : because we can't send c++ pointer to child process, we must start with
#splitting the spatial objects in a sequential way and only after
#snapping and splitting in a multicore fashion
sub_samples <- lapply(1:nrow(grid),function(i){
square <- grid[i,]
#selecting the samples in the grid
sel_samples <- spatial_request(square,tree_samples,samples)
##return NULL if there is no sampling point in the rectangle
if(nrow(sel_samples)==0){
return(NULL)
}
#selecting the events in a buffer
buff <- st_buffer(square, dist = bw)
buff2 <- st_buffer(square, dist=(bw+0.5*bw))
sel_events <- spatial_request(buff,tree_events,events)
#selecting the lines in a buffer
sel_lines <- spatial_request(buff2,tree_lines,lines)
sel_lines$oid <- 1:nrow(sel_lines)
return(list("sel_lines" = sel_lines,
"sel_events"=sel_events,
"sel_samples"=sel_samples))
})
selections <- future.apply::future_lapply(sub_samples,function(sub){
if(is.null(sub)){
return (NULL)
}
sel_lines <- sub$sel_lines
sel_events <- sub$sel_events
sel_samples <- sub$sel_samples
#snapping the events on the lines
if(nrow(sel_events)==0){
new_lines <- sel_lines
}else{
a <- nrow(sel_events)
b <- nrow(sel_lines)
x <- a*b
if(is.na(x)){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
if(x >= 2*10^9){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
sel_events <- snapPointsToLines2(sel_events,sel_lines,idField = "oid")
if(split_all){
new_lines <- add_vertices_lines(sel_lines,sel_events,sel_events$nearest_line_id,tol)
}else{
new_lines <- split_lines_at_vertex(sel_lines,sel_events,sel_events$nearest_line_id,tol)
}
}
#split lines at events
if(split_all){
new_lines <- simple_lines(new_lines)
}
new_lines$length <- as.numeric(st_length(new_lines))
new_lines <- subset(new_lines,new_lines$length>0)
# remove lines that are loops
new_lines <- remove_loop_lines(new_lines,digits)
new_lines$oid <- 1:nrow(new_lines)
new_lines <- new_lines[c("length","oid")]
return(list("samples" = sel_samples,
"events" = sel_events,
"lines" = new_lines))
}, future.packages = c("spNetwork"))
#let us remove the empty regions
selections <- selections[lengths(selections) != 0]
#randomize the elements to minimize calculation time
selections <- sample(selections)
for(i in 1:length(selections)){
selections[[i]]$index <- i
}
return(selections)
}
#' @title Split data with a grid for the adaptive bw function (multicore)
#'
#' @description Function to split the dataset according to a grid for the
#' adaptive bw function with multicore support
#'
#' @param grid A spatial grid to split the data within
#' @param events A feature collection of points representing the events points
#' @param lines A feature collection of lines representing the network
#' @param bw The kernel bandwidth (used to avoid edge effect)
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @return A list with the split dataset
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
split_by_grid_abw.mc <- function(grid,events,lines,bw,tol,digits){
## step1 creating the spatial trees
tree_events <- build_quadtree(events)
tree_lines <- build_quadtree(lines)
## step2 split the datasets
#NB : because we can't send c++ pointer to child process, we must start with
#splitting the spatial objects in a sequential way and only after
#snapping and splitting in a multicore fashion
sub_samples <- lapply(1:nrow(grid),function(i){
square <- grid[i,]
#selecting the samples in the grid
sel_events_rect <- spatial_request(square,tree_events,events)
##return NULL if there is no sampling point in the rectangle
if(nrow(sel_events_rect)==0){
return(NULL)
}
#selecting the events in a buffer
buff <- st_buffer(square,dist = bw)
buff2 <- st_buffer(square,dist = (bw+0.5*bw))
sel_events <- spatial_request(buff,tree_events,events)
#selecting the lines in a buffer
sel_lines <- spatial_request(buff2,tree_lines,lines)
sel_lines$oid <- 1:nrow(sel_lines)
return(list("sel_lines" = sel_lines,
"sel_events"=sel_events,
"sel_events_rect"=sel_events_rect))
})
selections <- future.apply::future_lapply(sub_samples,function(sub){
if(is.null(sub)){
return (NULL)
}
sel_lines <- sub$sel_lines
sel_events <- sub$sel_events
sel_events_rect <- sub$sel_events_rect
#snapping the events on the lines
if(nrow(sel_events)==0){
new_lines <- sel_lines
}else{
a <- nrow(sel_events)
b <- nrow(sel_lines)
x <- a*b
if(is.na(x)){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
if(x >= 2*10^9){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
sel_events <- snapPointsToLines2(sel_events,sel_lines,idField = "oid")
invisible(capture.output(new_lines <- add_vertices_lines(sel_lines,sel_events,sel_events$nearest_line_id,tol)))
}
#split lines at events
new_lines <- simple_lines(new_lines)
new_lines$length <- as.numeric(st_length(new_lines))
new_lines <- subset(new_lines,new_lines$length>0)
# remove lines that are loops
new_lines <- remove_loop_lines(new_lines,digits)
new_lines$oid <- 1:nrow(new_lines)
new_lines <- new_lines[c("length","oid")]
sel_events_rect <- subset(sel_events, sel_events$goid %in% sel_events_rect$goid)
return(list("samples" = sel_events_rect,
"events" = sel_events,
"lines" = new_lines))
}, future.packages = c("spNetwork"))
#let us remove the empty regions
selections <- selections[lengths(selections) != 0]
#randomize the elements to minimize calculation time
selections <- sample(selections)
for(i in 1:length(selections)){
selections[[i]]$index <- i
}
return(selections)
}
#' @title Adaptive bandwidth
#'
#' @description Function to calculate Adaptive bandwidths according to Abramson’s smoothing regimen.
#'
#' @param grid A spatial grid to split the data within
#' @param events A feature collection of points representing the events points
#' @param lines A feature collection of linestrings representing the network
#' @param bw The fixed kernel bandwidth (can also be a vector, the value returned will be a matrix in that case)
#' @param trim_bw The maximum size of local bandwidths (can also be a vector, must match bw)
#' @param method The method to use when calculating the NKDE
#' @param kernel_name The name of the kernel to use
#' @param max_depth The maximum recursion depth
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @param sparse A Boolean indicating if sparse matrix should be used
#' @param verbose A Boolean indicating if update messages should be printed
#' @return A vector with the local bandwidths
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
adaptive_bw <- function(grid,events,lines,bw,trim_bw,method,kernel_name,max_depth,tol,digits,sparse,verbose){
##step 1 split the datas !
selections <- split_by_grid_abw(grid,events,lines,max(trim_bw), tol, digits)
## step 2 calculating the temp NKDE values
if(verbose){
print("start calculating the kernel values for the adaptive bandwidth...")
}
n_quadra <- length(selections)
dfs <- lapply(1:n_quadra,function(i){
sel <- selections[[i]]
# bws should be a vector if we only have on global bandwidth
if(length(bw) == 1){
bws <- rep(bw,nrow(sel$events))
}else{
# and it will be a matrix if we have several bandwidths
bws <- sapply(bw, function(x){
rep(x, nrow(sel$events))
})
}
if(verbose){
print(paste(" quadra ",i,"/",n_quadra,sep=""))
}
values <- nkde_worker(lines = sel$lines,
events = sel$events,
samples = sel$samples,
kernel_name = kernel_name,
bw = bw,
bws = bws,
method = method,
div = "none",
digits = digits,
tol = tol,
sparse = sparse,
max_depth = max_depth,
verbose = verbose)
if(any(values==0)){
# Adding a better way to explain the bug would be great !
stop("This is embarassing, we should not get 0 values here (bug code 0001). Please report the bug at https://github.com/JeremyGelb/spNetwork/issues and provide the dataset used.")
}
# df <- data.frame("goid"=sel$samples$goid,
# "k" = values)
if(nrow(sel$samples) == 1){
df <- c(sel$samples$goid, values)
}else{
df <- cbind(sel$samples$goid, values)
}
return(df)
})
## step 3 combining the results
tot_df <- do.call(rbind,dfs)
#tot_df <- tot_df[order(tot_df[,1]),]
#tot_df <- tot_df[match(tot_df[,1], events$goid),]
tot_df <- tot_df[match(events$goid,tot_df[,1]),]
## step 4 calculating the new bandwidth !
if(ncol(tot_df) == 2){
k <- tot_df[,2]
delta <- calc_gamma(k)
new_bw <- bw * (k**(-1/2) * delta**(-1))
new_bw <- ifelse(new_bw<trim_bw,new_bw,trim_bw)
}else{
new_bw <- sapply(2:ncol(tot_df), function(i){
k <- tot_df[,i]
delta <- calc_gamma(k)
nbw <- bw[[i-1]] * (k**(-1/2) * delta**(-1))
nbw <- ifelse(nbw<trim_bw[[i-1]],nbw,trim_bw[[i-1]])
return(nbw)
})
}
return(new_bw)
}
#' @title Adaptive bandwidth (multicore)
#'
#' @description Function to calculate Adaptive bandwidths according to
#' Abramson’s smoothing regimen with multicore support
#'
#' @param grid A spatial grid to split the data within
#' @param events A feature collection of points representing the events
#' @param lines A feature collection of linestrings representing the network
#' @param bw The fixed kernel bandwidth (can also be a vector, the value returned will be a matrix in that case)
#' @param trim_bw The maximum size of local bandwidths (can also be a vector, must match bw)
#' @param method The method to use when calculating the NKDE
#' @param kernel_name The name of the kernel to use
#' @param max_depth The maximum recursion depth
#' @param digits The number of digits to keep
#' @param tol A float indicating the spatial tolerance when snapping events on
#' lines
#' @param sparse A Boolean indicating if sparse matrix should be used
#' @param verbose A Boolean indicating if update messages should be printed
#' @return A vector with the local bandwidths
#' @keywords internal
#' @examples
#' #This is an internal function, no example provided
adaptive_bw.mc <- function(grid,events,lines,bw,trim_bw,method,kernel_name,max_depth,tol,digits,sparse,verbose){
##step 1 split the datas !
selections <- split_by_grid_abw.mc(grid,events,lines,trim_bw, tol, digits)
## step 2 calculating the temp NKDE values
if(verbose){
print("start calculating the kernel values for the adaptive bandwidth...")
}
if(verbose){
print("starting the kernel calculations ...")
progressr::with_progress({
p <- progressr::progressor(along = selections)
dfs <- future.apply::future_lapply(selections, function(sel) {
# bws should be a vector if we only have on global bandwidth
if(length(bw) == 1){
bws <- rep(bw,nrow(sel$events))
}else{
# and it will be a matrix if we have several bandwidths
bws <- sapply(bw, function(x){
rep(x, nrow(sel$events))
})
}
invisible(capture.output(values <- spNetwork::nkde_worker(lines = sel$lines,
events = sel$events,
samples = sel$samples,
kernel_name = kernel_name,
bw = bw,
bws = bws,
method = method,
div = "none",
digits = digits,
tol = tol,
sparse = sparse,
max_depth = max_depth,
verbose = verbose)
))
# df <- data.frame("goid"=sel$samples$goid,
# "k" = values)
if(nrow(sel$samples) == 1){
df <- c(sel$samples$goid, values)
}else{
df <- cbind(sel$samples$goid, values)
}
p(sprintf("i=%g", sel$index))
return(df)
}, future.packages = c("spNetwork"))
})
}else{
dfs <- future.apply::future_lapply(selections, function(sel) {
# bws should be a vector if we only have on global bandwidth
if(length(bw) == 1){
bws <- rep(bw,nrow(sel$events))
}else{
# and it will be a matrix if we have several bandwidths
bws <- sapply(bw, function(x){
rep(x, nrow(sel$events))
})
}
values <- spNetwork::nkde_worker(lines = sel$lines,
events = sel$events,
samples = sel$samples,
kernel_name = kernel_name,
bw = bw,
bws = bws,
method = method,
div = "none",
digits = digits,
tol = tol,
sparse = sparse,
max_depth = max_depth,
verbose = verbose)
# df <- data.frame("goid"=sel$samples$goid,
# "k" = values)
if(nrow(sel$samples) == 1){
df <- c(sel$samples$goid, values)
}else{
df <- cbind(sel$samples$goid, values)
}
return(df)
}, future.packages = c("spNetwork"))
}
## step 3 combining the results
tot_df <- do.call(rbind,dfs)
#tot_df <- tot_df[order(tot_df[,1]),]
tot_df <- tot_df[match(events$goid,tot_df[,1]),]
## step 4 calculating the new bandwidth !
if(ncol(tot_df) == 2){
k <- tot_df[,2]
delta <- calc_gamma(k)
new_bw <- bw * (k**(-1/2) * delta**(-1))
new_bw <- ifelse(new_bw<trim_bw,new_bw,trim_bw)
}else{
new_bw <- sapply(2:ncol(tot_df), function(i){
k <- tot_df[,i]
delta <- calc_gamma(k)
nbw <- bw[[i-1]] * (k**(-1/2) * delta**(-1))
nbw <- ifelse(nbw<trim_bw[[i-1]],nbw,trim_bw[[i-1]])
return(nbw)
})
}
return(new_bw)
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#### worker functions ####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title NKDE worker
#'
#' @description The worker function for nkde and nkde.mc
#'
#' @param lines A feature collection of linestrings representing the network. The
#' geometries must be simple lines (may crash if some geometries
#' are invalid)
#' @param events A feature collection of points representing the events on the
#' network. The points will be snapped on the network.
#' @param samples A feature collection of points representing the locations for
#' which the densities will be estimated.
#' @param kernel_name The name of the kernel to use
#' @param bw The global kernel bandwidth
#' @param bws The kernel bandwidth (in meters) for each event. Is usually a vector
#' but could also be a matrix if several global bandwidths were used. In this
#' case, the output value is also a matrix.
#' @param method The method to use when calculating the NKDE, must be one of
#' simple / discontinuous / continuous (see details for more information)
#' @param div The divisor to use for the kernel. Must be "n" (the number of
#' events within the radius around each sampling point), "bw" (the bandwidth)
#' "none" (the simple sum).
#' @param digits The number of digits to keep in the spatial coordinates. It
#' ensures that topology is good when building the network. Default is 3
#' @param tol When adding the events and the sampling points to the network,
#' the minimum distance between these points and the lines extremities. When
#' points are closer, they are added at the extremity of the lines.
#' @param sparse A Boolean indicating if sparse or regular matrices should be
#' used by the Rcpp functions. Regular matrices are faster, but require more
#' memory and could lead to error, in particular with multiprocessing. Sparse
#' matrices are slower, but require much less memory.
#' @param max_depth When using the continuous and discontinuous methods, the
#' calculation time and memory use can go wild if the network has a lot of
#' small edges (area with a lot of intersections and a lot of events). To
#' avoid it, it is possible to set here a maximum depth. Considering that the
#' kernel is divided at intersections, a value of 8 should yield good
#' estimates. A larger value can be used without problem for the discontinuous
#' method. For the continuous method, a larger value will strongly impact
#' calculation speed.
#' @param verbose A Boolean, indicating if the function should print messages
#' about the process.
#' @importFrom igraph adjacent_vertices get.edge.ids
#' @return A numeric vector with the nkde values
#' @keywords internal
#' @export
#' @examples
#' #This is an internal function, no example provided
nkde_worker <- function(lines, events, samples, kernel_name, bw, bws, method, div, digits, tol, sparse, max_depth, verbose = FALSE){
# if we do not have event in that space, just return 0 values
if(nrow(events)==0){
values <- rep(0,nrow(samples))
return(values)
}
## step1 creating the graph
if(verbose){
print(" build graph ...")
}
graph_result <- build_graph(lines,digits = digits,line_weight = "length")
graph <- graph_result$graph
nodes <- graph_result$spvertices
edges <- graph_result$spedges
## step2 for each sample, find its belonging line
a <- nrow(events)
b <- nrow(lines)
x <- a*b
if(is.na(x)){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
if(x >= 2*10^9){
stop(paste("The matrix size will be exceeded (",a," x ",b,"), please consider using a finer grid to split the study area",sep=""))
}
#snapped_samples <- maptools::snapPointsToLines(samples,edges,idField = "edge_id")
snapped_samples <- snapPointsToLines2(samples,edges, snap_dist = max(bws), idField = "edge_id")
samples$edge_id <- snapped_samples$nearest_line_id
## step3 finding for each event, its corresponding node
events$vertex_id <- closest_points(events, nodes)
## step4 adding the spatial coordinates to samples and nodes
XY_nodes <- st_coordinates(nodes)
nodes$X_coords <- XY_nodes[,1]
nodes$Y_coords <- XY_nodes[,2]
XY_samples <- st_coordinates(samples)
samples$X_coords <- XY_samples[,1]
samples$Y_coords <- XY_samples[,2]
## step5 adding a local oid for samples and events
events$oid <- 1:nrow(events)
samples$oid <- 1:nrow(samples)
## step6 starting the calculations !
if(verbose){
print(" calculating NKDE values ...")
}
if(is.null(dim(bws))){
# in this case, bws is a simple vector
values <- calc_density(method, kernel_name, graph_result, graph, events, samples, bws, nodes, edges, div, max_depth, verbose, sparse)
}else{
values <- apply(bws, MARGIN = 2, FUN = function(x){
vals <- calc_density(method, kernel_name, graph_result, graph, events, samples, x, nodes, edges, div, max_depth, verbose = FALSE, sparse)
return(vals)
})
}
return(values)
}
# A worker function to calculate densities. It has been added to facilitate bw selection in case of
# adaptive bandwidth
calc_density <- function(method, kernel_name, graph_result, graph, events, samples, bws, nodes, edges, div, max_depth, verbose, sparse){
if(method == "simple"){
# the cas of the simple method, no c++ here
kernel_func <- select_kernel(kernel_name)
if(verbose){
values <- simple_nkde(graph, events, samples, bws, kernel_func, nodes, edges)
}else{
invisible(capture.output(values <- simple_nkde(graph, events, samples, bws, kernel_func, nodes, edges, div)))
}
}else{
# we have to call the package with the Rcpp functions
# this is necessary because this function can be used in a multicore context
## step6.5 preparing the complementary object for the rcpp function
neighbour_list <- adjacent_vertices(graph,nodes$id,mode="out")
neighbour_list <- lapply(neighbour_list,function(x){return (as.numeric(x))})
if(method=="continuous"){
##and finally calculating the values
if (sparse){
values <- spNetwork::continuous_nkde_cpp_arma_sparse(neighbour_list, events$vertex_id, events$weight,
st_drop_geometry(samples), bws, kernel_name, st_drop_geometry(nodes),
graph_result$linelist, max_depth, verbose, div)
}else{
values <- spNetwork::continuous_nkde_cpp_arma(neighbour_list, events$vertex_id, events$weight,
st_drop_geometry(samples), bws, kernel_name, st_drop_geometry(nodes),
graph_result$linelist, max_depth, verbose, div)
}
}
if(method == "discontinuous"){
#let this commented here to debug and test sessions
# invisible(capture.output(values <- discontinuous_nkde2(edge_list,neighbour_list, events$vertex_id, events$weight,
# samples@data, bw, kernel_func, nodes@data, graph_result$linelist, max_depth, verbose)))
if(sparse){
values <- spNetwork::discontinuous_nkde_cpp_arma_sparse(neighbour_list, events$vertex_id, events$weight,
st_drop_geometry(samples), bws, kernel_name, st_drop_geometry(nodes), graph_result$linelist, max_depth, verbose)
}else{
values <- spNetwork::discontinuous_nkde_cpp_arma(neighbour_list, events$vertex_id, events$weight,
st_drop_geometry(samples), bws, kernel_name, st_drop_geometry(nodes), graph_result$linelist, max_depth, verbose)
}
}
}
## step7 adjusting the kernel values !
if(div == "n"){
return(values$sum_k / values$n)
}else if (div == "bw"){
#return(values$sum_k * (1/bw))
# NOTE : if div == "bw", then the scaling is done during the estimation
# because we might have adaptive BW
return(values$sum_k)
}else if (div == "none"){
return(values$sum_k)
}
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#### main functions ####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title Network Kernel density estimate
#'
#' @description Calculate the Network Kernel Density Estimate based on a network of lines,
#' sampling points, and events
#' @md
#' @details
#' **The three NKDE methods**\cr
#' Estimating the density of a point process is commonly done by using an
#' ordinary two-dimensional kernel density function. However, there are numerous
#' cases for which the events do not occur in a two-dimensional space but on a
#' network (like car crashes, outdoor crimes, leaks in pipelines, etc.). New
#' methods were developed to adapt the methodology to networks, three of them
#' are available in this package.
#'
#' * The simple method: This first method was presented by \insertCite{xie2008kernel}{spNetwork} and proposes an
#' intuitive solution. The distances between events and sampling points are
#' replaced by network distances, and the formula of the kernel is adapted to
#' calculate the density over a linear unit instead of an areal unit.
#' * The discontinuous method: The previous method has been criticized by
#' \insertCite{okabe2009kernel}{spNetwork}, arguing that the estimator proposed
#' is biased, leading to an overestimation of density in events hot-spots. More
#' specifically, the simple method does not conserve mass and the induced kernel
#' is not a probability density along the network. They thus proposed a
#' discontinuous version of the kernel function on network, which equally
#' "divides" the mass density of an event at intersections.
#' * The continuous method: If the discontinuous method is unbiased, it leads
#' to a discontinuous kernel function which is a bit counter-intuitive.
#' \insertCite{okabe2009kernel;textual}{spNetwork} proposed another version of
#' the kernel, which divides the mass of the density at intersections but adjusts
#' the density before the intersection to make the function continuous.
#'
#' The three methods are available because, even though that the simple method is
#' less precise statistically speaking, it might be more intuitive. From a
#' purely geographical view, it might be seen as a sort of distance decay
#' function as used in Geographically Weighted Regression.\cr \cr\cr **adaptive bandwidth**\cr
#' It is possible to use adaptive bandwidth instead of fixed
#' bandwidth. Adaptive bandwidths are calculated using the Abramson’s smoothing
#' regimen \insertCite{abramson1982bandwidth}{spNetwork}. To do so, an original
#' fixed bandwidth must be specified (bw parameter), and is used to estimate the
#' priory densitiy at event locations. These densities are then used to
#' calculate local bandwidth. The maximum size of the local bandwidth can be
#' limited with the parameter trim_bw. For more details, see the vignettes.
#' \cr\cr **Optimization parameters**\cr The grid_shape parameter allows to
#' split the calculus of the NKDE according to a grid dividing the study area.
#' It might be necessary for big dataset to reduce the memory used. If the
#' grid_shape is c(1,1), then a full network is built for the area. If the
#' grid_shape is c(2,2), then the area is split in 4 rectangles. For each
#' rectangle, the sample points falling in the rectangle are used, the events
#' and the lines in a radius of the bandwidth length are used. The results are
#' combined at the end and ordered to match the original order of the samples.
#' \cr\cr The geographical coordinates of the start and end of lines are used to
#' build the network. To avoid troubles with digits, we truncate the coordinates
#' according to the digit parameter. A minimal loss of precision is expected but
#' results in a fast construction of the network. \cr\cr To calculate the
#' distances on the network, all the events are added as vertices. To reduce the
#' size of the network, it is possible to reduce the number of vertices by
#' adding the events at the extremity of the lines if they are close to them.
#' This is controlled by the parameter tol. \cr\cr In the same way, it is
#' possible to limit the number of vertices by aggregating the events that are
#' close to each other. In that case, the weights of the aggregated events are
#' summed. According to an aggregation distance, a buffer is drawn around the
#' fist event, all events falling in that buffer are aggregated to the first
#' event, forming a new event. The coordinates of this new event are the means of
#' the original events coordinates. This procedure is repeated until no events
#' are aggregated. The aggregation distance can be fixed with the parameter agg.
#' \cr\cr When using the continuous and discontinuous kernel, the density is
#' reduced at each intersection crossed. In the discontinuous case, after 5
#' intersections with four directions each, the density value is divided by 243
#' leading to very small values. In the same situation but with the continuous
#' NKDE, the density value is divided by approximately 7.6. The max_depth
#' parameters allows the user to control the maximum depth of these two NKDE.
#' The base value is 15, but a value of 10 would yield very close estimates. A
#' lower value might have a critical impact on speed when the bandwidth is large.
#' \cr\cr When using the continuous and discontinuous kernel, the connections
#' between graph nodes are stored in a matrix. This matrix is typically sparse,
#' and so a sparse matrix object is used to limit memory use. If the network is
#' small (typically when the grid used to split the data has small rectangles)
#' then a classical matrix could be used instead of a sparse one. It
#' significantly increases speed, but could lead to memory issues.
#'
#' @references{ \insertAllCited{} }
#'
#' @template nkde_params-arg
#' @template diggle_corr-arg
#' @param samples A feature collection of points representing the locations for
#' which the densities will be estimated.
#' @template nkde_params2-arg
#' @template nkde_geoms-args
#' @template sparse-arg
#' @template grid_shape-arg
#' @template verbose-arg
#' @template check-arg
#' @return A vector of values, they are the density estimates at sampling points
#' @export
#' @examples
#' \donttest{
#' data(mtl_network)
#' data(bike_accidents)
#' lixels <- lixelize_lines(mtl_network,200,mindist = 50)
#' samples <- lines_center(lixels)
#' densities <- nkde(mtl_network,
#' events = bike_accidents,
#' w = rep(1,nrow(bike_accidents)),
#' samples = samples,
#' kernel_name = "quartic",
#' bw = 300, div= "bw",
#' adaptive = FALSE,
#' method = "discontinuous", digits = 1, tol = 1,
#' agg = 15,
#' grid_shape = c(1,1),
#' verbose=FALSE)
#' }
nkde <- function(lines, events, w, samples, kernel_name, bw, adaptive=FALSE, trim_bw=NULL, method, div="bw", diggle_correction = FALSE, study_area = NULL, max_depth = 15, digits=5, tol=0.1, agg=NULL, sparse=TRUE, grid_shape=c(1,1), verbose=TRUE, check=TRUE){
## step0 basic checks REPRENDRE ICI
if(verbose){
print("checking inputs ...")
}
if((kernel_name %in% c("triangle", "gaussian", "scaled gaussian", "tricube", "cosine" ,"triweight", "quartic", 'epanechnikov','uniform'))==FALSE){
stop('the name of the kernel function must be one of c("triangle", "gaussian", "scaled gaussian", "tricube", "cosine" ,"triweight", "quartic", "epanechnikov" ,"uniform")')
}
if(method %in% c("simple","continuous","discontinuous") == FALSE){
stop('the method must be one of c("simple","continuous","discontinuous"')
}
if(method == "continuous" & kernel_name == "gaussian"){
stop("using the continuous NKDE and the gaussian kernel function can yield negative values for densities because the gaussian kernel does not integrate to 1 within the bandiwdth, please consider using the quartic kernel instead")
}
if(min(bw)<=0){
stop("the bandwidth for the kernel must be superior to 0")
}
if(adaptive & (length(bw) > 1 )){
stop("When adaptive is TRUE, only a global bandwidth must be given")
}
if(adaptive & is.null(trim_bw)){
stop("if adaptive is TRUE, a value for trim_bw must be supplied")
}
if(diggle_correction & is.null(study_area)){
stop("the study_area must be defined if the Diggle correction factor is used")
}
if(check){
checked <- check_geometries(lines,samples,events, study_area)
}
## step1 : preparing the data
if(verbose){
print("prior data preparation ...")
}
events$bws <- bw
data <- prepare_data(samples, lines, events, w, digits, tol, agg)
lines <- data$lines
samples <- data$samples
events <- data$events
## step2 creating the grid
grid <- build_grid(grid_shape,list(lines,samples,events))
## adaptive bandwidth !
if(adaptive == FALSE){
# if (length(bw) == 1){
# bws <- rep(bw,nrow(events))
# }else{
# bws <- bw
# }
bws <- events$bws
}else{
## we want to use an adaptive bw
# bws <- adaptive_bw.mc(grid, events, lines, bw, trim_bw, method,
# kernel_name, max_depth, tol, digits, sparse, verbose)
bws <- adaptive_bw(grid, events, lines, bw, trim_bw, method,
kernel_name, max_depth, tol, digits, sparse, verbose)
}
## calculating the correction factor
if(diggle_correction){
if(verbose){
print("Calculating the correction factor")
}
corr_factor <- correction_factor(study_area,events,lines,method,bws, kernel_name, tol, digits, max_depth, sparse)
}else{
corr_factor <- rep(1,nrow(events))
}
events$weight <- events$weight * corr_factor
events$bw <- bws
max_bw <- max(bws)
## step3 splitting the dataset with each rectangle
if(verbose){
print("Splitting the data with the spatial grid ...")
}
selections <- split_by_grid(grid,samples,events,lines,max_bw, tol, digits)
## step 4 calculating the values
if(verbose){
print("start calculating the kernel values ...")
}
n_quadra <- length(selections)
dfs <- lapply(1:n_quadra,function(i){
sel <- selections[[i]]
if(verbose){
print(paste(" quadra ",i,"/",n_quadra,sep=""))
}
values <- nkde_worker(lines = sel$lines,
events = sel$events,
samples = sel$samples,
kernel_name = kernel_name, bw = bw,
bws = sel$events$bw, method = method, div, digits,
tol,sparse, max_depth, verbose)
df <- data.frame("goid"=sel$samples$goid,
"k" = values)
return(df)
})
if(verbose){
print("combining the results ...")
}
## step5 combining the results
tot_df <- do.call(rbind,dfs)
tot_df <- tot_df[order(tot_df$goid),]
if(adaptive){
return(list("events" = events,
"k" = tot_df$k))
}else{
return(tot_df$k)
}
}
#' @title Network Kernel density estimate (multicore)
#'
#' @description Calculate the Network Kernel Density Estimate based on a network of lines,
#' sampling points, and events with multicore support.
#'
#' @details For more details, see help(nkde)
#'
#' @template nkde_params-arg
#' @template diggle_corr-arg
#' @param samples A feature collection of points representing the locations for
#' which the densities will be estimated.
#' @template nkde_params2-arg
#' @template nkde_geoms-args
#' @template sparse-arg
#' @template grid_shape-arg
#' @param verbose A Boolean, indicating if the function should print messages
#' about the process.
#' @template check-arg
#' @return A vector of values, they are the density estimates at sampling
#' points
#' @export
#' @examples
#' \donttest{
#' data(mtl_network)
#' data(bike_accidents)
#' future::plan(future::multisession(workers=1))
#' lixels <- lixelize_lines(mtl_network,200,mindist = 50)
#' samples <- lines_center(lixels)
#' densities <- nkde.mc(mtl_network,
#' events = bike_accidents,
#' w = rep(1,nrow(bike_accidents)),
#' samples = samples,
#' kernel_name = "quartic",
#' bw = 300, div= "bw",
#' adaptive = FALSE, agg = 15,
#' method = "discontinuous", digits = 1, tol = 1,
#' grid_shape = c(3,3),
#' verbose=TRUE)
#' ## make sure any open connections are closed afterward
#' if (!inherits(future::plan(), "sequential")) future::plan(future::sequential)
#' }
nkde.mc <- function(lines, events, w, samples, kernel_name, bw, adaptive=FALSE, trim_bw=NULL, method, div="bw", diggle_correction = FALSE, study_area = NULL, max_depth = 15, digits=5, tol=0.1,agg=NULL, sparse=TRUE, grid_shape=c(1,1), verbose=TRUE, check=TRUE){
## step0 basic checks
if(verbose){
print("checking inputs ...")
}
if((kernel_name %in% c("triangle", "gaussian", "scaled gaussian", "tricube", "cosine" ,"triweight", "quartic", 'epanechnikov','uniform'))==FALSE){
stop('the name of the kernel function must be one of c("triangle", "gaussian", "scaled gaussian", "tricube", "cosine" ,"triweight", "quartic", "epanechnikov" ,"uniform")')
}
if(method %in% c("simple","continuous","discontinuous") == FALSE){
stop('The method must be one of c("simple","continuous","discontinuous"')
}
if(method == "continuous" & kernel_name == "gaussian"){
stop("using the continuous NKDE and the gaussian kernel function can yield negative values for densities because the gaussian kernel does not integrate to 1 within the bandiwdth, please consider using the quartic kernel instead")
}
if(min(bw)<=0){
stop("the bandwidth for the kernel must be superior to 0")
}
if(adaptive & (length(bw) > 1 )){
stop("When adaptive is TRUE, only a global bandwidth must be given")
}
if(adaptive & is.null(trim_bw)){
stop("if adaptive is TRUE, a value for trim_bw must be supplied")
}
if(diggle_correction & is.null(study_area)){
stop("the study_area must be defined if the Diggle correction factor is used")
}
if(check){
check_geometries(lines,samples,events,study_area)
}
## step1 preparing the data
if(verbose){
print("prior data preparation ...")
}
events$bws <- bw
data <- prepare_data(samples, lines, events,w,digits,tol,agg)
lines <- data$lines
samples <- data$samples
events <- data$events
## step2 creating the grid
grid <- build_grid(grid_shape,list(lines,samples,events))
## adaptive bandwidth !
if(adaptive==FALSE){
bws <- events$bws
}else{
if(verbose){
print("calculating the local bandwidth ...")
}
## we want to use an adaptive bw
bws <- adaptive_bw.mc(grid,events,lines,bw,trim_bw,method,kernel_name,max_depth,tol,digits,sparse,verbose)
}
## calculating the correction factor
if(diggle_correction){
if(verbose){
print("Calculating the correction factor")
}
corr_factor <- correction_factor(study_area,events,lines,method,bws, kernel_name, tol, digits, max_depth, sparse)
}else{
corr_factor <- rep(1,nrow(events))
}
events$weight <- events$weight * corr_factor
events$bw <- bws
max_bw <- max(bws)
## step3 splitting the dataset with each rectangle
selections <- split_by_grid(grid,samples,events,lines,max_bw, digits,tol)
## step4 calculating the values
if(verbose){
print("starting the kernel calculations ...")
progressr::with_progress({
p <- progressr::progressor(along = selections)
dfs <- future.apply::future_lapply(selections, function(sel) {
invisible(capture.output(values <- nkde_worker(sel$lines, sel$events,
sel$samples, kernel_name,bw,
sel$events$bw, method, div, digits,
tol,sparse, max_depth, verbose)))
df <- data.frame("goid"=sel$samples$goid,
"k" = values)
p(sprintf("i=%g", sel$index))
return(df)
}, future.packages = c("spNetwork"))
})
}else{
dfs <- future.apply::future_lapply(selections, function(sel) {
values <- nkde_worker(sel$lines, sel$events,
sel$samples, kernel_name,bw,
sel$events$bw, method, div, digits,
tol,sparse, max_depth, verbose)
df <- data.frame("goid"=sel$samples$goid,
"k" = values)
return(df)
}, future.packages = c("spNetwork"))
}
if(verbose){
print("combining the results ...")
}
## step5 combining the results
tot_df <- do.call(rbind,dfs)
tot_df <- tot_df[order(tot_df$goid),]
events$bws <- NULL
if(adaptive){
return(list("events" = events,
"k" = tot_df$k))
}else{
return(tot_df$k)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.