R/border_segment.R

Defines functions border_segment

Documented in border_segment

#' Border Segment Creation for FE-estimation
#'
#' Creates \code{n} segments of a line (the RD cut-off) and assigns the closest border segment for each observation in the sf data frame.
#' Computationally these tasks are quite demanding when the sample size is big and thus might take a few seconds to complete.
#'
#' @param data  sf data frame containing point data
#' @param cutoff the RDD border in the form of a line (preferred) or borderpoints
#' @param n the number of segments to be produced
#'
#' @return a vector with factors, each category representing one segment
#' @export
#'
#' @examples
#' points_samp.sf <- sf::st_sample(polygon_full, 100) # create points
#' # make it an sf object bc st_sample only created the geometry list-column (sfc):
#' points_samp.sf <- sf::st_sf(points_samp.sf)
#' points_samp.sf$segment10 <- border_segment(points_samp.sf, cut_off, 3)
#'
border_segment <- function(data, cutoff, n = 10) {

  # CHECKS ON SF OBJECTS
  stopifnot(
    "data frame is not an sf object"        = inherits(data, "sf"),
    "cutoff is not an sf object"            = inherits(cutoff, "sf"),
    "CRS not matching between objects, transform them accordingly!"
                                            = sf::st_crs(data) == sf::st_crs(cutoff)
  )

  # first bifurcation depending on input type (POINT or LINE)
  if (sf::st_geometry_type(cutoff)[1] == "POINT" | sf::st_geometry_type(cutoff)[1] == "MULTIPOINT") {
    #message("Drawing", n, "points out of the given set of boundarypoints.")
    # only random sampling implemented for now in st_sample
    # thus i cook up standard subsetting
    cutoff$id <- 1:nrow(cutoff) # create a unique id
    # revision after miscoding as "always start with id = 1"
    # borderpoints.sf <- cutoff[seq(1, nrow(cutoff), round(nrow(cutoff) / n, 0)), ] # subset according to this rule
    # NEW APPROACH, comp efficient with RAM efficient solution from https://stackoverflow.com/questions/3318333/split-a-vector-into-chunks-in-r
    temp <- split(cutoff$id, sort(cutoff$id%%n)) # slice it in n equal parts
    temp <- as.numeric(lapply(temp,function(x){return(x[round(length(x)/2, 0)])})) # get the middle element of each slice
    borderpoints.sf <- cutoff[temp, ] # subset with the chosen boundarypoints
    borderpoints.sf$id <- 1:nrow(borderpoints.sf)
    closest <- list()
    for(i in seq_len(nrow(data))){
      closest[[i]] <- borderpoints.sf[which.min(sf::st_distance(borderpoints.sf, data[i, ])),]$id
    }
    closest <- as.factor(as.numeric(closest))
    closest

  } else { # this is when it is a line
    # this ensures that cutoffs with linegeometries that are not suited get transformed without braking the ones that are actually fine
    cutoff <- cutoff %>% sf::st_cast("LINESTRING")
    #cutoff <- cutoff %>% st_union() %>% st_as_sf() %>% st_transform(3857)
    #message("Starting to create", n, "border segments with an approximate length of", round(as.numeric(sf::st_length(cutoff))/1000/n, 0), "kilometres each.\n")
    borderpoints.sf <- sf::st_line_sample(cutoff, n)
    borderpoints.sf <- borderpoints.sf %>% sf::st_cast("POINT") # cast to POINT in order to get the rownumber right
    borderpoints.sf <- sf::st_sf(borderpoints.sf) # then we need to make it an sf data frame again
    borderpoints.sf$id <- 1:nrow(borderpoints.sf)
    closest <- list()
    for(i in seq_len(nrow(data))){
      closest[[i]] <- borderpoints.sf[which.min(sf::st_distance(borderpoints.sf, data[i, ])),]$id
    }
    closest <- as.factor(as.numeric(closest))
    closest
  }
}

# v 0.1.0 done
# suppressed print messages to console

Try the SpatialRDD package in your browser

Any scripts or data that you put into this service are public.

SpatialRDD documentation built on Aug. 8, 2023, 5:11 p.m.