R/ddist.R

Defines functions ddist

Documented in ddist

#' Create dyads of points i and j and calculate distances
#'
#' This function calculates the geodesic distance between any dyads (pairs of points) and stores the result in a long tibble, a opposed to a wide matrix.
#'
#' @param data an object of class `data.frame` or `tibble`.
#' @param id a variable uniquely identifying geospatial points. Can be of type numeric, integer, character, or factor
#' @param longitude name of the numeric longitude variable. Defaults to "longitude"
#' @param latitude name of the numeric latitude variable. Defaults to "latitude"
#' @param crs a valid EPSG for a valid Coordinate Reference System (CRS) for your coordinates. Defaults to 4326.
#' @param crs_transform a logical value indicating whether to transform the CRS. Defaults to FALSE
#' @param new_crs a valid EPSG for a new CRS. See `rgdal::make_EPSG()` or \url{https://epsg.org/home.html}
#' @param diagonal a logical value. Keep the diagonal component in the distance matrix with dyads (i,i) and distance zero? Defaults to TRUE
#' @param duplicates a logical value. Keep "identical" dyads (i,j) and (j,i)? Defaults to TRUE. If set to FALSE, only one observation per dyad is kept. Note that this uses \link[dplyr]{rowwise} and makes the \link[dyadicdist]{ddist} function considerably more time-consuming
#' @return a long \link[tibble]{tibble} with dyads and dyadic distances incl. a distance unit indicator
#' @examples
#' library(tibble)
#' df <- tibble::tribble(
#' ~city_name, ~idvar, ~latitude, ~longitude,
#' "copenhagen", 5, 55.68, 12.58,
#' "stockholm", 2, 59.33, 18.07,
#' "oslo", 51, 59.91, 10.75
#' )
#' ddist(data = df, id = "idvar")
#' @author Jeppe Vierø
#' @import dplyr sf tidyr tibble rgdal readr stringr rlang tidyselect
#' @export

ddist <- function(data = NULL,
                  id = NULL,
                  longitude = "longitude",
                  latitude = "latitude",
                  crs = 4326,
                  crs_transform = FALSE,
                  new_crs = NULL,
                  diagonal = TRUE,
                  duplicates = TRUE) {

  check_crs_orig(crs = crs)

  check_crs(crs_transform = crs_transform,
            new_crs = new_crs)

  check_data(data = data,
             id = id,
             longitude = longitude,
             latitude = latitude)

  if (longitude != "longitude") {

    data <- data %>%
      dplyr::rename(longitude = !!rlang::sym(longitude))

  }

  if (latitude != "latitude") {

    data <- data %>%
      dplyr::rename(latitude = !!rlang::sym(latitude))

  }

  check_coords_ddist(data = data)

  data <- data %>%
    dplyr::filter(!is.na(.data$longitude) & !is.na(.data$latitude)) %>%
    sf::st_as_sf(coords = c("longitude", "latitude"),
                 crs = crs)

  if (crs_transform == TRUE) {

    data <- data %>%
      sf::st_transform(crs = new_crs)

  } else {

    data <- data

  }

  temp <- data %>%
    dplyr::distinct(!!rlang::sym(id),
                    .keep_all = TRUE) %>%
    dplyr::mutate(row_id = dplyr::row_number())

  dist_mat <- sf::st_distance(temp,
                              temp,
                              by_element = FALSE) %>%
    base::as.data.frame()

  length_units <- base::units(dist_mat[1,1])$numerator

  dist_mat <- dist_mat %>%
    dplyr::mutate(across(all_of(names(.)),
                         ~ base::unclass(.x)))

  dist_long <- dist_mat %>%
    tidyr::pivot_longer(cols = everything(),
                        names_to = "temp",
                        values_to = "distance")

  if(base::nrow(dist_long) == 1) {

    dist_long <- dist_long %>%
      dplyr::mutate(temp = "V1")

  } else if (base::nrow(dist_long) > 1) {

    dist_long <- dist_long

  }

  dist_long <- dist_long %>%
    dplyr::mutate(
      row_id_1 = base::sort(
        base::rep(
          base::seq(
            1:nrow(dist_mat)
          ),
          nrow(dist_mat)
        )
      )
    ) %>%
    dplyr::mutate(row_id_2 = readr::parse_number(.data$temp)) %>%
    dplyr::select(-.data$temp) %>%
    dplyr::mutate(distance_units = length_units)

  temp <- temp %>%
    tibble::tibble() %>%
    dplyr::select(-.data$geometry)

  dist_long <- dist_long %>%
    dplyr::left_join(temp,
                     by = c("row_id_1" = "row_id")) %>%
    dplyr::left_join(temp,
                     by = c("row_id_2" = "row_id")) %>%
    dplyr::rename_with(.cols = tidyselect::ends_with(".x"),
                       ~ stringr::str_replace_all(., '\\.x', '_1')) %>%
    dplyr::rename_with(.cols = tidyselect::ends_with(".y"),
                       ~ stringr::str_replace_all(., '\\.y', '_2')) %>%
    dplyr::mutate(id1 := !!rlang::sym(paste0(id,
                                             "_1")),
                  id2 := !!rlang::sym(paste0(id,
                                             "_2"))) %>%
    dplyr::mutate(
      match_id = base::paste(.data$id1,
                             .data$id2,
                             sep = "_"))

  if (duplicates == FALSE) {

    dist_long <- dist_long %>%
      dplyr::rowwise() %>%
      dplyr::mutate(
        tmp =
          base::paste(
            base::sort(
              c(
                .data$row_id_1,
                .data$row_id_2)
            ),
            collapse = "_")
      ) %>%
      dplyr::distinct(.data$tmp,
                      .keep_all = T) %>%
      dplyr::select(-.data$tmp)

  } else if (duplicates == TRUE) {

    dist_long <- dist_long

  }

  if (diagonal == TRUE) {

    dist_long <- dist_long %>%
      dplyr::select(-c(.data$id1, .data$id2,
                       .data$row_id_1,
                       .data$row_id_2)) %>%
      tibble::tibble()

  } else if (diagonal == FALSE) {

    dist_long <- dist_long %>%
      dplyr::filter(.data$id1 != .data$id2) %>%
      dplyr::select(-c(.data$id1, .data$id2,
                       .data$row_id_1,
                       .data$row_id_2)) %>%
      tibble::tibble()

  }

  return(dist_long)

}
jvieroe/dyadicdist documentation built on Jan. 21, 2022, 8:43 a.m.