inst/stplanr-old-od-code/oneway.R

# Functions for aggregating two-way OD pairs into 'oneway' lines
#' Aggregate ods so they become non-directional
#'
#' For example, sum total travel in both directions.
#' @param x A data frame or SpatialLinesDataFrame, representing an OD matrix
#' @param attrib A vector of column numbers or names
#' for deciding which attribute(s) of class numeric to
#' aggregate
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @param stplanr.key A key of unique OD pairs regardless of the order,
#' autogenerated by [od_id_order()]
#' @return `onewayid` outputs a data.frame with rows containing
#' results for the user-selected attribute values that have been aggregated.
#' @family lines
#' @export
onewayid <- function(x, attrib, id1 = names(x)[1], id2 = names(x)[2],
                     stplanr.key = od_id_order(x, id1, id2)) UseMethod("onewayid")

#' @name onewayid
#' @details
#' Flow data often contains movement in two directions: from point A to point B
#' and then from B to A. This can be problematic for transport planning, because
#' the magnitude of flow along a route can be masked by flows the other direction.
#' If only the largest flow in either direction is captured in an analysis, for
#' example, the true extent of travel will by heavily under-estimated for
#' OD pairs which have similar amounts of travel in both directions.
#' Flows in both direction are often represented by overlapping lines with
#' identical geometries (see [flowlines()]) which can be confusing
#' for users and are difficult to plot.
#' @examples
#' flow_oneway <- onewayid(flow, attrib = 3)
#' nrow(flow_oneway) < nrow(flow) # result has fewer rows
#' sum(flow$All) == sum(flow_oneway$All) # but the same total flow
#' # using names instead of index for attribute
#' onewayid(flow, attrib = "All")
#' # using many attributes to aggregate
#' attrib <- which(vapply(flow, is.numeric, TRUE))
#' flow_oneway <- onewayid(flow, attrib = attrib)
#' colSums(flow_oneway[attrib]) == colSums(flow[attrib]) # test if the colSums are equal
#' # Demonstrate the results from onewayid and onewaygeo are identical
#' flow_oneway_geo <- onewaygeo(flowlines, attrib = attrib)
#' plot(flow_oneway$All, flow_oneway_geo$All)
#' flow_oneway_sf <- onewayid(flowlines_sf, 3)
#' plot(flow_oneway_geo, lwd = flow_oneway_geo$All / mean(flow_oneway_geo$All))
#' plot(flow_oneway_sf$geometry, lwd = flow_oneway_sf$All / mean(flow_oneway_sf$All))
#' @export
onewayid.data.frame <- function(x, attrib, id1 = names(x)[1], id2 = names(x)[2],
                                stplanr.key = od_id_order(x, id1, id2)) {
  if (is.numeric(attrib)) {
    attrib_names <- names(x)[attrib]
  } else {
    attrib_names <- attrib
    attrib <- which(names(x) %in% attrib)
  }

  # separate geometry for sf objects
  is_sf <- is(x, "sf")
  if (is_sf) {
    x_sf <- sf::st_sf(stplanr.key[3], geometry = sf::st_geometry(x))
    x <- sf::st_drop_geometry(x)
  }

  x <- dplyr::bind_cols(x, stplanr.key)

  x_oneway_numeric <- dplyr::group_by(x, stplanr.key) %>%
    dplyr::summarise_at(attrib, sum)

  x_oneway_binary <- dplyr::mutate(x, is_two_way = duplicated(stplanr.key)) %>%
    dplyr::group_by(stplanr.key) %>%
    dplyr::summarise(is_two_way = dplyr::last(.data$is_two_way)) %>%
    dplyr::select(-stplanr.key)

  x_oneway_character <- x %>%
    dplyr::transmute(
      id1 = stringr::str_split(.data$stplanr.key, " ", simplify = TRUE)[, 1],
      id2 = stringr::str_split(.data$stplanr.key, " ", simplify = TRUE)[, 2],
      stplanr.key = .data$stplanr.key
    ) %>%
    dplyr::group_by(stplanr.key) %>%
    dplyr::summarise(id1 = dplyr::first(id1), id2 = dplyr::first(id2)) %>%
    dplyr::select(-stplanr.key)

  x_oneway <- dplyr::bind_cols(
    x_oneway_character,
    x_oneway_numeric,
    x_oneway_binary
  )

  if (is_sf) {
    x_sf <- x_sf[!duplicated(x_sf$stplanr.key), ]
    x_oneway <- sf::st_as_sf(dplyr::inner_join(x_oneway, x_sf))
    # class(x_oneway) # sf
  }

  x_oneway$stplanr.key <- NULL
  names(x_oneway)[1:2] <- c(id1, id2)

  return(x_oneway)
}

#' @name onewayid
#' @examples
#' # with spatial data
#' data(flowlines)
#' fo <- onewayid(flowlines, attrib = "All")
#' head(fo@data)
#' plot(fo)
#' sum(fo$All) == sum(flowlines$All)
#' # test results for one line
#' n <- 3
#' plot(fo[n, ], lwd = 20, add = TRUE)
#' f_over_n <- rgeos::gEquals(fo[n, ], flowlines["All"], byid = TRUE)
#' sum(flowlines$All[f_over_n]) == sum(fo$All[n]) # check aggregation worked
#' plot(flowlines[which(f_over_n)[1], ], add = TRUE, col = "white", lwd = 10)
#' plot(flowlines[which(f_over_n)[2], ], add = TRUE, lwd = 5)
#' @export
onewayid.SpatialLines <- function(x, attrib, id1 = names(x)[1], id2 = names(x)[2],
                                  stplanr.key = od_id_order(x, id1, id2)) {
  x_geom <- sp::SpatialLines(x@lines, proj4string = sp::CRS(proj4string(x)))
  x <- x@data

  x_oneway <- onewayid(x, id1, id2, attrib = attrib)
  x_oneway$stplanr.key <- od_id_order(x_oneway[c(id1, id2)])$stplanr.key

  if (length(x_geom) != nrow(x_oneway)) {
    id_old <- paste(x[[id1]], x[[id2]])
    sel <- id_old %in% x_oneway$stplanr.key
    x_geom <- x_geom[sel, ]
  }

  x_oneway <- sp::SpatialLinesDataFrame(sl = x_geom, data = x_oneway, match.ID = FALSE)

  return(x_oneway)
}

#' Generate ordered ids of OD pairs so lowest is always first
#' This function is slow on large datasets, see szudzik_pairing for faster alternative
#'
#' @inheritParams onewayid
#'
#' @examples
#' x <- data.frame(id1 = c(1, 1, 2, 2, 3), id2 = c(1, 2, 3, 1, 4))
#' od_id_order(x) # 4th line switches id1 and id2 so stplanr.key is in order
#' @export
od_id_order <- function(x, id1 = names(x)[1], id2 = names(x)[2]) {
  data.frame(
    stringsAsFactors = FALSE,
    stplanr.id1 = x[[id1]],
    stplanr.id1 = x[[id2]],
    stplanr.key = od_id_character(x[[id1]], x[[id2]])
  )
}
#' Combine two ID values to create a single ID number
#'
#' @details
#' In OD data it is common to have many 'oneway' flows from "A to B" and "B to A".
#' It can be useful to group these an have a single ID that represents pairs of IDs
#' with or without directionality, so they contain 'twoway' or bi-directional values.
#'
#' `od_id*` functions take two vectors of equal length and return a vector of IDs,
#' which are unique for each combination but the same for twoway flows.
#'
#' -  the Szudzik pairing function, on two vectors of equal
#' length. It returns a vector of ID numbers.
#'
#' This function superseeds od_id_order as it is faster on large datasets
#' @param x a vector of numeric, character, or factor values
#' @param y a vector of numeric, character, or factor values
#' @param ordermatters logical, does the order of values matter to pairing, default = FALSE
#' @family od
#' @seealso od_oneway
#' @name od_id
#' @examples
#' (d <- od_data_sample[2:9, 1:2])
#' (id <- od_id_character(d[[1]], d[[2]]))
#' duplicated(id)
#' od_id_szudzik(d[[1]], d[[2]])
#' od_id_max_min(d[[1]], d[[2]])
#' n <- 100
#' ids <- as.character(runif(n, 1e4, 1e7 - 1))
#' # benchmark of methods:
#' x <- data.frame(
#'   id1 = rep(ids, times = n),
#'   id2 = rep(ids, each = n),
#'   val = 1,
#'   stringsAsFactors = FALSE
#' )
#' bench::mark(check = FALSE, iterations = 10,
#'   od_id_order(x),
#'   od_id_character(x$id1, x$id2),
#'   od_id_szudzik(x$id1, x$id2),
#'   od_id_max_min(x$id1, x$id2)
#' )
NULL
#' @rdname od_id
#' @export
od_id_szudzik <- function(x, y, ordermatters = FALSE) {
  if (length(x) != length(y)) {
    stop("x and y are not of equal length")
  }

  if (class(x) == "factor") {
    x <- as.character(x)
  }
  if (class(y) == "factor") {
    y <- as.character(y)
  }
  lvls <- unique(c(x, y))
  x <- as.integer(factor(x, levels = lvls))
  y <- as.integer(factor(y, levels = lvls))
  if (ordermatters) {
    ismax <- x > y
    stplanr.key <- (ismax * 1) * (x^2 + x + y) + ((!ismax) * 1) * (y^2 + x)
  } else {
    a <- ifelse(x > y, y, x)
    b <- ifelse(x > y, x, y)
    stplanr.key <- b^2 + a
  }
  return(stplanr.key)
}
#' @export
#' @rdname od_id
od_id_max_min <- function(x, y) {
  d <- convert_to_numeric(x, y)
  a <- pmax(d$x, d$y)
  b <- pmin(d$x, d$y)
  a * (a + 1) / 2 + b
}

#' @export
#' @rdname od_id
od_id_character <- function(x, y) {
  paste(
    pmin(x, y),
    pmax(x, y)
  )
}

convert_to_numeric <- function(x, y) {
  if (length(x) != length(y)) stop("x and y are not of equal length")
  if (class(x) == "factor") x <- as.character(x)
  if (class(y) == "factor") y <- as.character(y)
  lvls <- unique(c(x, y))
  x <- as.integer(factor(x, levels = lvls))
  y <- as.integer(factor(y, levels = lvls))
  list(x = x, y = y)
}

od_id_order_base <- function(x, y) {
  d <- convert_to_numeric(x, y)
  x <- d$x
  y <- d$y
  paste(pmin(x, y), pmax(x, y))
}

not_duplicated <- function(x) {
  !duplicated(x)
}
#' Aggregate od pairs they become non-directional
#'
#' For example, sum total travel in both directions.
#' @param x A data frame or SpatialLinesDataFrame, representing an OD matrix
#' @param attrib A vector of column numbers or names, representing variables to be aggregated.
#' By default, all numeric variables are selected.
#' aggregate
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @return `oneway` outputs a data frame (or `sf` data frame) with rows containing
#' results for the user-selected attribute values that have been aggregated.
#' @param stplanr.key Optional key of unique OD pairs regardless of the order,
#' e.g., as generated by [od_id_max_min()] or [od_id_szudzik()]
#' @family od
#' @export
#' @details
#' Flow data often contains movement in two directions: from point A to point B
#' and then from B to A. This can be problematic for transport planning, because
#' the magnitude of flow along a route can be masked by flows the other direction.
#' If only the largest flow in either direction is captured in an analysis, for
#' example, the true extent of travel will by heavily under-estimated for
#' OD pairs which have similar amounts of travel in both directions.
#' Flows in both direction are often represented by overlapping lines with
#' identical geometries (see [flowlines()]) which can be confusing
#' for users and are difficult to plot.
#' @examples
#' (od_min = od_data_sample[c(1, 2, 9), 1:6])
#' (od_oneway = od_oneway(od_min))
#' nrow(od_oneway) < nrow(od_min) # result has fewer rows
#' sum(od_min$all) == sum(od_oneway$all) # but the same total flow
#' od_oneway(od_min, attrib = "all")
#' attrib <- which(vapply(flow, is.numeric, TRUE))
#' flow_oneway <- od_oneway(flow, attrib = attrib)
#' colSums(flow_oneway[attrib]) == colSums(flow[attrib]) # test if the colSums are equal
#' # Demonstrate the results from oneway and onewaygeo are identical
#' flow_oneway_geo <- onewaygeo(flowlines, attrib = attrib)
#' flow_oneway_sf <- od_oneway(flowlines_sf)
#' par(mfrow = c(1, 2))
#' plot(flow_oneway_geo, lwd = flow_oneway_geo$All / mean(flow_oneway_geo$All))
#' plot(flow_oneway_sf$geometry, lwd = flow_oneway_sf$All / mean(flow_oneway_sf$All))
#' par(mfrow = c(1, 1))
#' od_max_min <- od_oneway(od_min, stplanr.key = od_id_character(od_min[[1]], od_min[[2]]))
#' cor(od_max_min$all, od_oneway$all)
#' # benchmark performance
#' bench::mark(check = FALSE, iterations = 3,
#'   onewayid(flowlines_sf, attrib),
#'   od_oneway(flowlines_sf)
#' )
od_oneway <- function(x,
           attrib = names(x[-c(1:2)])[vapply(x[-c(1:2)], is.numeric, TRUE)],
           id1 = names(x)[1],
           id2 = names(x)[2],
           stplanr.key = NULL) {
    is_sf <- is(x, "sf")

  if (is.null(stplanr.key)) {
    id1_temp <- x[[id1]]
    x[[id1]] <- pmin(x[[id1]], x[[id2]])
    x[[id2]] <- pmax(id1_temp, x[[id2]])

    if (is_sf) {
      duplicated_oneway <- duplicated(sf::st_drop_geometry(x[c(id1, id2)]))
    } else {
      duplicated_oneway <- duplicated(x[c(id1, id2)])
    }

    if (is_sf) {
      x_sf <- x[!duplicated_oneway, c(id1, id2)]
      x <- sf::st_drop_geometry(x)
    }

    if (is.numeric(attrib)) {
      attrib <- attrib - 2 # account for 1st 2 columns being ids
    }
    x_grouped <- dplyr::group_by(x, !!rlang::sym(id1), !!rlang::sym(id2))
    x_oneway <- dplyr::ungroup(dplyr::summarise_at(x_grouped, attrib, sum))

    if (is_sf) {
      x_oneway <- sf::st_as_sf(dplyr::left_join(x_oneway, x_sf))
      # class(x_oneway) # sf
    }

    return(x_oneway)
  }

  if (is.numeric(attrib)) {
    attrib_names <- names(x)[attrib]
  } else {
    attrib_names <- attrib
    attrib <- which(names(x) %in% attrib)
  }

  # separate geometry for sf objects
  if (is_sf) {
    x_sf <- sf::st_sf(data.frame(stplanr.key, stringsAsFactors = FALSE), geometry = sf::st_geometry(x))
    x <- sf::st_drop_geometry(x)
  }

  x <- dplyr::bind_cols(x, stplanr.key = stplanr.key)

  x_oneway_grouped <- dplyr::group_by(x, stplanr.key)
  x_oneway <- dplyr::ungroup(dplyr::summarise_at(x_oneway_grouped, attrib, sum))

  # # next lines can extract ids - assuming not necessary for now:
  # x_ids_grouped <-
  #   dplyr::summarise(
  #     x_oneway_grouped,
  #     id1 = dplyr::first(!!rlang::sym(id1)),
  #     id2 = dplyr::first(!!rlang::sym(id2))
  #   )
  # x_oneway <- dplyr::bind_cols(
  #   x_oneway,
  #   x_ids_grouped
  # )

  if (is_sf) {
    x_sf <- x_sf[!duplicated(x_sf$stplanr.key), ]
    x_oneway <- sf::st_as_sf(dplyr::inner_join(x_oneway, x_sf))
    # class(x_oneway) # sf
  }

  # x_oneway$stplanr.key <- NULL
  # names(x_oneway)[1:2] <- c(id1, id2)

  return(x_oneway)
}

Try the od package in your browser

Any scripts or data that you put into this service are public.

od documentation built on Sept. 11, 2024, 9:04 p.m.