R/oneway.R
In od: Manipulate and Map Origin-Destination Data

Documented in od_id_character od_id_max_min od_id_order od_id_szudzik od_oneway

#' Aggregate OD pairs they become non-directional
#'
#' For example, sum total travel in both directions.
#' @param x A data frame or SpatialLinesDataFrame, representing an OD matrix
#' @param attrib A vector of column numbers or names, representing variables to be aggregated.
#' By default, all numeric variables are selected.
#' @param FUN The aggregating function such as `sum` (the default) and `mean`
#' @param ... Further arguments passed to or used by methods
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @return `oneway` outputs a data frame (or `sf` data frame) with rows containing
#' results for the user-selected attribute values that have been aggregated.
#' @param oneway_key Optional key of unique OD pairs regardless of the order,
#' e.g., as generated by [od_id_max_min()] or [od_id_szudzik()]
#' @export
#' @details
#' Flow data often contains movement in two directions: from point A to point B
#' and then from B to A. This can be problematic for transport planning, because
#' the magnitude of flow along a route can be masked by flows the other direction.
#' If only the largest flow in either direction is captured in an analysis, for
#' example, the true extent of travel will by heavily under-estimated for
#' OD pairs which have similar amounts of travel in both directions.
#' Flows in both direction are often represented by overlapping lines with
#' identical geometries which can be confusing
#' for users and are difficult to plot.
#' @examples
#' (od_min = od_data_df[c(1, 2, 1), 1:4])
#' od_min[3, 1:2] = rev(od_min[3, 1:2])
#' od_min[3, 3:4] = od_min[3, 3:4] - 5
#' (od_oneway = od_oneway(od_min))
#' nrow(od_oneway) < nrow(od_min) # result has fewer rows
#' sum(od_min$all) == sum(od_oneway$all) # but the same total flow
#' (od_oneway = od_oneway(od_min, FUN = mean))
#' od_oneway(od_min, attrib = "all")
#' od_min$all[3] = NA
#' (od_oneway = od_oneway(od_min, FUN = mean, na.rm = TRUE))
od_oneway = function(x,
                     attrib = names(x[-c(1:2)])[vapply(x[-c(1:2)], is.numeric, TRUE)],
                     FUN = sum,
                     ...,
                     id1 = names(x)[1],
                     id2 = names(x)[2],
                     oneway_key = NULL) {
  # is_sf = is(x, "sf") # only make it work with dfs for now

  if (is.null(oneway_key)) {
    id1_temp = x[[id1]]
    x[[id1]] = pmin(x[[id1]], x[[id2]])
    x[[id2]] = pmax(id1_temp, x[[id2]])
  }

  if (is.numeric(attrib)) {
    attrib = attrib - 2 # account for 1st 2 columns being ids
  }
  x_oneway = stats::aggregate(x[attrib], list(o = x[[id1]], d = x[[id2]]), FUN, ...)

  if (is.numeric(attrib)) {
    attrib_names = names(x)[attrib]
  } else {
    attrib_names = attrib
    attrib = which(names(x) %in% attrib)
  }

  return(x_oneway)
}
#' Generate ordered ids of OD pairs so lowest is always first
#' This function is slow on large datasets, see szudzik_pairing for faster alternative
#'
#' @param x A data frame representing OD pairs
#' @param id1 Optional (it is assumed to be the first column)
#' text string referring to the name of the variable containing
#' the unique id of the origin
#' @param id2 Optional (it is assumed to be the second column)
#' text string referring to the name of the variable containing
#' the unique id of the destination
#' @examples
#' x = data.frame(id1 = c(1, 1, 2, 2, 3), id2 = c(1, 2, 3, 1, 4))
#' od_id_order(x) # 4th line switches id1 and id2 so oneway_key is in order
#' @export
od_id_order = function(x, id1 = names(x)[1], id2 = names(x)[2]) {
  data.frame(
    stringsAsFactors = FALSE,
    stplanr.id1 = x[[id1]],
    stplanr.id1 = x[[id2]],
    oneway_key = od_id_character(x[[id1]], x[[id2]])
  )
}
#' Combine two ID values to create a single ID number
#'
#' @details
#' In OD data it is common to have many 'oneway' flows from "A to B" and "B to A".
#' It can be useful to group these an have a single ID that represents pairs of IDs
#' with or without directionality, so they contain 'twoway' or bi-directional values.
#'
#' `od_id*` functions take two vectors of equal length and return a vector of IDs,
#' which are unique for each combination but the same for twoway flows.
#'
#' -  the Szudzik pairing function, on two vectors of equal
#' length. It returns a vector of ID numbers.
#'
#' This function superseeds od_id_order as it is faster on large datasets
#' @param x a vector of numeric, character, or factor values
#' @param y a vector of numeric, character, or factor values
#' @param ordermatters logical, does the order of values matter to pairing, default = FALSE
#' @family od
#' @seealso od_oneway
#' @name od_id
#' @examples
#' (d = od_data_df[2:9, 1:2])
#' (id = od_id_character(d[[1]], d[[2]]))
#' duplicated(id)
#' od_id_szudzik(d[[1]], d[[2]])
#' od_id_max_min(d[[1]], d[[2]])
NULL
#' @rdname od_id
#' @export
od_id_szudzik = function(x, y, ordermatters = FALSE) {
  if (length(x) != length(y)) {
    stop("x and y are not of equal length")
  }

  if (methods::is(x, "factor")) {
    x = as.character(x)
  }
  if (methods::is(y, "factor")) {
    y = as.character(y)
  }
  lvls = unique(c(x, y))
  x = as.integer(factor(x, levels = lvls))
  y = as.integer(factor(y, levels = lvls))
  if (ordermatters) {
    ismax = x > y
    oneway_key = (ismax * 1) * (x^2 + x + y) + ((!ismax) * 1) * (y^2 + x)
  } else {
    a = ifelse(x > y, y, x)
    b = ifelse(x > y, x, y)
    oneway_key = b^2 + a
  }
  return(oneway_key)
}
#' @export
#' @rdname od_id
od_id_max_min = function(x, y) {
  d = convert_to_numeric(x, y)
  a = pmax(d$x, d$y)
  b = pmin(d$x, d$y)
  a * (a + 1) / 2 + b
}

#' @export
#' @rdname od_id
od_id_character = function(x, y) {
  paste(
    pmin(x, y),
    pmax(x, y)
  )
}

convert_to_numeric = function(x, y) {
  if (length(x) != length(y)) stop("x and y are not of equal length")
  if (methods::is(x, "factor")) x = as.character(x)
  if (methods::is(y, "factor")) y = as.character(y)
  lvls = unique(c(x, y))
  x = as.integer(factor(x, levels = lvls))
  y = as.integer(factor(y, levels = lvls))
  list(x = x, y = y)
}

od_id_order_base = function(x, y) {
  d = convert_to_numeric(x, y)
  x = d$x
  y = d$y
  paste(pmin(x, y), pmax(x, y))
}

not_duplicated = function(x) {
  !duplicated(x)
}