R/intersect_ranges.R

Defines functions intersect_ranges

Documented in intersect_ranges

#' intersect_ranges
#'
#' Function to find the maximum intersection between a set
#' of numeric ranges, in this case first and last appearence
#' datums on taxonomic ranges.
#' @param x A numeric data.frame or matrix of ranges. If
#' just two columns are supplied, the first column is assumed
#' to be the srt column
#' @param srt If x contains more than two columns, srt is the
#' name of the range base column - the FAD
#' @param end If x contains more than two columns, end is the
#' name of the range top column - the LAD
#' @param verbose A logical indicating whether the function
#' should report progress to the console
#' @return A matrix with three columns, indicating the intersection
#' (FAD and LAD) and the number of ranges that intersection
#' encompasses
#' @export
#' @examples
#' # plot an example
#' df <- cbind(c(1.5, 3, 2.1, 1), c(6, 5, 3.7, 10.1))
#' plot(1:11, ylim = c(0, 5), col = NA)
#' segments(x0 = c(1.5, 3, 2.1, 1), y0 = 1:4, x1 = c(6, 5, 3.7, 10.1), y1 = 1:4)
#' abline(v = 3, col  = "red", lty = 2)
#' abline(v = 3.7, col  = "red", lty = 2)
#' # intersect function
#' intersect_ranges(df)

intersect_ranges <- function(x, srt = NULL, end = NULL, verbose = TRUE) {

  # check x
  if(!exists("x")) {
    stop("Please supply x as a dataframe or matrix of ranges")
  }
  if(!class(x)[1] %in% c("data.frame", "matrix")) {
    stop("Please supply x as a dataframe or matrix of ranges")
  }
  if(ncol(x) < 2) {
    stop("x must minimally contain a srt column and a lad column")
  }
  if(ncol(x) > 2) {
    if(is.null(srt) | is.null(end))
      stop("If x contains more than two columns, srt and end must be specified")
    x <- x[, c(srt, end)]
  }
  if(!is.numeric(x[,1]) | !is.numeric(x[,2])) {
    stop("x must be numeric")
  }
  if(any(is.na(x))) {
    warning("x contains missing values - these rows will be dropped")
    x <- x[complete.cases(x),]
  }
  if(nrow(x) < 1) {
    stop("x does not contain any rows (this may be due to internal removal of incomplete rows)")
  }
  if(any(x[,srt] < x[,end])) {
    stop("One or more maximum ages in x are smaller than their corresponding minimum ages")
  }

  # rank everything
  # in case the same date is given, they will have the same rank.
  # this will create additional 'slots in the logical matrix,
  # which should not be an issue.
  ranked <- rank(x, ties.method = "min")

  # copy the dimensions
  dim(ranked) <- dim(x)

  # compare the ranks
  where <- rep(0, max(ranked) * 2 - 1)
  for(i in 1:nrow(x)){
    # index to increment
    ind <- seq(ranked[i,1] * 2 - 1, ranked[i,2] * 2 - 1)
    # increment in this interval
    where[ind] <- where[ind] + 1
  }

  # no overlap
  if(all(where == 1)){
    if(verbose)	message("No overlap found")
    results <- cbind(1, 1, 1)
    colnames(results) <- c("lb", "ub", "N")
  } else {

    # find the indices!
    # what is the maximum overlap?
    maxVals <- max(where)

    # print to the terminal
    if(verbose) {
      # report the number of overlapping ranges
      if(maxVals == nrow(x)){
        message("All ranges overlap.")
      } else {
        message(paste(maxVals, "out of the", nrow(x), "ranges overlap."))
      }
    }

    # are there multiple solutions???
    streaks <- which(maxVals == where)

    # are there multiple solutions???
    # shift the index of the difference by 1
    diffs <- c(0, diff(streaks))

    # changepoints between solutions (index of diffs)
    # the number of solutions is the length
    changes <- c(1, which(diffs > 1))

    # final result - one row for every solution!
    results <- matrix(ncol = 3, nrow = length(changes))

    # for every solution
    for(i in 1:length(changes)){
      # normal case
      if(i != length(changes)){
        # one solutions
        solStreaks <- streaks[changes[i]:(changes[i + 1] - 1)]
        # in case there are ties, there will be overlaps that leads to repetitions
        # last iteration
      } else {
        solStreaks <- streaks[changes[i]:length(streaks)]
      }

      # find the values that correspond to the indices
      fadInLogical <- min(solStreaks)
      ladInLogical <- max(solStreaks)

      # has to be transformed
      # do an assertion
      # the overlap intervals should range from date to date.
      # neither of these should be even col. indices in the logical matrix,
      # or things are f-ed up.
      if(fadInLogical%%2 == 0) stop("FAD in logical matrix should not be even!")
      if(ladInLogical%%2 == 0) stop("FAD in logical matrix should not be even!")

      # the rank of starting interval
      startDateRank <- (fadInLogical + 1) / 2
      endDateRank <- (ladInLogical + 1) / 2

      # corresponding dates
      first <- x[ranked == startDateRank]
      last <- x[ranked == endDateRank]

      # take the first result in the case of potential ties that rank() outputs.
      results[i, ] <- c(first[1],last[1], maxVals[1])
    }
    # copy the column names of the original entry
    colnames(results) <- c("lb", "ub", "N")
    # indicate how many overlaps there are
    rownames(results) <- paste0(maxVals, "_", 1:nrow(results))
  }
  # return object
  return(as.data.frame(results))
}

Try the fossilbrush package in your browser

Any scripts or data that you put into this service are public.

fossilbrush documentation built on June 10, 2025, 9:14 a.m.