R/greedy.R

Defines functions two_ranges_greedy greedy_algorithm

Documented in greedy_algorithm

#' Greedy algorithm for a set of overlapping intervals.
#'
#' @description A function to create a set of non-overlapping intervals using a
#' greedy algorithm by prefering intervals with higeher scores.
#'
#' @param granges_list A set of intervals as a \linkS4class{GRangesList} object.
#'   Requires a score column to be specified in \code{max_score}.
#' @param max_score Name of a column in \code{granges_list} containing integers
#'   or floats to be used as a score to maximize over.
#' @param overlap A non-negative integer specifying the allowed overlap between
#'   two intervals.
#' @param is_circular Boolean. If set true, intervals are assumed to be on a
#'   circular scale.
#' @param overhang A non-negative integer how much of the end is overlapping
#'   with the start. specifying for a linearized circular scale  Ignored if
#'   is_circular is FALSE.
#' @param use_strand Boolean. If TRUE, strand information is used to detect
#'   overlaps
#' @param second_range Optional, name of a column in granges_list containing
#'   another \linkS4class{GRangesList} object. If set, for intervals in
#'   granges_list to be compatible, they also have to be compatible in
#'   second_range.
#' @return A \linkS4class{GRangesList} object with intervals compatible
#'   according to the specified options.
#' @details The algorithm seeks to maximize the sum of a score given in
#'   max_score, but does not necessarily return the best possible solution.

#'  Created mainly for comparison with the WIS-algorithm function [get_wis].
#'
#'  When is_circular is TRUE, intervals exceeding the end of the scale given by
#'  (seqlength(granges_list) - overhang) are wrapped to the beginning.
#'
#'
#' @seealso \url{https://doi.org/10.1093/bioinformatics/bth324} for description
#'   of the algorithm
#' @export
#'
#'
#'
################################################################################
#greedy_algorithm
################################################################################
greedy_algorithm <- function(granges_list, max_score = "score",
                             overlap = 0, is_circular = F,
                             overhang = 0, use_strand = F,
                             second_range = NULL) {

  Check <- checkmate::makeAssertCollection()
  checkmate::assertClass(granges_list, classes = "GRanges", add = Check)
  checkmate::assertSubset(max_score,
               choices = colnames(mcols(granges_list)),
               empty.ok = F,
               add = Check)
  checkmate::assertInt(overlap, na.ok = F, lower = 0, add = Check)
  checkmate::assertInt(overhang, na.ok = F, lower = 0, add = Check)
  checkmate::assertSubset(second_range,
               choices = colnames(mcols(granges_list)),
               empty.ok = T,
               add = Check)

  checkmate::reportAssertions(Check)

  # cat("greedy_algorithm was called with following parameters:\nmax_score\t", max_score, "\noverlap\t", overlap,
  #     "\nis_circular\t", is_circular, "\noverhang\t", overhang, "\nuse_strand\t", use_strand, "\nsecond_range\t",
  #     second_range, "\n")
  start_g <- Sys.time()
  print("in greedy")
  if(length(isCircular(granges_list)) > 1) {
    warning("BLAST-Hits consist of more than one sequence, behaviour of
            algorithm is uncertain in that case")
  }
  isCircular(granges_list) <- rep(is_circular,
                                  length(isCircular(granges_list)))

  # remove stuff that aligns only to the overhang
  if (is_circular) {
    seqlengths(granges_list) <- seqlengths(granges_list) - overhang
    granges_list <- granges_list[which(
      start(granges_list) < seqlengths(granges_list)
    )]
  }


  granges_list$ID <- 1: length(granges_list)

  if ( length( granges_list ) > 1 ) {
    granges_list <- sort(granges_list, decreasing = T,
                         by = ~eval(as.name(max_score)))
    # create new empty set to be filled and run algorithm
    new_granges <- granges_list[rep(FALSE, times = length(granges_list))]
    while ( length(granges_list) > 0) {
      next_range <- granges_list[1]
      no_overlaps <- which(countOverlaps(granges_list,
                                         next_range,
                                         ignore.strand = !use_strand,
                                         minoverlap = (overlap + 1)) == 0)
      # if second set is given, check for its compatibility
      if (!is.null(second_range)) {
        second_no_overlaps <- which(
          countOverlaps(mcols(granges_list[no_overlaps])[[second_range]],
                        mcols(next_range)[[second_range]], ignore.strand = !use_strand,
                        minoverlap = (overlap + 1)) == 0)
        no_overlaps <- no_overlaps[second_no_overlaps]
      }
      granges_list <- granges_list[no_overlaps,]
      new_granges <- c(new_granges, next_range)
    }
    return(new_granges)
  } else { return( granges_list ) }
}




two_ranges_greedy <- function(granges_list, is_circular = F, overhang = 0, use_strand = F,
                                   max_score = "score",
                                   overlap = 0,
                                   second_range = NULL) {
  print("running get wis on first range")
  granges_list <- greedy_algorithm(granges_list, is_circular = is_circular, overhang = overhang, use_strand = use_strand,
                              max_score = max_score,
                              overlap = overlap)
  if (!is.null(second_range)) {
    print("running get wis for given second range: ")
    print(second_range)
    granges_list$secondID <- 1:length(granges_list)
    second_list <- mcols(granges_list)[[second_range]]
    mcols(second_list) <- mcols(granges_list)
    second_list <- greedy_algorithm(second_list, is_circular = is_circular, overhang = overhang, use_strand = use_strand,
                                    max_score = max_score,
                                    overlap = overlap)
    granges_list <- granges_list[second_list$secondID]
    granges_list$secondID <- NULL
  }
  return(granges_list)
}
robbueck/wisard documentation built on Jan. 25, 2022, 12:35 a.m.