R/circular_cut.R

Defines functions circular_cut

Documented in circular_cut

##' Cut a fixed-length window out of a circular sequence
##'
##' Returns \code{len} consecutive elements of \code{seq_data}, beginning at
##' position \code{start.pos}. The sequence is treated as circular: when the
##' window runs past the last element it continues from the first element.
##'
##' @param seq_data Full-length sequence, stored as a vector with one element
##'   per position. Its length is measured with \code{length()}, so a single
##'   string is treated as a sequence of length one.
##' @param len Number of elements to return; a single non-negative number.
##' @param start.pos 1-based position of the first element of the cut; a single
##'   number between 1 and \code{length(seq_data)}.
##' @details The \code{"name"} and \code{"Annot"} attributes of \code{seq_data}
##'   are copied onto the result when they are present. If \code{len} is larger
##'   than the sequence, the window wraps around as many times as needed. A
##'   \code{len} of zero gives an empty vector.
##' @note This function is not vectorized: \code{len} and \code{start.pos} must
##'   each have length 1, and anything else raises an error.
##' @return A vector of length \code{len}, of the same type as \code{seq_data},
##'   holding the chopped sequence.
##'
circular_cut <- function(seq_data, len, start.pos) {

  # The cut is defined for exactly one window, so reject vector arguments up
  # front with a readable message instead of failing later inside `if` or `:`.
  if (length(len) != 1L || length(start.pos) != 1L) {
    stop(
      "circular_cut() is not vectorized: len and start.pos must each have length 1.",
      call. = FALSE
    )
  }
  if (!is.numeric(len) || is.na(len) || len < 0) {
    stop("len must be a single non-negative number.", call. = FALSE)
  }

  # One element of seq_data is one position of the sequence
  seq_length <- length(seq_data)

  # A start position outside the sequence has no meaning; this check also
  # rejects every start position for an empty sequence.
  if (!is.numeric(start.pos) || is.na(start.pos) ||
      start.pos < 1 || start.pos > seq_length) {
    stop(
      "start.pos must be a single number between 1 and length(seq_data).",
      call. = FALSE
    )
  }

  # Work with zero-based offsets so the modulo wraps positions past the end
  # back to the beginning, then shift back to R's one-based indexing.
  # Example: length 26, start.pos 23, len 5 gives positions 23 24 25 26 1.
  # seq_len() (rather than 1:len) keeps the len == 0 case empty.
  offsets <- seq_len(len) - 1
  positions <- (start.pos - 1 + offsets) %% seq_length + 1
  chopped_seq <- seq_data[positions]

  # Carry the identifying attributes of the sequence over to the cut
  attr(chopped_seq, "name") <- attr(seq_data, "name")
  attr(chopped_seq, "Annot") <- attr(seq_data, "Annot")

  chopped_seq
}
adsteen/genomechop documentation built on May 18, 2019, 8:11 p.m.