R/window.R

Defines functions slide_plot slide_apply slide_codon slide

Documented in slide slide_apply slide_codon slide_plot

#' Generate sliding window intervals
#'
#' \code{slide} creates a data.table defining sliding window positions for
#' analyzing sequences or data along a continuous range. Window centers start
#' at \code{from + before} and advance by \code{step} without exceeding
#' \code{to - after}, so every window lies entirely within \code{[from, to]}.
#'
#' @param from Integer specifying the start position of the analysis range.
#' @param to Integer specifying the end position of the analysis range.
#' @param step Single positive number giving the distance between consecutive
#'   window centers (default: 1). Windows do not overlap when \code{step} is
#'   at least \code{1 + before + after}.
#' @param before Integer specifying the number of positions to include before
#'   the window center (default: 0). Determines the left boundary of each window.
#' @param after Integer specifying the number of positions to include after
#'   the window center (default: 0). Determines the right boundary of each window.
#' @return A data.table with three columns:
#'   \itemize{
#'     \item \code{start}: Start position of each window
#'     \item \code{center}: Center position of each window
#'     \item \code{end}: End position of each window
#'   }
#'   An error is raised when a single window does not fit into the range or
#'   when \code{step} is not a single positive finite number.
#' @export
#' @examples
#' # Create sliding windows with step size 2 and window size 3
#' slide(1, 10, step = 2, before = 1, after = 1)
#'
slide <- function(from, to, step = 1, before = 0, after = 0){
    # A single window spans 1 + before + after positions and must fit in range.
    if(to - from + 1 < (1 + before + after)){
        stop('The window size is too large')
    }
    # Reject zero, negative, missing or non-scalar steps up front; otherwise
    # seq() fails with an error that does not mention the `step` argument.
    if(!is.numeric(step) || length(step) != 1 || !is.finite(step) || step <= 0){
        stop('step must be a single positive number')
    }
    center <- seq(from + before, to - after, by = step)
    data.table::data.table(
        start = center - before,
        center = center,
        end = center + after
    )
}


#' Generate sliding windows for codon-level analysis
#'
#' \code{slide_codon} builds sliding window intervals in nucleotide
#' coordinates whose boundaries always coincide with codon boundaries.
#' Trailing nucleotides that do not form a complete codon are ignored.
#' The intervals themselves are produced by \code{\link{slide}}.
#'
#' @param seq A DNA sequence as a DNAString object, or any object that can
#'   be coerced to DNAString.
#' @param step Integer specifying the step size between consecutive window
#'   centers in codons (default: 1). Windows do not overlap when \code{step}
#'   is at least \code{1 + before + after}.
#' @param before Integer specifying the number of codons to include before
#'   the center codon (default: 0).
#' @param after Integer specifying the number of codons to include after
#'   the center codon (default: 0).
#' @return A data.table with three columns containing nucleotide positions:
#'   \itemize{
#'     \item \code{start}: First nucleotide of the first codon in each window
#'     \item \code{center}: First nucleotide of the center codon of each window
#'     \item \code{end}: Last nucleotide of the last codon in each window
#'   }
#' @export
#' @examples
#' # Create sliding windows for codon analysis
#' x <- Biostrings::DNAString('ATCTACATAGCTACGTAGCTCGATGCTAGCATGCATCGTACGATCGTCGATCGTAG')
#' slide_codon(x, step = 3, before = 1, after = 1)
#'
slide_codon <- function(seq, step = 1, before = 0, after = 0){
    dna <- if(inherits(seq, 'DNAString')) seq else Biostrings::DNAString(seq)

    # Number of nucleotides that belong to complete codons.
    coding_nt <- 3 * (length(dna) %/% 3)
    codons_per_window <- 1 + before + after
    if(coding_nt < 3 * codons_per_window){
        stop('The window size is too large')
    }

    # Convert codon units to nucleotide units. The extra 2 on `after` extends
    # each window to the last nucleotide of its final codon.
    slide(
        from = 1,
        to = coding_nt,
        step = 3 * step,
        before = 3 * before,
        after = 3 * after + 2
    )
}


#' Apply a codon usage index to sliding windows
#'
#' \code{slide_apply} splits a sequence into codon-aligned sliding windows
#' (see \code{\link{slide_codon}}), counts codons in every window and passes
#' the counts to \code{.f}.
#'
#' @param seq DNAString, the sequence. Other objects are coerced with
#'   \code{Biostrings::DNAString}.
#' @param .f function, the codon index calculation function to apply, for
#'   example, \code{get_enc}. It is called with the result of
#'   \code{count_codons} on the window sequences as its first argument.
#' @param step integer, the step size in number of codons
#' @param before integer, the number of codons before the center of a window
#' @param after integer, the number of codons after the center of a window
#' @param ... additional arguments to pass to the function \code{.f}
#' @return data.table with columns \code{start}, \code{center}, \code{end}
#'   and \code{index}, the latter holding the values returned by \code{.f}.
#' @importFrom data.table ':='
#' @export
#' @examples
#' slide_apply(yeast_cds[[1]], get_enc, step = 1, before = 10, after = 10)
#'
slide_apply <- function(seq, .f, step = 1, before = 0, after = 0, ...){
    # Bind the column name used with `:=` below so R CMD check does not
    # report an undefined global variable.
    index <- NULL
    if(!inherits(seq, 'DNAString')){
        seq <- Biostrings::DNAString(seq)
    }
    windt <- slide_codon(seq, step = step, before = before, after = after)

    # Extract one subsequence per window and compute the index on their
    # codon counts.
    window_seqs <- Biostrings::DNAStringSet(
        Biostrings::Views(seq, start = windt$start, end = windt$end)
    )
    index_values <- .f(count_codons(window_seqs), ...)

    windt[, index := index_values]
    # `[]` makes the modified table print when returned at top level.
    windt[]
}


#' Plot sliding window codon usage
#'
#' \code{slide_plot} visualizes a codon usage index along a sequence. The
#' index of each window is drawn as a line with points against the codon
#' position of the window center (\code{ceiling(center / 3)}), together with
#' a loess smooth and a dashed red horizontal line at the median index.
#' Windows with a missing index are ignored when computing the median.
#'
#' @param windt data.table, the sliding window codon usage
#'   generated by \code{slide_apply}. Must contain the columns \code{center}
#'   and \code{index}.
#' @param index_name character, the name of the index to display as the
#'   y-axis label.
#' @return ggplot2 plot.
#' @export
#' @examples
#' sw <- slide_apply(yeast_cds[[1]], get_enc, step = 1, before = 10, after = 10)
#' slide_plot(sw)
slide_plot <- function(windt, index_name='Index'){
    # Bind names used inside aes() so R CMD check does not report undefined
    # global variables.
    center <- index <- NULL
    # na.rm = TRUE keeps the reference line when some windows have no index
    # value; without it a single NA would make the median NA.
    median_index <- stats::median(windt$index, na.rm = TRUE)
    ggplot2::ggplot(windt, ggplot2::aes(x = ceiling(center/3), y = index)) +
        ggplot2::geom_line() +
        ggplot2::geom_point() +
        ggplot2::geom_smooth(method = 'loess', formula = y ~ x) +
        ggplot2::geom_hline(yintercept = median_index, linetype = 2, color = 'red') +
        ggplot2::labs(x = 'Codon position', y = index_name) +
        ggplot2::theme_classic(base_size = 12) +
        ggplot2::theme(axis.text = ggplot2::element_text(color = 'black'))
}

Try the cubar package in your browser

Any scripts or data that you put into this service are public.

cubar documentation built on Aug. 21, 2025, 5:40 p.m.