R/findORFs.r

Defines functions findORFs

Documented in findORFs

#' Identify all variants of open reading frames in a nucleotide sequence
#' @description Identify all possible variants of open reading frames
#'     in a nucleotide sequence of interest.
#' @param x character string giving the nucleotide sequence of interest.
#' @param codStart character string with type of start codon: "ATG", "GTG",
#'      "TTG" or "CTG". Default value is "ATG".
#' @return matrix with start and stop positions, length and sequence of
#'     identified variants of open reading frames.
#' @author Vasily V. Grinev
#' @examples
#' x <- "AAAATGGCTGCGTAATGCAAAATGGCTGCGAATGCAAAATGGCTGCGAATGCCGGCACGTTGCTACGT"
#' orf <- findORFs(x=x, codStart="ATG")
#' @export

findORFs <- function(x, codStart="ATG"){
    ### Calculation coordinates of defined start codon and all stop codons 
    #   in the sequence of interest.
    allPos <- codonStartStop(x=x)
    allStarts <- allPos[[1]][allPos[[2]] == codStart]
    allStops <- allPos[[1]][allPos[[2]] %in% c("TAA", "TAG", "TGA")]
    ### Development a matrix of in-frame calculated coordinates.
    inFrame <- outer(X=allStops, Y=allStarts, FUN="-")
    L <- inFrame[lower.tri(x=inFrame, diag=TRUE)]/3
    L <- round(x=L) == L & L > 0
    if (length(x=L[L == "TRUE"]) > 0){
        ### Calculation of the in-frame start codons.
        inFrameStarts <- t(x=inFrame)
        inFrameStarts[] <- allStarts
        inFrameStarts <- t(x=inFrameStarts)
        inFrameStarts <- inFrameStarts[lower.tri(x=inFrameStarts, diag=TRUE)]
        inFrameStarts <- inFrameStarts[L]
        ### Calculation of the in-frame stop codons.
        inFrameStops <- inFrame
        inFrameStops[] <- allStops
        inFrameStops <- inFrameStops[lower.tri(x=inFrameStops, diag=TRUE)]
        inFrameStops <- inFrameStops[L]
        ### Calculation the coordinates of open reading frames.
        orfs <- cbind(inFrameStarts, inFrameStops)
        orfs <- aggregate(x=orfs[, "inFrameStops"],
                          by=list(orfs[, "inFrameStarts"]),
                          FUN=min)
        orfs <- aggregate(x=orfs[, "Group.1"], by=list(orfs[, "x"]), FUN=min)
        orfs <- orfs[order(orfs[, "x"]), ]
        seqorfs <- substring(x, first=orfs[, "x"], last=orfs[, "Group.1"] + 2)
        ### Final object.
        orfs <- list(orfs[, "x"],
                     orfs[, "Group.1"] + 2,
                     orfs[, "Group.1"] - orfs[, "x"] + 3,
                     seqorfs)
        orfs <- do.call(what=cbind, args=orfs)
        colnames(orfs) <- c("start", "end", "length", "orf.sequence")
        return(orfs)
    }
    return(NULL)
}
rfctbio-bsu/ORFhunteR documentation built on April 23, 2023, 8:29 p.m.