#' Number of in-frame P-sites per coding sequence or transcript.
#'
#' This function generates a data table that for each transcript contains at
#' least i) its name; ii) its length or the length of its annotated coding
#' sequence (if any); iii) the number of P-sites falling in its annotated coding
#' sequence or in the whole transcript for all samples. When the coverage is
#' computed for coding sequences and not whole transcripts, a chosen number of
#' nucleotides at the beginning and/or at the end of the CDSs can be excluded
#' for restricting the analysis to a portion of the original coding sequence. In
#' this case the output data table includes an additional column reporting the
#' length of the selected region. Moreover, either all P-sites or only in-frame
#' P-sites can be considered. Finally, transcripts without annotated CDS are
#' automatically discarded.
#'
#' @param data Either list of data tables or GRangesList object from
#' \code{\link{psite_info}}.
#' @param annotation Data table as generated by \code{\link{create_annotation}}.
#' @param start_nts Positive integer specifying the number of nucleotides at the
#' beginning of the coding sequences to be excluded from the analisys. Default
#' is 0. This parameter is considered only if \code{whole_transcript} is
#' FALSE.
#' @param stop_nts Positive integer specifying the number of nucleotides at the
#' end of the coding sequences to be excluded from the analisys. Default is 0.
#' This parameter is considered only if \code{whole_transcript} is FALSE.
#' @param in_frame Logical value whether to consider only in-frame P-sites when
#' computing the coverage. Default is TRUE. This parameter is considered only
#' if \code{whole_transcript} is FALSE.
#' @param whole_transcript Logical value whether to compute the coverage based
#' on P-sites falling in any region of the transcript, i.e. CDS and UTRs.
#' Default is FALSE.
#' @return A data table.
#' @examples
#' ## data(reads_list)
#' ## data(mm81cdna)
#' ##
#' ## ## compute and add p-site datails
#' ## psite_offset <- psite(reads_list, flanking = 6, extremity = "auto")
#' ## reads_psite_list <- psite_info(reads_list, psite_offset)
#' ##
#' ## ## Compute the number of in-frame P-sites per whole coding sequences.
#' ## psite_cds <- cds_coverage(reads_psite_list, mm81cdna)
#' ##
#' ## ## Compute the number of in-frame P-sites per coding sequences exluding
#' ## ## the first 15 nucleotides and the last 10 nucleotides.
#' ## psite_cds <- cds_coverage(reads_psite_list, mm81cdna,
#' ## start_nts = 15, stop_nts = 10)
#' ##
#' ## ## Compute the number of P-sites per transcripts.
#' ## psite_cds <- cds_coverage(reads_psite_list, mm81cdna,
#' ## whole_transcript = TRUE)
#' @import data.table
#' @export
cds_coverage <- function(data, annotation, start_nts = 0, stop_nts = 0,
in_frame = TRUE, whole_transcript = FALSE) {
if(whole_transcript == TRUE & (start_nts != 0 | stop_nts != 0)){
cat("\n")
warning("either start_nts or stop_nts are not 0 but parameter whole_transcript = TRUE: start_nts and stop_nts won't be considered\n", call. = FALSE)
}
if(whole_transcript == FALSE){
psite_cds <- annotation[l_cds > start_nts + stop_nts,
list(transcript, length_cds = l_cds)]
if(start_nts != 0 | stop_nts != 0){
psite_cds[, length_selection := length_cds - start_nts - stop_nts]
}
} else {
psite_cds <- annotation[, list(transcript, length_tr = l_tr)]
}
for (n in names(data)) {
cat(sprintf("processing %s\n", n))
dt <- data[[n]]
if(class(dt)[1] == "GRanges"){
dt <- as.data.table(dt)[, c("width", "strand") := NULL
][, seqnames := as.character(seqnames)]
setnames(dt, c("seqnames", "start", "end"), c("transcript", "end5", "end3"))
}
if(whole_transcript == FALSE){
dt <- dt[psite_from_start >= start_nts &
psite_from_stop <= -stop_nts]
if(in_frame == TRUE){
dt <- dt[psite_from_start %% 3 == 0]
}
}
temp_count <- dt[, .N, by = transcript]
psite_cds <- merge(psite_cds, temp_count, by = "transcript", all.x = TRUE)
setnames(psite_cds, old = "N", new = n)
}
psite_cds[is.na(psite_cds)] <- 0
return(psite_cds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.