#' Classify tsRNA/tRFs from PAC object
#'
#' Classifying tRFs/tsRNA into 5'-tRF/halves, 3'-tRFs/halves, i'-tRFs/halves, as
#' well as acceptor and decoder isotypes.
#'
#' Given a map object with range types generated by
#' \code{\link[seqpac]{PAC_mapper}} followed by
#' \code{\link[seqpac]{map_rangetype}} functions, sequences in a PAC object are
#' classified according the terminals (5'/3'/i'), anticodon loop (half/tRF), and
#' isotype (decoder/acceptor) of the full length tRNA.
#'
#' @family PAC analysis
#'
#' @seealso \url{https://github.com/Danis102} for updates on the current
#' package.
#'
#' @param PAC PAC-list object.
#'
#' @param map Map object with loop annotations generated by
#' \code{\link[seqpac]{PAC_mapper}} followed by
#' \code{\link[seqpac]{map_rangetype}} functions. Note, only full length
#' reference tRNAs annotated with 3 loops in the ss file (see:
#' \code{\link[seqpac]{map_rangetype}}) are used in the classifications.
#' Explanation: sometimes tRNAscan-SE scans results in ss files with
#' incomplete secondary structures with either more or fewer loops than the D,
#' T and anticodon loops found in canonical tRNAs. Such tRNAs will be removed
#' from the analysis.
#'
#' @param terminal Integer setting the terminal tolerance. Example, when
#' terminal is set to 5 (default) sequenced reads starting within the 5 first
#' nucleotides of the full-length tRNA reference will be classifed as 5'. If
#' it instead ends within the 5 last nucleotides of the full-length tRNA
#' reference it will be classified as 3'. If a read neither starts nor ends
#' within the terminal threshold it will be classified as i' (internal). Note,
#' for tsRNA/tRF classification we recommend a threshold of terminal=2 (not
#' default). Advisably, however, \code{\link[seqpac]{PAC_mapper}} runs with
#' N_up = "NNN" and N_down = "NNN" when mapping tsRNA/tRFs. Thus, the default
#' threshold of 5 is used when full-length tRNAs has been extended with NNN
#' up- and downstream to compensate for possible fragments from mature tRNAs
#' (3+2). Always double check the exact fragment using
#' \code{\link[seqpac]{PAC_covplot}}.
#'
#' @return Merged PAC object with an extended Anno table containing the
#' tRF/tsRNA classifications.
#'
#' @examples
#'
#' # Important: See ?PAC_trna on how to do tRNA analysis using Seqpac
#'
#' #'###########################################################
#'### tRNA classification in seqpac
#'# (More details see vignette and manuals.)
#'
#'##-------------------------------##
#'## Setup environment for testing ##
#'
#'# First create an annotation blank PAC with group means
#'load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata",
#' package = "seqpac", mustWork = TRUE))
#'anno(pac) <- anno(pac)[,1, drop=FALSE]
#'pac_trna <- PAC_summary(pac, norm = "cpm", type = "means",
#' pheno_target=list("stage"), merge_pac = TRUE)
#'
#'
#'# Then load paths to example trna ref and ss files
#'trna_ref <- system.file("extdata/trna2", "tRNA2.fa",
#' package = "seqpac", mustWork = TRUE)
#'
#'ss_file <- system.file("extdata/trna2", "tRNA2.ss",
#' package = "seqpac", mustWork = TRUE)
#'
#'
#'##--------------------------------------##
#'## Create a map object using PAC_mapper ##
#'map_object <- PAC_mapper(pac_trna, ref=trna_ref,
#' N_up = "NNN", N_down = "NNN",
#' mismatches=0, threads=2,
#' report_string=TRUE, override = TRUE)
#'# Warning: override = TRUE, will remove everything in temporary output path.
#'# Note: bowtie indexes are not needed for PAC_mapper.
#'
#'
#'##-------------------------------------------##
#'## Classify tRNA fragment with map_rangetype ##
#'
#'# Classify according to loop structure using ss file provided with seqpac
#'map_object_ss <- map_rangetype(map_object, type="ss", ss=ss_file,
#' min_loop_width=4)
#'# Note 1: You may download your own ss file at for example GtRNAdb
#'# Note 2: The ss file must match the reference used in creating the map_object
#'
#' map_object_ss[[1]]
#'
#' @export
tRNA_class <- function(PAC, map, terminal = 5){
## Check S4
if(isS4(PAC)){
tp <- "S4"
PAC <- as(PAC, "list")
}else{
tp <- "S3"
}
logi_no_hits <- unlist(lapply(map, function(x){x[[2]][1,1] == "no_hits"}))
if(any(logi_no_hits)==TRUE){
cat("The map object contains references without hits.",
"\nThese will be removed from output.")
map <- map[!unlist(lapply(map, function(x){
x[[2]][1,1] == "no_hits"
}))]
stopifnot(any(
unlist(lapply(map, function(x){
x[[2]][1,1] == "no_hits"})))==FALSE)
}
type_vector <- lapply(map, function(x){
# Setup
align <- x$Alignments
reference_length <- IRanges::width(x$Ref_seq)
ref_name <- names(x$Ref_seq)
# Classify according to terminal tolerance
terminal_type <- ifelse(align$Align_start <= terminal, "5'",
ifelse(reference_length - align$Align_end <= terminal,
"3'", "i'"))
# Classify according to terminal tolerance
half_type <- ifelse(
align$type_start_loop2 == TRUE | align$type_end_loop2 == TRUE,
"half", "tRF")
if(any(colnames(align) %in% "decoder")){
return(data.frame(tRNA_ref = ref_name,
seq = rownames(align),
class = paste(terminal_type, half_type, sep = "-"),
decoder = align$decoder,
acceptor = align$acceptor))}
else{
return(data.frame(tRNA_ref = ref_name,
seq = rownames(align),
class = paste(terminal_type, half_type, sep = "-"),
decoder = sub(".*?-(.*?)-.*", "\\1", ref_name),
acceptor = sub(".*?-.*?-(.*?)-.*", "\\1", ref_name)))
}
})
# Merge all references
type_vector <- do.call("rbind", type_vector)
rownames(type_vector) <- NULL
# Split by sequence instead of references
type_vector <- split(type_vector, type_vector$seq)
# Collapse multimapping seqences into one column
finished <- lapply(type_vector, function(x){
df <- data.frame(seq=paste0(sort(unique(x$seq)), collapse=";"),
class=paste0(sort(unique(x$class)), collapse=";"),
decoder=paste0(sort(unique(x$decoder)), collapse=";"),
acceptor=paste0(sort(unique(x$acceptor)), collapse=";"),
tRNA_ref=paste0(sort(unique(x$tRNA_ref)), collapse=";"))
type <- paste(sort(unique(x$decoder)), sort(unique(x$acceptor)), sep="-")
df$type <- paste0(type, collapse=";")
return(df)
})
finished <- do.call("rbind", finished)
# Extract tRNAs from PAC and merge results
PAC$Anno$seq <- rownames(PAC$Anno)
pac_trna <- PAC_filter(PAC, anno_target=list("seq", finished$seq),
subset_only=TRUE)
# Before you merge make sure both dataframes are matching
stopifnot(identical(rownames(pac_trna$Anno), as.character(finished$seq)))
pac_trna$Anno <- cbind(pac_trna$Anno[,1, drop=FALSE], finished[,-1])
## Double check and return
if(PAC_check(pac_trna)==TRUE){
if(tp=="S4"){
return(as.PAC(pac_trna))
}else{
return(pac_trna)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.