R/makeEpiTxDbFromtRNAdb.R
In EpiTxDb: Storing and accessing epitranscriptomic information using the AnnotationDbi interface

Documented in gettRNAdbDataAsGRanges listAvailableOrganismsFromtRNAdb makeEpiTxDbFromtRNAdb

#' @include EpiTxDb-class.R
#' @include makeEpiTxDb.R
NULL

#' @name makeEpiTxDbFromtRNAdb
#'
#' @title Create a \code{EpiTxDb} object from tRNAdb resources
#'
#' @description
#' \code{makeEpiTxDbFromtRNAdb} will make use of the tRNAdb online
#' resources and extract the modification information from the RNA database.
#'
#' If a named \code{\link[Biostrings:XStringSet-class]{DNAStringSet}} is
#' provided as \code{sequences}, the result from the tRNAdb will be matched
#' against the sequences. Valid matches will be used as transcript identifiers
#' and returned after a check of modification compatibility with the provided
#' sequence. By this process multiple copies of transcripts can be associated
#' with a single modification.
#'
#' \code{makeEpiTxDbFromtRNAdb} uses the functions provided by the
#' \code{\link[tRNAdbImport:tRNAdbImport]{tRNAdbImport}} package.
#' \code{\link[tRNAdbImport:import.tRNAdb]{import.tRNAdb}} will be used with
#' \code{database = "RNA"} and the three different values for \code{origin}.
#'
#' @param organism A \code{character} value for an organism available on the
#'   tRNAdb website.
#' @param sequences A named \code{DNAStringSet} or \code{RNAStringSet}, which
#'   will be used to associate a tRNAdb result with a specific transcript.
#' @param dbURL The URL to the tRNA db website.
#' @param metadata See \code{\link[=makeEpiTxDb]{makeEpiTxDb}}
#'
#' @references
#' Juehling F, Moerl M, Hartmann RK, Sprinzl M, Stadler PF, Puetz J. 2009.
#' "tRNAdb 2009: compilation of tRNA sequences and tRNA genes." Nucleic Acids
#' Research, Volume 37 (suppl_1): D159–162. doi:10.1093/nar/gkn772.
#'
#' @return a \code{EpiTxDb} object.
#'
#' @export
#'
#' @examples
#' # getting just the annotation data
#' etdb <- makeEpiTxDbFromtRNAdb("Saccharomyces cerevisiae")
#'
#' # For associating the result with transcripts, provide and additional
#' # named DNAStringSet object. Matching will be done against each sequence
#' # allowing 5 mismatches and indels. The final result will be checked for
#' # validity regarding the identity of the modifications
#' \dontrun{
#' etdb <- makeEpiTxDbFromtRNAdb("Saccharomyces cerevisiae",
#'                               some_transcript_sequences)
#' }
NULL

# makeEpiTxDbFromtRNAdb --------------------------------------------------------

.add_reference_information_tRNAdb <- function(mod, gr){
    # collect references
    f_ref <- mcols(gr)$tRNAdb_reference != ""
    f_pmid <- mcols(gr)$tRNAdb_pmid != ""

    ref_type <- list(IRanges::CharacterList(as.list(rep("tRNAdb_ID",
                                                        length(gr)))),
                     IRanges::CharacterList(as.list(rep("tRNAdb_REF",
                                                        length(gr)))),
                     IRanges::CharacterList(as.list(rep("PMID",
                                                        length(gr)))))
    ref_type[[2]][!f_ref] <- ""
    ref_type[[3]][!f_pmid] <- ""
    ref_type <- do.call(pc,ref_type)
    ref_type <- ref_type[ref_type != ""]

    references <- list(mcols(gr)$tRNAdb_ID,
                       mcols(gr)$tRNAdb_reference,
                       mcols(gr)$tRNAdb_pmid)
    references[[2]][!f_ref] <- ""
    references[[3]][!f_pmid] <- ""
    references <- do.call(pc,references)
    references <- references[references != ""]

    m <- as.integer(S4Vectors::match(seqnames(mod), seqnames(gr)))
    mcols(mod)$ref_type <- ref_type[m]
    mcols(mod)$ref <- references[m]
    mod
}

#' @importFrom Biostrings subseq vwhichPDict
.map_modifications_to_sequences <- function(mod, gr, sequences){
    seq <- as(gr$tRNA_seq,"RNAStringSet")
    width <- nchar(seq)
    width[gr$tRNA_CCA.end] <- width[gr$tRNA_CCA.end] - 3L
    seq <- Biostrings::subseq(seq, 1L, width)
    # remove sequences which are definitly to long
    max_length <- max(lengths(seq))
    min_length <- min(lengths(seq))
    sequences[lengths(sequences) > (max_length + 50)] <-
        do.call(class(sequences),
                list(paste0(rep("A",(max_length + 50)),collapse = "")))
    sequences[lengths(sequences) < min_length] <-
        do.call(class(sequences),
                list(paste0(rep("A",(max_length + 50)),collapse = "")))
    hits <- Biostrings::vwhichPDict(sequences, seq,
                                    with.indels = TRUE, max.mismatch = 5L)
    hits <- Hits(unlist(hits),as.integer(unlist(Map(rep.int,seq_along(hits),lengths(hits)))),
                 length(sequences), length(seq))
    hits_mod <- findMatches(seqnames(mod),names(seq))
    # transfer names of sequences to seqnames for modifications and expand the
    # result with multiple matches
    sn_name <- names(sequences)[queryHits(hits)]
    sn_name <- IRanges::CharacterList(split(sn_name,subjectHits(hits)))
    sn_name <- vapply(sn_name,paste,character(1),collapse=",")
    hits_mod <- hits_mod[subjectHits(hits_mod) %in% unique(subjectHits(hits))]
    mcols(mod)[queryHits(hits_mod),"sn_name"] <- sn_name[as.character(subjectHits(hits_mod))]
    sn_name <- IRanges::CharacterList(strsplit(as.character(mcols(mod)$sn_name),","))
    # expand results
    mod <- mod[unlist(Map(rep,seq_along(sn_name),lengths(sn_name)))]
    # assemble new result
    mcols <- mcols(mod)[,colnames(mcols(mod)) != "sn_name"]
    mod <- GenomicRanges::GRanges(seqnames = unlist(sn_name),
                                  ranges = IRanges::ranges(mod),
                                  strand = strand(mod),
                                  mcols)
    mcols(mod)$mod_id <- seq_along(mod)
    mod
}

.add_tRNAdb_metadata_columns <- function(gr){
    # assemble metadata columns
    mcols <- mcols(gr)
    colnames(mcols) <- gsub("^mod$","mod_type",colnames(mcols))
    mcols$mod_name <- paste0(mcols$mod_type,"_",start(gr),"_",seqnames(gr))
    mcols$mod_id <- seq_len(nrow(mcols))
    mcols(gr) <- mcols
    gr
}

#' @rdname makeEpiTxDbFromtRNAdb
#' @importFrom tRNAdbImport import.tRNAdb
#' @export
gettRNAdbDataAsGRanges <- function(organism, sequences = NULL,
                                   dbURL = tRNAdbImport::TRNA_DB_URL){

    if(!is.character(organism) || length(organism) > 1L ||
       !(organism %in% listAvailableOrganismsFromtRNAdb())){
        stop("'organism' must be a single character value and match an entry ",
             "from listAvailableOrganismsFromtRNAdb()")
    }

    # get tRNAdb information
    message("Loading data from tRNAdb ...")
    gr <- lapply(c("allothers","plastid","mitochondrial"),
                 function(origin){
                     try(tRNAdbImport::import.tRNAdb(organism = organism,
                                                     database = "RNA",
                                                     origin = origin,
                                                     dbURL = dbURL),
                         silent = TRUE)
                 })
    gr <- gr[!vapply(gr,is,logical(1),"try-error")]
    if(length(gr) == 0L){
        stop(".")
    }
    gr <- suppressWarnings(do.call(c,gr))
    message("Assembling data ...")
    mod <- separate(gr$tRNA_seq)
    mod <- .add_reference_information_tRNAdb(mod, gr)
    #
    if(!is.null(sequences)){
        message("Trying to associate tRNAdb entries with sequences ...")
        if(is.null(names(sequences)) ||
           anyDuplicated(names(sequences)) ||
           any(names(sequences) == "")){
            stop("names() of 'sequences' must be set, unique and not empty.",
                 call. = FALSE)
        }
        mod <- .map_modifications_to_sequences(mod, gr, sequences)
    }
    # assemble metadata columns
    mod <- .add_tRNAdb_metadata_columns(mod)
    mod
}

#' @rdname makeEpiTxDbFromtRNAdb
#' @export
makeEpiTxDbFromtRNAdb <- function(organism, sequences = NULL, metadata = NULL,
                                  dbURL = tRNAdbImport::TRNA_DB_URL){
    gr <- gettRNAdbDataAsGRanges(organism, sequences = sequences, dbURL = dbURL)
    if(!is.null(sequences)){
        gr <- gr[!duplicated(paste0(as.character(gr),"-",gr$mod_type))]
        colnames(mcols(gr)) <- gsub("mod_type","mod",colnames(mcols(gr)))
        gr <- Modstrings::removeIncompatibleModifications(gr, sequences)
        colnames(mcols(gr)) <- gsub("^mod$","mod_type",colnames(mcols(gr)))
    }
    makeEpiTxDbFromGRanges(gr, metadata = metadata, reassign.ids = FALSE)
}


#' @rdname makeEpiTxDbFromtRNAdb
#' @importFrom httr modify_url POST content
#' @importFrom xml2 xml_find_all
#' @export
listAvailableOrganismsFromtRNAdb <- function(){
    res <- httr::POST(paste0(httr::modify_url(tRNAdbImport::TRNA_DB_URL),
                             "DataOutput/Organisms"))
    html <- httr::content(res)
    organsisms <- as.character(xml2::xml_find_all(html,'.//div[@id="inhalt"]//span[count(.//span)=0]//a//b/text()'))
    rna_seq <- as.character(xml2::xml_find_all(html,'.//div[@id="inhalt"]//span[count(.//span)=0]//a//small/text()'))
    rna_n <- vapply(regmatches(rna_seq,regexec("genes,[ ]{1}([0-9]+)[ ]{1}RNA",rna_seq)),"[",character(1),2)
    organsisms[rna_n != 0]
}

Any scripts or data that you put into this service are public.

EpiTxDb documentation built on March 26, 2021, 6 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

EpiTxDb
Storing and accessing epitranscriptomic information using the AnnotationDbi interface

R/makeEpiTxDbFromtRNAdb.R
In EpiTxDb: Storing and accessing epitranscriptomic information using the AnnotationDbi interface

Defines functions listAvailableOrganismsFromtRNAdb makeEpiTxDbFromtRNAdb gettRNAdbDataAsGRanges .add_tRNAdb_metadata_columns .map_modifications_to_sequences .add_reference_information_tRNAdb

Documented in gettRNAdbDataAsGRanges listAvailableOrganismsFromtRNAdb makeEpiTxDbFromtRNAdb

Try the EpiTxDb package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EpiTxDb Storing and accessing epitranscriptomic information using the AnnotationDbi interface

R/makeEpiTxDbFromtRNAdb.R In EpiTxDb: Storing and accessing epitranscriptomic information using the AnnotationDbi interface

Defines functions listAvailableOrganismsFromtRNAdb makeEpiTxDbFromtRNAdb gettRNAdbDataAsGRanges .add_tRNAdb_metadata_columns .map_modifications_to_sequences .add_reference_information_tRNAdb

Documented in gettRNAdbDataAsGRanges listAvailableOrganismsFromtRNAdb makeEpiTxDbFromtRNAdb

Try the EpiTxDb package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EpiTxDb
Storing and accessing epitranscriptomic information using the AnnotationDbi interface

R/makeEpiTxDbFromtRNAdb.R
In EpiTxDb: Storing and accessing epitranscriptomic information using the AnnotationDbi interface