# R/FASTA_Annotations.R
#
# Defines functions: get_anno
#
# Documented in: get_anno

#' Extract annotation fields from FASTA header lines
#'
#' Splits every sequence name on spaces and colons (into at most 18 pieces)
#' and keeps the pieces found at fixed positions as annotation columns.
#'
#' @param trans_fasta object generated by readDNAStringSet
#' from Biostrings package. Only \code{names(trans_fasta)} is used.
#' @return A tibble with one row per sequence name and the character columns
#' \code{transcript_id}, \code{chromosome}, \code{start}, \code{end},
#' \code{strand}, \code{gene}, \code{gene_biotype},
#' \code{transcript_biotype}, \code{gene_symbol} and \code{description}.
#' Fields that are absent from a header are \code{NA}. Input without names
#' yields a zero-row tibble with the same columns.
#' @export
get_anno <- function(trans_fasta) {

    anno_names <- c("transcript_id", "chromosome", "start", "end", "strand", "gene",
                    "gene_biotype", "transcript_biotype", "gene_symbol",
                    "description")

    # Positions (after splitting on " " or ":") of the values that belong to
    # `anno_names`; the skipped positions hold the keys/labels themselves.
    # NOTE(review): selection is purely positional, so a header lacking one of
    # the key:value pairs would shift every later column - confirm that all
    # input headers carry the full set of fields in this order.
    data_idx <- c(1, 5, 6, 7, 8, 10, 12, 14, 16, 18)

    # as.character() turns NULL names (unnamed input) into character(0).
    headers <- as.character(names(trans_fasta))

    # n = 18 keeps everything after the 17th separator in one final piece, so
    # spaces and colons inside the description are not split further.
    annot_list <- stringr::str_split(headers, pattern = "[ :]", n = 18)

    # Out-of-range positions index to NA, so short headers give NA fields.
    fields <- lapply(annot_list, function(pieces) pieces[data_idx])

    # Build the whole matrix in one step; as.character() keeps the columns
    # character even when there are no headers at all.
    anno_mat <- matrix(
        as.character(unlist(fields, use.names = FALSE)),
        nrow = length(fields),
        ncol = length(anno_names),
        byrow = TRUE,
        dimnames = list(NULL, anno_names)
    )

    tibble::as_tibble(anno_mat)
}
# Rvirgenslane/Hotgenes documentation built on Aug. 22, 2020, 2:11 a.m.