R/extractEMBOSS_antigenic.R

Defines functions extractEMBOSS_antigenic

Documented in extractEMBOSS_antigenic

#' Convert EMBOSS antigenic output into a tidy, one-row-per-sequence table
#'
#' Every predicted epitope in \code{table_raw} is cut out of its parent
#' sequence and the table is then reshaped to wide format, so that each
#' sequence occupies a single row and the k-th epitope of a sequence is stored
#' in columns named \code{<column>_Epitope.<k>}
#' (for example \code{seq_Epitope.1}, \code{Score_Epitope.1}).
#'
#' @param table_raw A data.frame with the EMBOSS antigenic output. It must
#'   contain at least the columns \code{SeqName}, \code{Start}, \code{End},
#'   \code{Score} and \code{Max_score_pos}.
#' @param seqs A character vector containing fasta sequences. It must have one
#'   element per distinct \code{SeqName}; elements are matched by position to
#'   the distinct \code{SeqName} values in order of first appearance.
#' @param headers A character vector containing fasta headers, matched to the
#'   distinct \code{SeqName} values in the same way as \code{seqs}. The headers
#'   replace the original \code{SeqName} values in the result.
#' @param positions A single Boolean. If \code{positions} is TRUE, the columns
#'   with the positions in the sequences (those whose names start with
#'   \code{Start} or \code{End}) are kept; otherwise they are dropped.
#'
#' @return A data frame (tibble) in 'tidy' wide format with one row per
#'   sequence: the column \code{SeqName} (holding \code{headers}) followed by
#'   one \code{<column>_Epitope.<k>} column per remaining input column and
#'   epitope, where \code{seq_Epitope.<k>} is the epitope subsequence.
#'   Sequences with fewer epitopes than the maximum get \code{NA} in the
#'   surplus columns.
#' @export
#'
#' @examples
#' table_raw <- data.frame(
#'   SeqName = c("s1", "s1", "s2"),
#'   Start = c(1, 4, 2),
#'   End = c(3, 6, 5),
#'   Score = c(1.1, 1.0, 1.2),
#'   Max_score_pos = c(2, 5, 3)
#' )
#' extractEMBOSS_antigenic(
#'   table_raw,
#'   seqs = c("MKVLAAGIV", "ACDEFGHIK"),
#'   headers = c("seq 1", "seq 2"),
#'   positions = TRUE
#' )
extractEMBOSS_antigenic <- function(table_raw, seqs, headers,
                                    positions = FALSE) {
  ## Check argument types
  if (!is.data.frame(table_raw)) {
    stop("'table_raw' should be a data.frame", call. = FALSE)
  }
  if (!is.character(seqs)) {
    stop("'seqs' should be character", call. = FALSE)
  }
  if (!is.character(headers)) {
    stop("'headers' should be character", call. = FALSE)
  }
  if (!is.logical(positions)) {
    stop("'positions' should be logical", call. = FALSE)
  }
  # `positions` drives an `if`, so it has to be exactly one non-missing value.
  if (length(positions) != 1L || is.na(positions)) {
    stop("'positions' should be a single TRUE or FALSE", call. = FALSE)
  }

  ## Check the columns before anything reads `SeqName`; otherwise a missing
  ## column would surface as a misleading length-mismatch error.
  required_cols <- c("SeqName", "Start", "End", "Score", "Max_score_pos")
  if (!all(required_cols %in% colnames(table_raw))) {
    stop("Some columns are not contained in table_raw", call. = FALSE)
  }

  ## One sequence and one header are needed per distinct sequence name
  n_names <- length(unique(table_raw[["SeqName"]]))
  if (length(seqs) != n_names || length(headers) != n_names) {
    stop("table_raw, seqs and headers should have the same length",
         call. = FALSE)
  }

  tbl <- table_raw %>%
    # Collapse to one row per sequence so that headers and sequences can be
    # attached by position, then expand back to one row per epitope.
    tidyr::nest(data = c(!SeqName)) %>%
    dplyr::mutate(SeqName = headers,
                  seq = seqs) %>%
    tidyr::unnest(cols = "data") %>%
    # Replace the full sequence by the epitope it contains (1-based,
    # inclusive coordinates).
    dplyr::mutate(seq = substr(seq, Start, End)) %>%
    # Number the epitopes within each sequence: Epitope.1, Epitope.2, ...
    dplyr::group_by(SeqName) %>%
    dplyr::mutate(epitope = paste0("Epitope.", dplyr::row_number())) %>%
    dplyr::ungroup() %>%
    # Spread every remaining column over the epitope labels.
    tidyr::pivot_wider(names_from = "epitope",
                       values_from = -c("epitope", "SeqName"))

  if (!positions) {
    tbl <- dplyr::select(tbl, -dplyr::starts_with(c("Start", "End")))
  }
  tbl
}
currocam/FascinRSCA documentation built on March 21, 2022, 6:29 a.m.