#' Convert EMBOSS antigenic output into a tidy, one-row-per-sequence data frame
#'
#' Every predicted region in \code{table_raw} is paired with the sequence it
#' belongs to, the matching peptide is cut out of that sequence
#' (\code{Start} to \code{End}, 1-based and inclusive), and the regions of each
#' sequence are then spread into columns named \code{<column>_Epitope.<i>}
#' (for example \code{seq_Epitope.1}, \code{Score_Epitope.1}).
#'
#' @param table_raw Antigenic EMBOSS output tsv, as a data.frame. It must
#'   contain the columns \code{SeqName}, \code{Start}, \code{End}, \code{Score}
#'   and \code{Max_score_pos}.
#' @param seqs A character vector containing fasta sequences. It must have one
#'   element per distinct \code{SeqName} and is matched to them by position
#'   (presumably the order in which the names first appear in \code{table_raw};
#'   confirm against \code{tidyr::nest}).
#' @param headers A character vector containing fasta headers, same length and
#'   order as \code{seqs}. These values replace \code{SeqName} in the output.
#' @param positions A Boolean, if \code{positions} is TRUE, columns with
#'   positions in the sequences (\code{Start_*} and \code{End_*}) are kept;
#'   otherwise they are dropped.
#'
#' @return It returns a DataFrame containing the results in a 'tidy' format:
#'   one row per header and one group of columns per epitope.
#' @export
#'
#' @examples
#' table_raw <- data.frame(
#'   SeqName = c("seq1", "seq1", "seq2"),
#'   Start = c(1, 5, 2),
#'   End = c(4, 8, 6),
#'   Score = c(1.10, 1.05, 1.20),
#'   Max_score_pos = c(2, 6, 4)
#' )
#' seqs <- c("ACDEFGHIKL", "MNPQRSTVWY")
#' headers <- c("protein_1", "protein_2")
#' extractEMBOSS_antigenic(table_raw, seqs, headers, positions = TRUE)
extractEMBOSS_antigenic <- function(table_raw, seqs, headers, positions = FALSE) {
  ## Check arguments
  if (!is.data.frame(table_raw)) {
    stop("'table_raw' should be a data.frame")
  }
  if (!is.character(seqs)) {
    stop("'seqs' should be character")
  }
  if (!is.character(headers)) {
    stop("'headers' should be character")
  }
  if (!is.logical(positions)) {
    stop("'positions' should be logical")
  }
  # `if (!positions)` below needs exactly one non-missing value.
  if (length(positions) != 1L || is.na(positions)) {
    stop("'positions' should be a single TRUE or FALSE")
  }
  # Columns are checked before SeqName is used, so that a missing SeqName
  # column is reported as such and not as a length mismatch.
  required_cols <- c("SeqName", "Start", "End", "Score", "Max_score_pos")
  if (!all(required_cols %in% colnames(table_raw))) {
    stop("Some columns are not contained in table_raw")
  }
  n_seq_names <- length(unique(table_raw[["SeqName"]]))
  if (n_seq_names != length(seqs) || n_seq_names != length(headers)) {
    stop("table_raw, seqs and headers should have the same length")
  }

  tbl <- table_raw %>%
    # One row per SeqName, so headers/seqs can be attached by position.
    tidyr::nest(data = c(!SeqName)) %>%
    dplyr::mutate(SeqName = headers, seq = seqs) %>%
    tidyr::unnest(-c("SeqName", "seq")) %>%
    # Replace the full sequence by the peptide spanning Start..End.
    dplyr::mutate(seq = substr(seq, Start, End)) %>%
    # Number the epitopes within each sequence.
    dplyr::group_by(SeqName) %>%
    dplyr::mutate(epitope = paste0("Epitope.", dplyr::row_number())) %>%
    dplyr::ungroup() %>%
    tidyr::pivot_wider(
      names_from = "epitope",
      values_from = -c("epitope", "SeqName")
    )

  if (!positions) {
    tbl <- dplyr::select(tbl, -dplyr::starts_with(c("Start", "End")))
  }
  tbl
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.