#' Convert EMBOSS pepdigest output into a tidy, wide data frame
#'
#' Every row of \code{table_raw} describes one peptide produced by the digest.
#' The function replaces each distinct \code{SeqName} with the matching entry
#' of \code{headers}, cuts the peptide out of the matching entry of
#' \code{seqs} using the \code{Start} and \code{End} columns, and spreads the
#' peptides of each sequence over columns, so that the result has one row per
#' header.
#'
#' @param table_raw A data.frame with the pepdigest EMBOSS output. It must
#'   contain the columns \code{SeqName}, \code{Start}, \code{End},
#'   \code{Score}, \code{Mol_Weight}, \code{Cterm} and \code{Nterm}.
#' @param seqs A character vector containing the fasta sequences, one per
#'   distinct \code{SeqName}, in the order in which the names first appear in
#'   \code{table_raw}.
#' @param headers A character vector containing the fasta headers, parallel to
#'   \code{seqs}.
#' @param extra A single logical. If \code{TRUE}, the columns derived from
#'   \code{Start}, \code{End}, \code{Score}, \code{Mol_Weight}, \code{Cterm}
#'   and \code{Nterm} are kept; if \code{FALSE} (default) they are dropped.
#' @param col_mode A single character string with the name of the proteolytic
#'   enzyme or reagent. It is embedded in the names of the output columns.
#'
#' @return A data frame in 'tidy' format with one row per header
#'   (\code{SeqName}) and one set of columns per peptide, named
#'   \code{<column>_CleavageSites.<col_mode><k>}, where \code{k} is the
#'   position of the peptide within its sequence.
#' @export
#'
#' @examples
#' \dontrun{
#' digest <- data.frame(
#'   SeqName = c("sp1", "sp1", "sp2"),
#'   Start = c(1, 4, 1),
#'   End = c(3, 6, 4),
#'   Score = 0,
#'   Mol_Weight = 0,
#'   Cterm = ".",
#'   Nterm = "."
#' )
#' extractEMBOSS_pepdigest(digest,
#'   seqs = c("MKRAAK", "GGKR"),
#'   headers = c("protein_1", "protein_2"),
#'   col_mode = "trypsin"
#' )
#' }
extractEMBOSS_pepdigest <- function(table_raw, seqs, headers,
                                    col_mode = "", extra = FALSE) {
  ## Check arguments
  if (!is.data.frame(table_raw))
    stop("'table_raw' should be a data.frame")
  if (!is.character(seqs))
    stop("'seqs' should be character")
  if (!is.character(col_mode))
    stop("'col_mode' should be character")
  if (length(col_mode) != 1)
    stop("'col_mode' length should be 1")
  if (!is.character(headers))
    stop("'headers' should be character")
  if (!is.logical(extra))
    stop("'extra' should be logical")
  # 'extra' drives an `if`, so it has to be exactly one non-missing value.
  if (length(extra) != 1 || is.na(extra))
    stop("'extra' should be a single TRUE or FALSE")

  # The columns are checked before the lengths: the length check reads
  # 'SeqName', and a missing column would otherwise be reported as a
  # length mismatch.
  required_cols <- c("SeqName", "Start", "End", "Score", "Mol_Weight",
                     "Cterm", "Nterm")
  if (!all(required_cols %in% colnames(table_raw)))
    stop("Some columns are not contained in table_raw")

  n_names <- length(unique(table_raw[["SeqName"]]))
  if (n_names != length(seqs) || n_names != length(headers))
    stop("table_raw, seqs and headers should have the same length")

  ## Attach header and full sequence to every peptide row
  # Nesting yields one row per SeqName (in order of first appearance), which
  # is what lets 'headers' and 'seqs' be assigned positionally.
  tbl <- table_raw %>%
    tidyr::nest(data = c(!SeqName)) %>%
    dplyr::mutate(SeqName = headers,
                  seq = seqs) %>%
    tidyr::unnest(cols = "data")

  ## Cut each peptide out of its parent sequence
  # substr() is vectorised and clamps to the sequence, so an 'End' past the
  # last residue no longer injects literal "NA" text into the peptide.
  tbl$seq <- substr(tbl$seq, tbl$Start, tbl$End)

  ## Number the peptides within each sequence and spread them over columns
  tbl <- tbl %>%
    dplyr::group_by(SeqName) %>%
    dplyr::mutate(row_number = dplyr::row_number()) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      CleavageSites = glue::glue("CleavageSites.{col_mode}{row_number}")
    ) %>%
    dplyr::select(-row_number) %>%
    tidyr::pivot_wider(names_from = "CleavageSites",
                       values_from = -c("CleavageSites", "SeqName"))

  if (!extra) {
    # Keep only the peptide sequences; drop positional and scoring columns.
    tbl <- dplyr::select(
      tbl,
      -dplyr::starts_with(c("Start", "End", "Score", "Mol_Weight",
                            "Cterm", "Nterm"))
    )
  }
  tbl
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.