R/extractEMBOSS_pepdigest.R

Defines functions extractEMBOSS_pepdigest

Documented in extractEMBOSS_pepdigest

#' Convert EMBOSS pepdigest output into a tidy, one-row-per-sequence table
#'
#' Every row of \code{table_raw} describes one peptide produced by the
#' digestion. The peptide string is cut out of its parent sequence using
#' \code{Start} and \code{End}, and the peptides of each sequence are then
#' spread into columns named \code{seq_CleavageSites.<col_mode><i>}, where
#' \code{i} is the position of the peptide within its sequence.
#'
#' \code{seqs} and \code{headers} are matched to the sequences found in
#' \code{table_raw} by position, following the order in which each
#' \code{SeqName} first appears in \code{table_raw}.
#'
#' @param table_raw A data.frame with the pepdigest EMBOSS output. It must
#'   contain the columns \code{SeqName}, \code{Start}, \code{End},
#'   \code{Score}, \code{Mol_Weight}, \code{Cterm} and \code{Nterm}.
#' @param seqs A character vector containing fasta sequences, one per
#'   distinct \code{SeqName}.
#' @param headers A character vector containing fasta headers, one per
#'   distinct \code{SeqName}. They replace \code{SeqName} in the output.
#' @param col_mode A single string with the name of the proteolytic enzyme
#'   or reagent. It is used as a label inside the output column names.
#' @param extra A single \code{TRUE} or \code{FALSE}. If \code{TRUE}, the
#'   columns derived from \code{Start}, \code{End}, \code{Score},
#'   \code{Mol_Weight}, \code{Cterm} and \code{Nterm} are kept; otherwise only
#'   \code{SeqName} and the peptide columns are returned.
#'
#' @return A data frame in 'tidy' format with one row per sequence.
#' @export
#'
#' @examples
#' digest <- data.frame(
#'   SeqName = c("seq1", "seq1", "seq2"),
#'   Start = c(1, 4, 1),
#'   End = c(3, 6, 4),
#'   Score = c(1, 1, 1),
#'   Mol_Weight = c(348.5, 302.3, 390.5),
#'   Cterm = c("A", ".", "A"),
#'   Nterm = c(".", "K", ".")
#' )
#' extractEMBOSS_pepdigest(
#'   digest,
#'   seqs = c("MAKARG", "MGGKAA"),
#'   headers = c("protein 1", "protein 2"),
#'   col_mode = "Trypsin"
#' )
extractEMBOSS_pepdigest <- function(table_raw, seqs, headers,
                                    col_mode = "", extra = FALSE) {
  ## Check arguments
  if (!is.data.frame(table_raw)) {
    stop("'table_raw' should be a data.frame", call. = FALSE)
  }
  if (!is.character(seqs)) {
    stop("'seqs' should be character", call. = FALSE)
  }
  if (!is.character(col_mode)) {
    stop("'col_mode' should be character", call. = FALSE)
  }
  if (length(col_mode) != 1) {
    stop("'col_mode' length should be 1", call. = FALSE)
  }
  if (!is.character(headers)) {
    stop("'headers' should be character", call. = FALSE)
  }
  if (!is.logical(extra)) {
    stop("'extra' should be logical", call. = FALSE)
  }
  # `extra` drives an `if`, so it has to be exactly one non-missing value.
  if (length(extra) != 1L || is.na(extra)) {
    stop("'extra' should be a single TRUE or FALSE", call. = FALSE)
  }

  # Check the columns before touching `table_raw$SeqName`; otherwise a missing
  # SeqName column would be reported as a length mismatch.
  required_cols <- c(
    "SeqName", "Start", "End", "Score", "Mol_Weight", "Cterm", "Nterm"
  )
  if (!all(required_cols %in% colnames(table_raw))) {
    stop("Some columns are not contained in table_raw", call. = FALSE)
  }

  n_sequences <- length(unique(table_raw$SeqName))
  if (length(seqs) != n_sequences || length(headers) != n_sequences) {
    stop("table_raw, seqs and headers should have the same length",
         call. = FALSE)
  }

  # Nesting leaves one row per SeqName, so the header and the full sequence
  # can be attached by position before expanding back to one row per peptide.
  tbl <- table_raw %>%
    tidyr::nest(data = c(!SeqName)) %>%
    dplyr::mutate(SeqName = headers,
                  seq = seqs) %>%
    tidyr::unnest(cols = "data")

  # Cut each peptide out of its parent sequence. substr() is vectorised over
  # start/stop and keeps Start/End numeric instead of coercing the whole
  # table to a character matrix.
  tbl$seq <- substr(tbl$seq, tbl$Start, tbl$End)

  # Number the peptides within each sequence and spread them into columns.
  tbl <- tbl %>%
    dplyr::group_by(SeqName) %>%
    dplyr::mutate(
      CleavageSites = paste0("CleavageSites.", col_mode, dplyr::row_number())
    ) %>%
    dplyr::ungroup() %>%
    tidyr::pivot_wider(names_from = "CleavageSites",
                       values_from = -c("CleavageSites", "SeqName"))

  if (extra) {
    return(tbl)
  }

  # Without `extra`, keep only SeqName and the peptide (seq_*) columns.
  dplyr::select(
    tbl,
    -dplyr::starts_with(
      c("Start", "End", "Score", "Mol_Weight", "Cterm", "Nterm")
    )
  )
}
currocam/FascinRSCA documentation built on March 21, 2022, 6:29 a.m.