#'
#' Load Blast database output
#'
#' Blast outputs a 12-column file. This convenience function will
#' load this tabular data and give it the correct names. The default outputs
#' are:
#' \code{qseqid sseqid pident length mismatch gapopen
#' qstart qend sstart send evalue bitscore}
#' and are converted to:
#' \code{query target percent_ident length mismatch gapopen
#' qstart qend sstart send evalue bitscore}
#'
#' @importFrom readr read_tsv
#' @export
#' @seealso
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK279675/}{The Blast Book}
#'
#' @param filename Path to a Blast tabular output file (12 tab-separated
#'   columns, no header row). It is passed to \code{read_tsv} as \code{file}.
#' @return The table returned by \code{read_tsv}, with its columns named
#'   \code{query}, \code{target}, \code{percent_ident}, \code{length},
#'   \code{mismatch}, \code{gapopen}, \code{qstart}, \code{qend},
#'   \code{sstart}, \code{send}, \code{evalue} and \code{bitscore}.
load_blast <- function(filename) {
  blast_cols <- c(
    'query', 'target', 'percent_ident',
    'length', 'mismatch', 'gapopen',
    'qstart', 'qend', 'sstart', 'send',
    'evalue', 'bitscore'
  )
  # Return the call directly. Ending the function on an assignment would hand
  # the table back invisibly, so `load_blast(f)` printed nothing at top level.
  read_tsv(file = filename, col_names = blast_cols)
}
#'
#' Load Usearch UC Files
#'
#' Usearch is a program that performs clustering and outputs its cluster information in a tabular format.
#'
#' @details
#' Field Description
#' 1 Record type S, H, C or N (see table below).
#' 2 Cluster number (0-based).
#' 3 Sequence length (S, N and H) or cluster size (C).
#' 4 For H records, percent identity with target.
#' 5 For H records, the strand: + or - for nucleotides, . for proteins.
#' 6 Not used, parsers should ignore this field. Included for backwards compatibility.
#' 7 Not used, parsers should ignore this field. Included for backwards compatibility.
#' 8 Compressed alignment or the symbol '=' (equals sign). The = indicates that the
#' query is 100% identical to the target sequence (field 10).
#' 9 Label of query sequence (always present).
#' 10 Label of target sequence (H records only).
#'
#' Record Description
#' H Hit. Represents a query-target alignment. For clustering, indicates the
#' cluster assignment for the query. If -maxaccepts > 1, there is still
#' only one H record giving the best hit. To get the other accepts, use
#' another type of output file, or use the -uc_allhits option
#' (requires version 6.0.217 or later).
#' S Centroid (clustering only). There is one S record for each cluster,
#' this gives the centroid (representative) sequence label in the 9th
#' field. Redundant with the C record; provided for backwards
#' compatibility.
#' C Cluster record (clustering only). The 3rd field is set to the cluster
#' size (number of sequences in the cluster) and the 9th field is set to
#' the label of the centroid sequence.
#' N No hit (for database search without clustering only). Indicates that no
#' accepts were found. In the case of clustering, a query with no hits
#' becomes the centroid of a new cluster and generates an S record instead
#' of an N record.
#'
#' @importFrom readr read_tsv
#' @importFrom dplyr %>%
#' @export
#' @seealso
#' \href{http://www.drive5.com/usearch/manual/opt_uc.html}{Usearch UC Documentation}
#' @param filename Path to a Usearch UC file (10 tab-separated fields, no
#'   header row). It is passed to \code{read_tsv} as \code{file}.
#' @return The table returned by \code{read_tsv} with the two unused fields
#'   (6 and 7) removed, leaving the columns \code{rectype},
#'   \code{clusternum}, \code{seqlength_clustsize}, \code{percent_ident},
#'   \code{strand}, \code{compressed_algn}, \code{query} and \code{target}.
load_uc <- function(filename) {
  uc_cols <- c(
    'rectype', 'clusternum', 'seqlength_clustsize', 'percent_ident',
    'strand', 'nothing1', 'nothing2', 'compressed_algn', 'query', 'target'
  )
  uctable <- read_tsv(file = filename, col_names = uc_cols)

  # Fields 6 and 7 are placeholders. Drop them by name with plain subsetting:
  # the earlier select() call was never imported, so it only resolved when
  # dplyr happened to be attached by the caller.
  uctable[setdiff(names(uctable), c('nothing1', 'nothing2'))]
}
#'
#' Read HMMER DomTbl File
#'
#' HMMER outputs several file types; this function reads the table written by its domain scanning program.
#'
#' @param filename Path to a HMMER per-domain table (\code{--domtblout}) file.
#' @return The table returned by \code{read_tsv}: one row per data line and
#'   23 columns, the first 22 taken from the whitespace-delimited fields and
#'   the last holding the free-text target description.
load_hmmmerdomtbl <- function(filename) {
  domtbl_cols <- c(
    'target', 't_accession', 'tlen', 'queryname', 'q_accession',
    'qlen', 'full_e-val', 'full_score', 'full_bias', 'dom_number',
    'dom_of', 'dom_c-Evalue', 'dom_i-Evalue', 'dom_score', 'dom_bias',
    'hmmcoord_from', 'hmmcoord_to', 'alncoord_from', 'alncoord_to',
    'envcoord_from', 'envcoord_to', 'acc',
    # Spelling kept as-is: callers may already select this column by name.
    'target_decsription'
  )
  # Every column except the trailing description is a single token.
  n_fixed <- length(domtbl_cols) - 1L

  raw_lines <- readLines(filename, warn = FALSE)
  # Skip the three header lines exactly as before, then also discard blank
  # lines and the '#' comment lines HMMER appends after the data.
  data_lines <- trimws(raw_lines[seq_along(raw_lines) > 3L])
  data_lines <- data_lines[nzchar(data_lines) & !grepl("^#", data_lines)]

  # The table is aligned with runs of spaces, not tabs, and the description
  # may itself contain spaces. Split on whitespace, keep the first `n_fixed`
  # tokens as separate fields and glue the remainder back into one field.
  tokens <- strsplit(data_lines, "[[:space:]]+")
  tsv_rows <- vapply(tokens, function(tok) {
    fixed <- tok[seq_len(min(n_fixed, length(tok)))]
    descr <- paste(tok[seq_along(tok) > n_fixed], collapse = " ")
    paste(c(fixed, descr), collapse = "\t")
  }, character(1))

  # Hand the normalised text to read_tsv as literal data; the trailing
  # newline guarantees it is never mistaken for a file path.
  tsv_text <- paste0(paste(tsv_rows, collapse = "\n"), "\n")
  read_tsv(file = I(tsv_text), col_names = domtbl_cols)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.