#' @title Read file in blast6out format generated by USEARCH or VSEARCH
#' @description Read a file in blast6out format generated by either USEARCH or VSEARCH.
#' @param blast6out.file to blast6out file (\code{*.blast6out} extension).
#' @author Hajk-Georg Drost
#' @examples
#' # read example *.blast6out file
#' test.blast6out <- read.blast6out(system.file("test.blast6out", package = "LTRpred"))
#'
#' # look at the format in R
#' head(test.blast6out)
#' @return
#' A dataframe storing the following columns:
#'
#' \itemize{
#' \item \code{query:} query id.
#' \item \code{subject:} subject id.
#' \item \code{perc_ident:} pecent identity between query and subject.
#' \item \code{align_len:} alignment length between query and subject.
#' \item \code{n_mismatch:} number of mismathces between query and subject.
#' \item \code{n_gap_open:} number of gap openings between query and subject.
#' \item \code{start_q:} start position in query. Query coordinates start with 1 at the
#' first base in the sequence as it appears in the input file. For translated searches (nucleotide queries, protein targets), query start < end for +ve frame and start > end for -ve frame.
#' \item \code{end_q:} end position in query.
#' \item \code{start_s:} start position in subject. Subject coordinates start with 1 at
#' the first base in the sequence as it appears in the database. For untranslated
#' nucleotide searches, subject start < end for plus strand, start > end for a reverse-complement alignment.
#' \item \code{end_s:} end position in subject.
#' \item \code{evalue:} evalue calculated using Karlin-Altschul statistics.
#' \item \code{bit_score:} bit score calculated using Karlin-Altschul statistics.
#' }
#' @export
read.blast6out <- function(blast6out.file){
file.check(blast6out.file)
blast6out.df <- readr::read_tsv(blast6out.file, col_names = FALSE,
col_types = readr::cols(
"X1" = readr::col_character(),
"X2" = readr::col_character(),
"X3" = readr::col_double(),
"X4" = readr::col_integer(),
"X5" = readr::col_integer(),
"X6" = readr::col_integer(),
"X7" = readr::col_integer(),
"X8" = readr::col_integer(),
"X9" = readr::col_integer(),
"X10" = readr::col_integer(),
"X11" = readr::col_double(),
"X12" = readr::col_double()
))
colnames(blast6out.df) <-
c(
"query",
"subject",
"perc_ident",
"align_len",
"n_mismatch",
"n_gap_open",
"start_q",
"end_q",
"start_s",
"end_s",
"evalue",
"bit_score"
)
return(blast6out.df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.