R/sequence.R
In primerTree: Visually Assessing the Specificity and Informativeness of Primer Pairs

Documented in get_sequence get_sequences tree_from_alignment

#PrimerTree
#Copyright (C) 2013 Jim Hester

#' Retrieves a fasta sequence from NCBI nucleotide database.
#'
#' @param accession nucleotide accession to retrieve.
#' @param start start base to retrieve, numbered beginning at 1.  If NULL the
#'        beginning of the sequence.

#' @param stop last base to retrieve, numbered beginning at 1. if NULL the end of
#'        the sequence.
#' @param api_key NCBI api-key to allow faster sequence retrieval.
#' @return an DNAbin object.
#' @seealso \code{\link{DNAbin}}
#' @export

get_sequence = function(accession, start=NULL, stop=NULL, api_key=Sys.getenv("NCBI_API_KEY")){

  fetch_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'

  query=list(db='nuccore', rettype='fasta', retmode='text', id=accession)

  if(!is.null(start))
    query$seq_start = start

  if(!is.null(stop))
    query$seq_stop = stop

  if(nzchar(api_key))
    query$api_key = api_key

  response = POST_retry(fetch_url, body=query)

  #stop if response failed
  stop_for_status(response)

  content = content(response, as='raw')

  #from ape package read.FASTA
  res <- .Call("rawStreamToDNAbin", content)
  names(res) <- sub("^ +", "", names(res))
  class(res) <- "DNAbin"
  res
}

#' Retrieves fasta sequences from NCBI nucleotide database.
#'
#' @param accession the accession number of the sequence to retrieve
#' @param start start bases to retrieve, numbered beginning at 1.  If NULL the
#'        beginning of the sequence.

#' @param stop stop bases to retrieve, numbered beginning at 1. if NULL the stop of
#'        the sequence.
#' @param api_key NCBI api-key to allow faster sequence retrieval.
#' @param simplify simplify the FASTA headers to include only the genbank
#'        accession.
#' @param .parallel if 'TRUE', perform in parallel, using parallel backend
#'        provided by foreach
#' @param .progress name of the progress bar to use, see 'create_progress_bar'
#' @return an DNAbin object.
#' @seealso \code{\link{DNAbin}}
#' @export

get_sequences = function(accession, start=NULL, stop=NULL, api_key=Sys.getenv("NCBI_API_KEY"), simplify=TRUE, .parallel=FALSE, .progress='none'){
  #expand arguments by recycling
  args = expand_arguments(accession=accession, start=start, stop=stop)
  #assign expanded arguments to actual arguments
  lapply(seq_along(args), function(i) names(args)[i] <<- args[i])

  #define rate to query NCBI servers with get_sequences
  query_rate <- 3; #queries per second

  if(nzchar(api_key)) {
    query_rate <- 10
  } else {
    warning("Sequence retrieval limited to 3 per second. Provide an api_key to increase this to 10. See:
  https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/", immediate. = TRUE)
  }

  size = length(accession)
  get_sequence_itr = function(i){
    start_time <- Sys.time()
    sequence = get_sequence(accession[i], start[i], stop[i], api_key)
    stop_time <- Sys.time()
    if((stop_time - start_time) < (1 / query_rate)) {
      #sleep to limit query rate :-(
      Sys.sleep((1/query_rate) - (stop_time - start_time))
    }
    sequence
  }
  sequences = alply(seq_along(accession), .margins=1, .parallel=.parallel, .progress=.progress, failwith(NA, f=get_sequence_itr))
  names = if(simplify) accession else laply(sequences, names)
  sequences = llply(sequences, `[[`, 1)
  names(sequences) = names
  class(sequences) = 'DNAbin'
  sequences
}
# from http://stackoverflow.com/questions/9335099/implementation-of-standard-recycling-rules
expand_arguments <- function(...){
  dotList <- list(...)
  max.length <- max(sapply(dotList, length))
  suppressWarnings(lapply(dotList, rep, length=max.length))
}
#' Construct a neighbor joining tree from a dna alignment
#'
#' @param dna fasta dna object the tree is to be constructed from
#' @param pairwise.deletion a logical indicating if the distance matrix should 
#' be constructed using pairwise deletion
#' @param ... furthur arguments to dist.dna
#' @seealso \code{\link{dist.dna}}, \code{\link{nj}}
#' @export
tree_from_alignment = function(dna, pairwise.deletion=TRUE, ...){
  nj(dist.dna(dna, model="N", pairwise.deletion=pairwise.deletion, ...))
}
#' Multiple sequence alignment with clustal omega
#'
#' Calls clustal omega to align a set of sequences of class DNAbin.  Run
#' without any arguments to see all the options you can pass to the command
#' line clustal omega.
#' @param x an object of class 'DNAbin'
#' @param exec a character string with the name or path to the program
#' @param quiet whether to supress output to stderr or stdout
#' @param original.ordering use the original ordering of the sequences
#' @param ... additional arguments passed to the command line clustalo
#' @export
clustalo = function (x, exec = 'clustalo', quiet = TRUE, original.ordering = TRUE, ...)
{
    help_text = system(paste(exec, '--help'), intern=TRUE)
    all_options = get_command_options(help_text)

    inf <- tempfile(fileext='.fas')
    outf <- tempfile(fileext='.aln')

    options = c(infile=inf, outfile=outf, list(...))
    match_args = pmatch(names(options), names(all_options), duplicates.ok=TRUE)
    bad_args = is.na(match_args)

    if (missing(x)){
        message(paste(help_text, collapse="\n"))
        stop('No input')
    }
    if(any(bad_args)){
      stop(paste(names(options)[bad_args], collapse=','), ' not valid option\n')
    }

    write.dna(x, inf, "fasta")
    args = paste(paste(all_options[match_args], options, collapse=' '))
    system2(exec, args=args, stdout = ifelse(quiet, FALSE, ''), stderr = ifelse(quiet, FALSE, ''))
    res <- read.dna(outf, "fasta")
    if (original.ordering)
        res <- res[labels(x), ]
    res
}

#parses the usage and enumerates the commands
get_command_options = function(usage){
  m = gregexpr('-+\\w+', usage)
  arguments = unlist(regmatches(usage, m))
  names(arguments) = gsub('-+', '', arguments)
  arguments
}

Any scripts or data that you put into this service are public.

primerTree documentation built on April 5, 2022, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

primerTree
Visually Assessing the Specificity and Informativeness of Primer Pairs

R/sequence.R
In primerTree: Visually Assessing the Specificity and Informativeness of Primer Pairs

Defines functions get_command_options tree_from_alignment expand_arguments get_sequences get_sequence

Documented in get_sequence get_sequences tree_from_alignment

Try the primerTree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

primerTree Visually Assessing the Specificity and Informativeness of Primer Pairs

R/sequence.R In primerTree: Visually Assessing the Specificity and Informativeness of Primer Pairs

Defines functions get_command_options tree_from_alignment expand_arguments get_sequences get_sequence

Documented in get_sequence get_sequences tree_from_alignment

Try the primerTree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

primerTree
Visually Assessing the Specificity and Informativeness of Primer Pairs

R/sequence.R
In primerTree: Visually Assessing the Specificity and Informativeness of Primer Pairs