R/all-generics.R

#' @include utils.R
#' @useDynLib biofiles
NULL

# The biofiles API -------------------------------------------------------

##    Basic getters/setters in gbLocation-class, gbFeature-class, gbFeatureTable-class,
##    gbRecord-class:
##      start, end, span, strand, joint_range, start<-, end<-, strand<-, ranges
##
##    Getters/setters in gbLocation-class:
##      fuzzy, accession
##
##    Getters/setters in gbFeature-class, gbFeatureTable-class:
##      index, key, location, ranges, sequence, seqinfo
##      qualif, dbxref, locusTag, product, proteinID, note, translation
##      key<-, qualif<-
##
##    Getters/setters in gbRecord-class:
##      locus, reference accession, definition
##
##    Testing methods in gbFeature-class, gbFeatureTable-class:
##      hasKey, hasQualif
##
##    Show methods all classes:
##      show
##
##    Subsetting Methods:
##      [, [[, $
##
##    Genetic Write methods:
##      write.GenBank, write.FeatureTable
##    
##    The "shift" and "ranges" generics are defined in the IRanges package.

# getter/setter generics -------------------------------------------------

### The "start" and "start<- generics are defined in the BiocGenerics package.
#' Get or set the start position of genomic features
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param join Join compound genomic locations into a single range.
#' @param ... Further arguments passed to methods.
#' @return An integer vector or a list of integer vectors.
#' @seealso
#'   \code{\link{end}}, \code{\link{strand}}, \code{\link{span}}, \code{\link{ranges}}
#' @importFrom BiocGenerics start
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' 
#' ## start
#' start(x)
#' cds <- x["CDS"]
#' start(cds)
setGeneric("start")

#' @param value The start information to set on \code{x}.
#' @importFrom BiocGenerics "start<-"
#' @rdname start
#' @export
#' @examples
#' 
#' ## `start<-`
#' start(cds) <- 10
#' start(cds)
setGeneric("start<-")

### The "end" and "end<- generics are defined in the BiocGenerics package.
#' Get or set the end position of genomic features
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param join Join compound genomic locations into a single range.
#' @param ... Further arguments passed to methods.
#' @return An integer vector or a list of integer vectors.
#' @seealso
#'   \code{\link{start}}, \code{\link{strand}}, \code{\link{span}}, \code{\link{ranges}}
#' @importFrom BiocGenerics end
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' 
#' ## end
#' end(x)
#' cds <- x["CDS"]
#' end(cds)
setGeneric("end")

#' @param value The end information to set on \code{x}.
#' @importFrom BiocGenerics "end<-"
#' @rdname end
#' @export
#' @examples
#' 
#' ## `end<-`
#' end(cds) <- 1000
#' end(cds)
#' ranges(cds)
setGeneric("end<-")

### The "strand" and "strand<-" generics are defined in the BiocGenerics package.
#' Get or set the strand information of genomic features
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param join Join compound genomic locations into a single range.
#' @param ... Further arguments passed to methods.
#' @return An integer vector (or a list thereof) of 1 (plus strand), -1 (minus strand), or
#' \code{NA}
#' @seealso
#'   \code{\link{start}}, \code{\link{end}}, \code{\link{span}}, \code{\link{ranges}}
#' @importFrom BiocGenerics strand
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' 
#' ## strand
#' strand(x)
setGeneric("strand")

#' @param value The strand information to set on \code{x}.
#' @importFrom BiocGenerics "strand<-"
#' @export
#' @rdname strand
#' @examples
#' 
#' ## `strand<-`
#' showMethods("strand<-")
setGeneric("strand<-", signature = "x")

#' Get the span of genomic features.
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param join Join compound genomic locations into a single range.
#' @param ... Further arguments passed to methods.
#' @return An integer vector or a list of integer vectors.
#' @seealso
#'   \code{\link{start}}, \code{\link{end}}, \code{\link{strand}}, \code{\link{ranges}}
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' span(x)
setGeneric("span", signature = "x", function(x, ...) {
  standardGeneric("span")
})

#' @rdname span
#' @export
setGeneric("joint_range", signature = "x", function(x) standardGeneric("joint_range"))

### The "ranges" generic is defined in the IRanges package.
#' Extract features as \code{"\linkS4class{GRanges}"} objects.
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param join Join compound genomic locations into a single range.
#' @param key Include feature keys with ranges.
#' @param include Include qualifiers as metadata columns. Can be "none",
#' "all", or a character vector of qualifier tags.
#' @param exclude Exclude specific qualifiers.
#' @param ... Further arguments passed to methods.
#' @return A \code{\linkS4class{GRanges}} or \code{\linkS4class{GRangesList}} object.
#' @seealso
#'   \code{\link{start}}, \code{\link{end}}, \code{\link{span}}, \code{\link{strand}},
#'   \code{\link{location}}, \code{\link{key}}, \code{\link{qualif}}
#' @importFrom IRanges ranges
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' 
#' ## default "GRanges" object.
#' ranges(x)
#' 
#' ## subset CDSs and include "product", "note", "protein_id" as metadata.
#' ranges(x["CDS"], include = c("product", "note", "protein_id"))
#'
#' ## subset CDSs and exclude "translation"
#' ranges(x["CDS"], include = "all", exclude = "translation")
setGeneric("ranges", function(x, join = FALSE, key = TRUE, include = "none", exclude = "", ...) {
  standardGeneric("ranges")
})

#' Has a feature fuzzy locations?
#' 
#' With a GenBank location like \emph{complement(<123..150)} we don't know the exact
#' start position of the feature. Use \code{fuzzy} to test for fuzzy locations.
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param ... Further arguments passed to methods.
#' @return A logical matrix.
#' @export
#' @examples
#' l <- as.gbLocation("complement(<123..150)")
#' fuzzy(l)
#' 
#' ## note that start() or end() return exact positions even if they are fuzzy.
#' start(l)
setGeneric("fuzzy", signature = "x", function(x, ...) {
  standardGeneric("fuzzy")
})

#' Access the various fields of a GenBank record.
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param ... Further arguments passed to methods.
#' @rdname accessors
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' 
#' getLocus(x)
#' getLength(x)
#' getGeneID(x)
#' getReference(x)
#' getDate(x)
setGeneric('getLocus', function(x, ...) standardGeneric('getLocus'))

#' @rdname accessors
#' @export
setGeneric("getLength", function(x, ...) standardGeneric('getLength'))

#' @rdname accessors
#' @export
setGeneric('getMoltype', function(x, ...) standardGeneric('getMoltype'))

#' @rdname accessors
#' @export
setGeneric('getTopology', function(x, ...) standardGeneric('getTopology'))

#' @rdname accessors
#' @export
setGeneric('getDivision', function(x, ...) standardGeneric('getDivision'))

#' @rdname accessors
#' @export
setGeneric('getDate', function(x) standardGeneric('getDate'))

#' @rdname accessors
#' @export
setGeneric("getDefinition", function(x, ...) standardGeneric("getDefinition"))

#' @rdname accessors
#' @export
setGeneric("getAccession", function(x, ...) standardGeneric("getAccession"))

#' @rdname accessors
#' @export
setGeneric("getVersion", function(x, ...) standardGeneric("getVersion"))

#' @rdname accessors
#' @export
setGeneric("getGeneID", function(x, ...) standardGeneric("getGeneID"))

#' @rdname accessors
#' @export
setGeneric('getDBLink', function(x) standardGeneric('getDBLink'))

#' @rdname accessors
#' @export
setGeneric('getDBSource', function(x) standardGeneric('getDBSource'))

#' @rdname accessors
#' @export
setGeneric('getSource', function(x) standardGeneric('getSource'))

#' @rdname accessors
#' @export
setGeneric('getOrganism', function(x) standardGeneric('getOrganism'))

#' @rdname accessors
#' @export
setGeneric('getTaxonomy', function(x) standardGeneric('getTaxonomy'))

#' @rdname accessors
#' @export
setGeneric('getKeywords', function(x) standardGeneric('getKeywords'))

#' @rdname accessors
#' @export
setGeneric('getReference', function(x) standardGeneric('getReference'))

#' @rdname accessors
#' @export
setGeneric('getComment', function(x) standardGeneric('getComment'))


# header, features, sequence ------------------------------------------------


#' Get the feature table from a GenBank record.
#'
#' @param x A \code{\linkS4class{gbRecord}} instance.
#' @param ... Additional arguments passed to methods.
#' @return The \code{\linkS4class{gbFeatureTable}} of a Genbank record.
#' @rdname getFeatures-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' getFeatures(x)
setGeneric("getFeatures", function(x, ...) standardGeneric("getFeatures"))

#' @rdname getFeatures-methods
#' @export
#' @examples
#' ft(x)
setGeneric("ft", function(x, ...) standardGeneric("ft"))

#' Get the sequence from a GenBank record.
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} instance.
#' @param ... Additional arguments passed to methods.
#' @return An \code{\linkS4class{XStringSet}} object, containing either the
#' complete sequence(s) of the record(s), or of the selected feature(s)
#' @rdname getSequence-methods
#' @export
#' @examples
#' \dontrun{
#' gbk_file <- system.file("extdata", "S_cerevisiae_mito.gb", package = "biofiles")
#' x <- gbRecord(gbk_file)
#' 
#' ## extract the full-length sequence of the record.
#' getSequence(x)
#' 
#' ## extract coding sequences only
#' getSequence(x["CDS"])
#' }
setGeneric("getSequence", function(x, ...) standardGeneric("getSequence"))

#' Extract the header from a \code{"\linkS4class{gbRecord}"} object.
#' 
#' @param x A \code{"\linkS4class{gbRecord}"}, \code{"\linkS4class{gbFeature}"},
#'  or \code{"\linkS4class{gbFeatureTable}"} instance.
#' @param ... Additional arguments passed to methods.
#' @return A \code{"\linkS4class{gbHeader}"} instance
#' @rdname getHeader-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' getHeader(x)
setGeneric("getHeader", function(x, ...) standardGeneric("getHeader"))

#' @rdname getHeader-methods
#' @export
#' @examples
#' header(x)
setGeneric("header", function(x, ...) standardGeneric("header"))

#' Summarise a GenBank record. 
#'
#' @param object An object of class\code{\linkS4class{gbFeature}},
#' \code{\linkS4class{gbFeatureTable}}, \code{\linkS4class{gbRecord}}, or
#' \code{\linkS4class{gbRecordList}}.
#' @param ... Arguments to be passed to methods.
#' @rdname summary-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' summary(x)
setGeneric("summary")

#' Access the indices of GenBank features
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param ... Additional arguments passed to methods.
#' @return A numeric vector of feature indeces.
#' @rdname index-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' index(x)
setGeneric("index", signature = "x", function(x, ...) {
  standardGeneric("index")
})

#' Access genomic locations of GenBank features
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}}, or
#' \code{\linkS4class{gbRecord}} object.
#' @param join Join compound genomic locations to a single range.
#' @param ... Additional arguments passed to methods.
#' @return A list of \code{\linkS4class{gbLocation}} objects
#' @rdname location-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' location(x)
setGeneric("location", signature = "x", function(x, ...) {
  standardGeneric("location")
})

#' Get/set keys of GenBank features
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param ... Additional arguments passed to methods.
#' @rdname key-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' key(x)
setGeneric("key", signature = "x", function(x, ...) {
  standardGeneric("key")
})

#' @param check if \code{FALSE}, don't perform validity checks.
#' @param value The key information to set on \code{x}.
#' @rdname key-methods
#' @export
setGeneric("key<-", signature = "x", function(x, check = TRUE, value) {
  standardGeneric("key<-")
})

#' Get/set qualifiers of GenBank features
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}}, or
#' \code{\linkS4class{gbRecord}} object.
#' @param which A character vector giving the name(s) of the
#' qualifiers to retrieve or set.
#' @param fixed If \code{TRUE}, \code{which} is matched against qualifiers as is,
#' if \code{FALSE} it is treated as a regular expression.
#' @param use.names If \code{TRUE}, return a \code{data.frame} using \code{which}
#' as column names, if \code{FALSE} return, if possible, a character vector or
#' a list.
#' @param ... Additional arguments passed to methods.
#' @return A \code{data.frame}.
#' @rdname qualif-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' qualif(x[[1]], 'db_xref')
#' 
#' ## use shortcuts to common qualifiers
#' proteinID(x["CDS"])
#' locusTag(x["CDS"])
setGeneric("qualif", signature = "x", function(x, which = "", ...) {
  standardGeneric("qualif")
})

#' @param check if \code{FALSE}, don't perform validity checks.
#' @param value The qualifier information to set on \code{x}.
#' @rdname qualif-methods
#' @export
setGeneric("qualif<-", signature = "x", function(x, which, check = TRUE, value) {
  standardGeneric("qualif<-")
})

#' Access the \code{db_xref}s of GenBank features
#' 
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}}, or
#' \code{\linkS4class{gbRecord}} object.
#' @param db (Optional) A character vector giving the database names of the
#' desired \code{db_xref}s.
#' @param ... Additional arguments passed to methods.
#' @return A named character vector (or list of named character vectors)
#' of db_xrefs.
#' @rdname dbxref-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' 
#' ## all db_xrefs associated with CDSs
#' dbxref(x["CDS"])
#' 
#' ## retrieve the TaxId from the "source" field.
#' dbxref(x[[1]], "taxon")
setGeneric("dbxref", signature = "x", function(x, db = NULL, ...) {
  standardGeneric("dbxref")
})


# write/save ----------------------------------------------------------------


#' Save and load \code{gbRecord} objects.
#' 
#' Serialise and unserialise \code{\linkS4class{gbRecord}}s using
#' \code{\link{saveRDS}} and \code{\link{readRDS}}
#' 
#' @param x A \code{\linkS4class{gbRecord}} or \code{\linkS4class{gbRecordList}} instance.
#' @param file A character string naming the file to write to or read from.
#' If \code{NULL}, the accession number will be used to construct a file name.
#' @param dir Target directory. (Default: current working directory)
#' @param ... Arguments passed to \code{\link{saveRDS}}.
#' @rdname saveRecord-methods
#' @export
#' @examples
#' \dontrun{
#' aca <- genomeRecordFromNCBI("Bacteria/Acaryochloris_marina", verbose = TRUE)
#' aca
#' saveRecord(aca)
#' rm(aca)
#' aca <- loadRecord("./NC_009925_NC_009926_NC_009927_NC_009928_NC_009929_NC_0099__.rds")
#' aca
#' }
setGeneric("saveRecord", function(x, file = NULL, dir = ".", ...) standardGeneric("saveRecord"))


#' Write GenBank records or features to file in GenBank format
#'
#' @details
#' For a description of the GenBank format see
#' \url{http://www.ncbi.nlm.nih.gov/collab/FT/}
#' 
#' @param x A \code{\linkS4class{gbRecord}} instance.
#' @param file A connection or a character string naming the file to write to.
#' @param header if \code{FALSE} exclude the Genbank header.
#' @param sequence if \code{FALSE} exclude the sequence.
#' @param append if \code{TRUE} the data is appended to the connection.
#' @param ... Additional arguments passed to methods.
#' @rdname write.GenBank-methods
#' @export
#' @examples
#' \dontrun{
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' write.GenBank(x, file = "data/marine_metagenome.gb")
#' 
#' ## write selected features to file.
#' write.GenBank(x["CDS"], file = "data/marine_metagenome_cds.gb", header = FALSE, sequence = FALSE)
#' }
setGeneric("write.GenBank", function(x, file, append = FALSE, ...) {
  standardGeneric("write.GenBank")
})


#' Write GenBank records or features to file in Feature Table format
#'
#' Feature Tables are simple five-column tab-delimited tables specifying the
#' location and type of each feature. They can be used as input for tbl2asn
#' or Sequin to generate annotation.
#' 
#' @param x A \code{\linkS4class{gbRecord}} instance.
#' @param file A connection or a character string naming the file to write to.
#' @param tablename Optional table name to appear in the first line
#' of the feature table.
#' @param dbname Data base name associated with the CDS qualifier protein_id.
#' @param sequence if \code{TRUE}, additionally output a fasta file.
#' @param append if \code{TRUE} the data is appended to the connection.
#' @param ... Additional arguments passed to methods.
#' @rdname write.FeatureTable-methods
#' @export
#' @examples
#' \dontrun{
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' write.FeatureTable(x, file = "data/marine_metagenome.tbl")
#' }
setGeneric("write.FeatureTable", signature = "x",
           function(x, file, tablename = "", dbname = "",
                    sequence = FALSE, append = FALSE, ...) {
             standardGeneric("write.FeatureTable")
           })


# list-generics ----------------------------------------------------------


#' List the names of Genbank qualifiers.
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecord}} object.
#' @param ... Additional arguments to be passed to or from methods.
#' @return A character vector (or list of character vectors) of 
#'    qualifier names.
#' @seealso
#'    \code{\link{uniqueQualifs}}, \code{\link{hasQualif}}
#' @rdname qualifList-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' qualifList(x["source"])
setGeneric("qualifList", function(x, ...) standardGeneric("qualifList"))


#' Tabulate Genbank qualifiers
#' 
#' Extract a frequency table (or list of tables in the case of
#' \code{gbRecordList}s) of qualifier names.
#'
#' @param x A \code{\linkS4class{gbFeatureTable}}, \code{\linkS4class{gbRecord}},
#' or \code{\linkS4class{gbRecordList}} object.
#' @param ... Additional arguments to be passed to or from methods.
#' @return A \code{\link{table}} (or list of \code{table}s) of 
#'    qualifiers names.
#' @seealso
#'    \code{\link{uniqueQualifs}}, \code{\link{hasQualif}}
#' @rdname qualifTable-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' qualifTable(x)
setGeneric("qualifTable", function(x, ...) standardGeneric("qualifTable"))


#' Tabulate Genbank features
#' 
#' Extract a frequency table (or list of tables in the case of
#' \code{gbRecordList}s) of feature keys.
#'
#' @inheritParams qualifTable
#' @return A \code{\link{table}} (or list of \code{table}s) of 
#'    feature keys.
#' @seealso
#'    \code{\link{qualifTable}}
#' @rdname featureTable-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' featureTable(x)
setGeneric("featureTable", function(x, ...) standardGeneric("featureTable"))



# test-generics ----------------------------------------------------------

#' Has a feature a specific key?
#'  
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param key A feature key. 
#' @param ... Additional arguments to be passed to or from methods.
#' @return A logical vector or a list of logical vectors.
#' @seealso
#'    \code{\link{key}}
#' @rdname hasKey-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' hasKey(x, 'CDS')
setGeneric("hasKey", signature = c("x","key"), function(x, key, ...) {
  standardGeneric("hasKey")
})

#' Has a feature a specific qualifier?
#'
#' @param x A \code{\linkS4class{gbFeature}}, \code{\linkS4class{gbFeatureTable}},
#' \code{\linkS4class{gbRecord}}, or \code{\linkS4class{gbRecordList}} object.
#' @param qualifier A character string. Name of a qualifier.
#' @param ... Additional arguments to be passed to or from methods.
#' @return A logical vector or a list of logical vectors.
#' @seealso
#'  \code{\link{qualifList}}, to extract a list of available qualifiers for
#'  each feature; \code{\link{uniqueQualifs}}, for a vector of all
#'  unique qualifiers present in an object. 
#' @rdname hasQualif-methods
#' @export
#' @examples
#' load(system.file("extdata", "marine_metagenome.rda", package = "biofiles"))
#' hasQualif(x, 'CDS')
setGeneric("hasQualif", signature = c("x", "qualifier"), function(x, qualifier, ...) {
  standardGeneric("hasQualif")
})


# shift and revcomp -------------------------------------------------------------


#' Shift the location of features in a GenBank record
#'
#' @note \code{shift} does not currently handle compound locations In a shifted
#' feature table compound locations get merged.
#' @usage shift(x, shift = 0L, split = FALSE, order = TRUE, ...)
#' @param x A \code{\linkS4class{gbFeatureTable}} or
#' \code{\linkS4class{gbRecord}} instance (gbFeatureTables must 
#' be complete and include a 'source' field).
#' @param shift Number of basepairs (or aa residues) to shift.
#' @param split Split features that after the shift extends across the end of
#' the sequence.
#' @param order Reorder features after the shift.
#' @param ... Additional arguments passed to methods.
#'
#' @return A \code{\linkS4class{gbFeatureTable}} object.
#' @export
#' @examples
#' load(system.file("extdata", "S_cerevisiae_mito.rda", package = "biofiles"))
#' 
#' ## shift the S. cerevisiae mitochondrion such that cytochrome b is the first CDS
#' cytb <- start(filter(x, product = "^cytochrome b$")[[1]])[1]
#' x2 <- shift(x, shift = -cytb + 1, split = TRUE)
setGeneric("shift", signature = "x", function(x, shift = 0L, split = FALSE, order = TRUE, ...) {
  standardGeneric("shift")
})

#' Reverse-complement features in a GenBank record
#' 
#' @usage revcomp(x, order = TRUE, ...)
#' @param x A \code{\linkS4class{gbFeatureTable}} or
#' \code{\linkS4class{gbRecord}} object (gbFeatureTables must 
#' be complete and include a 'source' field).
#' @param order Reorder features after reverse-complementing them.
#' @param ... Additional arguments passed to methods.
#' @rdname revcomp-methods
#' @export
#' @examples
#' load(system.file("extdata", "S_cerevisiae_mito.rda", package = "biofiles"))
#' xr <- revcomp(x)
setGeneric("revcomp", signature = "x", function(x, order = TRUE, ...) standardGeneric("revcomp"))


# view -------------------------------------------------------------------


#' View all features in a \code{gbFeatureTable}
#' 
#' @param x A \code{\linkS4class{gbFeatureTable}} instance.
#' @param n How many features to show (Default: all).
#' @param ... Additional arguments passed to methods.
#' @rdname view-methods
#' @keywords internal
#' @export
setGeneric("view", signature = "x", function(x, n, ...) {
  standardGeneric("view")
})


# filter --------------------------------------------------------------------


#' Return a subset of features or annotations from a GenBank Record
#' 
#' \code{filter} returns a subset of features from \code{\linkS4class{gbRecordList}},
#' \code{\linkS4class{gbRecord}} or \code{\linkS4class{gbFeatureTable}} objects,
#' based on filters provided as \emph{key}, \emph{range}, or \emph{qualifier} values.
#'
#' @details
#' Filters are provided as named values using keywords and/or
#' \dQuote{\emph{qualifier = value}} pairs:
#' 
#' Permissible keywords are:
#' 
#' \describe{
#'   \item{index/idx}{
#'     For example: \code{idx = c(3,4,5,6)}, \code{idx = 100:150},
#'     \code{index = c(1,12:20)}
#'   }
#'   \item{range}{
#'     For example: \code{range = "10000..25000"},
#'     \code{range = "..10000,20000..25000"},
#'     \code{range = "30000.."}
#'   }
#'   \item{key}{
#'     For example: \code{key = "CDS"}, \code{key = c("CDS", "gene")}
#'   }
#'   \item{arbitrary qualifiers}{
#'     For example: \code{product = "ribosomal"}, \code{locus_tag = c("CPSIT_0123",
#'     "CPSIT_0124", "CPSIT_0125")}, \code{pseudo = TRUE}
#'   }
#' }
#' 
#' 
#' @param x A \sQuote{\code{gbRecord}} or \sQuote{\code{gbFeatureTable}}
#' instance.
#' @param ... For \code{filter}: named values that specify the features to select. These are
#' merged with the values of \code{keys} to create the actual query. See
#' Details; for \code{select}: see \code{.cols}.
#' @param .cols A character vector of \sQuote{\emph{keys}} that specify annotaions
#' to be returned as a \code{data.frame} from the filtered features. If \code{NULL},
#' a \sQuote{\code{gbFeatureTable}} is returned.
#' Supported \sQuote{\emph{keys}} are \dQuote{index} or \dQuote{idx}, \dQuote{start},
#' tag (e.g., \dQuote{locus_tag}, \dQuote{product}, \dQuote{db_xref}). Specific
#' \code{db_xref}s can by queried using, e.g. \dQuote{db_xref.GI} or
#' \dQuote{db_xref.GeneID}.
#' 
#' @return Depending on the value of \code{.col} a \code{gbRecordList},
#' \code{gbRecord}, or\code{gbFeatureTable} or a \code{data.frame}.
#' @rdname manip-methods
#' @export
#' @examples
#' load(system.file("extdata", "S_cerevisiae_mito.rda", package = "biofiles"))
#' 
#' ## filter all hydrophobic tRNAs from the yeast mitochondrion
#' hydrophobic <- c("Val", "Ile", "Leu", "Met", "Phe", "Trp", "Cys")
#' trna <- filter(x, key = "tRNA", product = hydrophobic)
#' 
#' ## select start, end, orientation, product, and GeneID
#' df <- select(trna, "start", "end", "strand", "product", "db_xref.GeneID")
#' df
#' 
#' ## combine the above steps into one
#' cols <- c("start", "end", "strand", "product", "db_xref.GeneID")
#' filter(x, key = "tRNA", product = hydrophobic, .cols = cols)
#' 
#' ## filter all CDS from position 60,000 bp onward
#' filter(x, key = "CDS", range = "60000..")
setGeneric("filter", signature = "x", function(x, ...) standardGeneric("filter"))


# select -----------------------------------------------------------------


#' \code{select} returns a specified subset of annotations from GenBank Features
#' as a \code{data.frame}.
#' @rdname manip-methods
#' @export
setGeneric("select", signature = "x", function(x, ...) standardGeneric("select"))


# internal ---------------------------------------------------------------


#' @keywords internal
setGeneric('.seqinfo', function(x) standardGeneric('.seqinfo'))

#' @keywords internal
setGeneric('.header', function(x) standardGeneric('.header'))

#' @keywords internal
setGeneric('.sequence', function(x) standardGeneric('.sequence'))

#' @keywords internal
setGeneric('.locus', function(x) standardGeneric('.locus'))

#' @keywords internal
setGeneric('.features', function(x) standardGeneric('.features'))

#' @keywords internal
setGeneric('.contig', function(x) standardGeneric('.contig'))

#' @keywords internal
setGeneric('.dbSource', function(x) standardGeneric('.dbSource'))

#' @keywords internal
setGeneric('.defline', function(x) standardGeneric('.defline'))

Try the biofiles package in your browser

Any scripts or data that you put into this service are public.

biofiles documentation built on May 2, 2019, 3:31 p.m.