Nothing
#' Access default values for a pgVirtual subclass object
#'
#' This method lets the user view and set the default values used for the
#' different algorithms in FindMyFriends. Many of the parameters are recurring
#' and it can become laborious to type them in at each step. This
#' functionality makes it easy to set defaults on a per-pangenome basis.
#'
#' Currently the following methods support reading defaults from a pgVirtual
#' object. Note that only directly named arguments are supported - arguments
#' passed on through the \code{...}-mechanism are not supported unless they are
#' passed to a function that supports it.
#'
#' \itemize{
#' \item \code{\link{graphGrouping}}
#' \item \code{\link{gpcGrouping}}
#' \item \code{\link{variableRegions}}
#' \item \code{\link{plotGroup}}
#' \item \code{\link{kmerLink}}
#' \item \code{\link{plotSimilarity}}
#' \item \code{\link{plotTree}}
#' \item \code{\link{kmerSimilarity}}
#' }
#'
#' @param object A pgVirtual subclass
#'
#' @return A named list of default values
#'
#' @examples
#' # Get all object defaults
#' testPG <- .loadPgExample()
#' defaults(testPG)
#'
#' # Set a new default
#' defaults(testPG)$minFlank <- 2
#'
#' @export
#'
setGeneric(
  "defaults",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("defaults")
)
#' @rdname defaults
#'
#' @param value The new values to set
#'
#' @export
#'
setGeneric(
  "defaults<-",
  # Replacement-form generic (`defaults(object) <- value`). The body is the
  # bare standardGeneric() call: wrapping it in braces makes setGeneric()
  # build a nonstandardGenericFunction instead of a standardGeneric.
  def = function(object, value) standardGeneric("defaults<-")
)
#' Get the number of organisms represented in a pangenome
#'
#' This method returns the current number of organisms in a pgVirtual
#' subclass. This is also the result of calling \code{length()} on the object.
#'
#' @param object A pgVirtual subclass
#'
#' @return An integer giving the number of organisms
#'
#' @examples
#' testPG <- .loadPgExample()
#' nOrganisms(testPG)
#'
#' @export
#'
setGeneric(
  "nOrganisms",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("nOrganisms")
)
#' Get the total number of genes in a pangenome
#'
#' This method returns the total number of genes in a pangenome (i.e. the sum
#' of genes in each organism in the pangenome)
#'
#' @param object A pgVirtual subclass
#'
#' @return An integer giving the number of genes in the object
#'
#' @examples
#' testPG <- .loadPgExample()
#' nGenes(testPG)
#'
#' @export
#'
setGeneric(
  "nGenes",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("nGenes")
)
#' Get the number of gene groups in a pangenome
#'
#' This method gives the number of different gene groups in the object.
#'
#' @param object A pgVirtual subclass
#'
#' @return An integer giving the number of gene groups
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#' nGeneGroups(testPG)
#'
#' @export
#'
setGeneric(
  "nGeneGroups",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("nGeneGroups")
)
#' Check whether gene groups are defined
#'
#' This method checks whether any grouping of genes has been done on the
#' object and returns TRUE if that is the case.
#'
#' @param object A pgVirtual subclass
#'
#' @return A boolean indicating whether gene groups have been defined (TRUE) or
#' not (FALSE)
#'
#' @examples
#' # Empty pangenome
#' testPG <- .loadPgExample()
#' hasGeneGroups(testPG)
#'
#' # With gene groups
#' testPG <- .loadPgExample(withGroups=TRUE)
#' hasGeneGroups(testPG)
#'
#' @export
#'
setGeneric(
  "hasGeneGroups",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("hasGeneGroups")
)
#' Checks whether linking of paralogues has been done
#'
#' This method checks for the existence of paralogue links in the object.
#'
#' @param object A pgVirtual subclass
#'
#' @return A boolean indicating whether paralogue links have been defined (TRUE)
#' or not (FALSE)
#'
#' @examples
#' # No paralogues
#' testPG <- .loadPgExample(withGroups=TRUE)
#' hasParalogueLinks(testPG)
#'
#' # With paralogues
#' testPG <- .loadPgExample(withGroups=TRUE, withParalogues=TRUE)
#' hasParalogueLinks(testPG)
#'
#' @export
#'
setGeneric(
  "hasParalogueLinks",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("hasParalogueLinks")
)
#' Checks for existence of gene location information
#'
#' This method checks whether gene location information is present in the
#' object i.e. if the object inherits from pgVirtualLoc
#'
#' @param object A pgVirtual subclass
#'
#' @return A boolean indicating whether gene location information is present
#' (TRUE) or not (FALSE)
#'
#' @examples
#' # Exclusive pgVirtual subclasses
#' testPG <- .loadPgExample()
#' hasGeneInfo(testPG)
#'
#' # pgVirtualLoc subclasses
#' testPG <- .loadPgExample(geneLoc=TRUE)
#' hasGeneInfo(testPG)
#'
#' @export
#'
setGeneric(
  "hasGeneInfo",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("hasGeneInfo")
)
#' Get gene location for all genes
#'
#' This method returns the gene location of all genes as a data.frame with each
#' row corresponding to a gene in the pangenome. The data.frame will have the
#' columns 'start', 'end', 'contig' and 'strand' (order of columns not ensured)
#' with start and end giving the start and end position of the gene on the
#' contig/chromosome given in the contig column. Strand gives the direction of
#' translation, 1 is from start to end and -1 is from end to start (thus start
#' should always be lower than end no matter the direction of translation)
#'
#' @param object A pgVirtual subclass
#'
#' @return A data.frame as described above
#'
#' @note Required for subclasses of pgVirtualLoc in order to extend the class
#' system of FindMyFriends
#'
#' @examples
#' testPG <- .loadPgExample(geneLoc=TRUE)
#' head(geneLocation(testPG))
#'
#' @export
#'
setGeneric(
  "geneLocation",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("geneLocation")
)
#' Check the sequence type of the pangenome
#'
#' This method checks whether the genes in the pangenome are in translated
#' form (amino acid sequences) or not. A return value of FALSE only indicates
#' that the storage mode for the genes is not an AAStringSet. While this leaves
#' room for both RNA-, DNA- and BStringSet, only DNAStringSet makes much sense
#' and is therefore assumed.
#'
#' @param object A pgVirtual subclass
#'
#' @return A boolean indicating whether genes are translated (TRUE) or not
#' (FALSE)
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' # Genes are translated
#' translated(testPG)
#'
#' # ... and therefore returned as AAStringSet instead of DNAStringSet
#' class(genes(testPG, subset=1))
#'
#' @export
#'
setGeneric(
  "translated",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("translated")
)
#' Extract gene sequences from a pangenome
#'
#' This method is used to extract the genomic sequences that are the basis for
#' the pangenome. Genes can be split and subsetted upfront based on other
#' information in the pangenome, such as gene groups and organisms. For some
#' pgVirtual subclasses the subset parameter is mandatory in order to avoid
#' reading all genes into memory at once.
#'
#' @param object A pgVirtual subclass
#'
#' @param split A string giving the optional splitting type. Either 'organism',
#' 'group' or 'paralogue'.
#'
#' @param subset A subsetting of the result equal to using '[]' on the result.
#' It is generally recommended to use this instead of subsetting the result, as
#' it avoids unneeded memory allocation.
#'
#' @return An XStringSet if split is missing or an XStringSetList if it is
#' present
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE, withParalogues=TRUE)
#' # Direct gene access
#' genes(testPG)
#'
#' # Early subsetting
#' genes(testPG, subset=1:10)
#'
#' # Split by membership
#' genes(testPG, split='organism')
#' genes(testPG, split='group')
#' genes(testPG, split='paralogue')
#'
#' # Split and subset - get genes from the first organism
#' genes(testPG, split='organism', subset=1)
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @export
#'
setGeneric(
  "genes",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, split, subset) standardGeneric("genes")
)
#' Get a representative sequence for each gene group
#'
#' This method returns a representative sequence for each of the gene groups
#' defined in the pangenome. Currently the methods defined for selecting
#' sequences are 'random', 'shortest', and 'longest'. In case of a tie for the
#' two latter the first occurrence gets returned. Consensus sequence might be
#' added at a later stage.
#'
#' @param object A pgVirtual subclass
#'
#' @param method The method to use to get a representative. Either 'random',
#' 'shortest' or 'longest'.
#'
#' @return An XStringSet
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Get a random sequence from each group
#' getRep(testPG, 'random')
#'
#' @export
#'
setGeneric(
  "getRep",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, method) standardGeneric("getRep")
)
#' Get and set the names of the genes in the pangenome
#'
#' These methods let you query and change the naming of genes in your
#' pangenome. Take note that even though sequences are not in memory for pgLM
#' objects, the names are. This means that changes to the description header in
#' the underlying fasta files have no effect on the naming in your pangenome.
#'
#' @param object A pgVirtual subclass
#'
#' @return In case of the getter, a character vector containing the names of
#' each gene.
#'
#' @examples
#' testPG <- .loadPgExample()
#' head(geneNames(testPG))
#'
#' geneNames(testPG)[10] <- 'Gene number 10'
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @export
#'
setGeneric(
  "geneNames",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("geneNames")
)
#' @rdname geneNames
#'
#' @param value A character vector with new names
#'
#' @export
#'
setGeneric(
  "geneNames<-",
  # Replacement-form generic (`geneNames(object) <- value`). The body is the
  # bare standardGeneric() call: wrapping it in braces makes setGeneric()
  # build a nonstandardGenericFunction instead of a standardGeneric.
  def = function(object, value) standardGeneric("geneNames<-")
)
#' Get the sequence length of each gene
#'
#' This method extracts the width (i.e. number of residues) of each gene in
#' the pangenome.
#'
#' @param object A pgVirtual subclass
#'
#' @return An integer vector with the length of each sequence
#'
#' @examples
#' testPG <- .loadPgExample()
#' head(geneWidth(testPG))
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @export
#'
setGeneric(
  "geneWidth",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("geneWidth")
)
#' Get and set the names of organisms in the pangenome
#'
#' These methods let you manipulate the naming of organisms in the pangenome.
#' By default organisms are named after the fasta file they are defined by, but
#' this can be changed at will.
#'
#' @param object A pgVirtual subclass
#'
#' @return In case of the getter a character vector with names
#'
#' @examples
#' testPG <- .loadPgExample()
#' orgNames(testPG)
#'
#' orgNames(testPG)[3] <- 'Organism 3'
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @export
#'
setGeneric(
  "orgNames",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("orgNames")
)
#' @rdname orgNames
#'
#' @param value A vector with new names - will be coerced to characters
#'
#' @export
#'
setGeneric(
  "orgNames<-",
  # Replacement-form generic (`orgNames(object) <- value`). The body is the
  # bare standardGeneric() call: wrapping it in braces makes setGeneric()
  # build a nonstandardGenericFunction instead of a standardGeneric.
  def = function(object, value) standardGeneric("orgNames<-")
)
#' Get and set the names of gene groups in the pangenome
#'
#' These methods let you manipulate the naming of gene groups in the
#' pangenome. By default gene groups are numbered consecutively but this can be
#' changed at will. New gene groups will be numbered though, regardless of what
#' naming scheme has been introduced before.
#'
#' @param object A pgVirtual subclass
#'
#' @return In case of the getter a character vector with names
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#' head(groupNames(testPG))
#'
#' groupNames(testPG)[20] <- 'Gene group 20'
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @export
#'
setGeneric(
  "groupNames",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("groupNames")
)
#' @rdname groupNames
#'
#' @param value A vector with new names - will be coerced to characters
#'
#' @export
#'
setGeneric(
  "groupNames<-",
  # Replacement-form generic (`groupNames(object) <- value`). The body is the
  # bare standardGeneric() call: wrapping it in braces makes setGeneric()
  # build a nonstandardGenericFunction instead of a standardGeneric.
  def = function(object, value) standardGeneric("groupNames<-")
)
#' Get and set information about organisms
#'
#' These methods let you access the information stored about each organism and
#' add to it or modify it. The only information present up front is the number
#' of genes present in each organism. While possible, this information should
#' not be changed manually but through the \code{\link{removeGene}} functions.
#'
#' @param object A pgVirtual subclass
#'
#' @return In case of the getter a data.frame with organism information.
#'
#' @examples
#' testPG <- .loadPgExample()
#' orgInfo(testPG)
#'
#' orgInfo(testPG)$Genus <- 'Mycoplasma'
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @family Metadata
#'
#' @export
#'
setGeneric(
  "orgInfo",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("orgInfo")
)
#' @rdname orgInfo
#'
#' @param value A data.frame with a row for each organism
#'
#' @export
#'
setGeneric(
  "orgInfo<-",
  # Replacement-form generic (`orgInfo(object) <- value`). The body is the
  # bare standardGeneric() call: wrapping it in braces makes setGeneric()
  # build a nonstandardGenericFunction instead of a standardGeneric.
  def = function(object, value) standardGeneric("orgInfo<-")
)
#' Get and set information about gene groups
#'
#' These methods let you access the information stored about each gene group
#' and add to it or modify it. Upfront the following columns are present:
#' 'description', 'group', 'paralogue', 'GO', 'EC', 'nOrg' and 'nGenes'. All
#' except 'group', 'nOrg' and 'nGenes' are filled with NA as default. The latter
#' are prefilled with information derived from the grouping itself and should
#' not be modified manually. 'description' is meant to contain a human readable
#' description of the functionality of the gene group, 'GO' should contain GO
#' terms (stored in a list of character vectors) and EC should contain enzyme
#' numbers (again stored as a list of character vectors). There is no check for
#' the validity of the content so it is up to the user to ensure that the terms
#' added are valid. Additional columns can be added at will.
#'
#' @param object A pgVirtual subclass
#'
#' @return In case of the getter a data.frame with gene group information.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#' head(groupInfo(testPG))
#'
#' groupInfo(testPG)$description[1] <- 'transposase'
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @family Metadata
#'
#' @export
#'
setGeneric(
  "groupInfo",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("groupInfo")
)
#' @rdname groupInfo
#'
#' @param value A data.frame with a row for each group
#'
#' @export
#'
setGeneric(
  "groupInfo<-",
  # Replacement-form generic (`groupInfo(object) <- value`). The body is the
  # bare standardGeneric() call: wrapping it in braces makes setGeneric()
  # build a nonstandardGenericFunction instead of a standardGeneric.
  def = function(object, value) standardGeneric("groupInfo<-")
)
#' Get the pangenome matrix
#'
#' This method lets you extract the pangenome matrix of the pangenome. It is not
#' possible to directly change the pangenome matrix as it is not necessarily
#' stored in the object but might be calculated on request. Either way the
#' pangenome matrix is a function of the gene grouping and should be changed by
#' changing the gene grouping instead of being manipulated downstream.
#'
#' @param object A pgVirtual subclass
#'
#' @return A matrix with organisms as columns and gene groups as rows
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' head(pgMatrix(testPG))
#'
#' @export
#'
setGeneric(
  "pgMatrix",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object) standardGeneric("pgMatrix")
)
#' Use igraph to create gene grouping from a similarity matrix
#'
#' This method takes a similarity matrix based on all genes in the pangenome,
#' converts it to a graph representation and uses one of igraph's community
#' detection algorithms to split all genes into groups. Within the FindMyFriends
#' framework the similarity matrix would usually come from
#' \code{\link{kmerSimilarity}}, but it can just as well be defined in other
#' ways e.g. be blast derived.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters to be passed on to the community detection algorithm
#'
#' @return An object of the same class as 'object'.
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' # Too heavy to include
#' \dontrun{
#' # Generate similarity matrix
#' simMat <- kmerSimilarity(testPG, lowerLimit=0.75)
#'
#' # Group genes
#' testPG <- graphGrouping(testPG, simMat)
#' }
#'
#' @family grouping algorithms
#'
#' @export
#'
setGeneric(
  "graphGrouping",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("graphGrouping")
)
#' Guided Pairwise Comparison grouping of genes
#'
#' This algorithm recursively builds up a pangenome by merging subpangenomes.
#' The recursion follows either a supplied hierarchical clustering or one
#' created using kmer comparison for the full organism. At each step a
#' representative for each gene group is selected randomly
#' and gets compared to all other representatives. Gene groups are then merged
#' based on the pangenome created for the representatives. Due to the sampling
#' of representatives at each step there is a certain randomness to the
#' algorithm. Results should be fairly stable though, as gene groups are
#' compared multiple times.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return An object of the same class as 'object'.
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' # Too heavy to include
#' \dontrun{
#' testPG <- gpcGrouping(testPG)
#' }
#'
#' @family grouping algorithms
#'
#' @export
#'
setGeneric(
  "gpcGrouping",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("gpcGrouping")
)
#' Gene grouping by preclustering with CD-HIT
#'
#' This grouping algorithm partly mimics the approach used by Roary, but
#' instead of using BLAST in the second pass it uses cosine similarity of kmer
#' feature vectors, thus providing an even greater speedup. The algorithm uses
#' the CD-HIT algorithm to precluster highly similar sequences and then groups
#' these clusters by extracting a representative and clustering these using the
#' standard FindMyFriends kmer cosine similarity.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return An object of the same class as 'object'.
#'
#' @references
#' Page, A. J., Cummins, C. A., Hunt, M., Wong, V. K., Reuter, S., Holden, M. T.
#' G., et al. (2015). Roary: rapid large-scale prokaryote pan genome analysis.
#' \emph{Bioinformatics}, btv421.
#'
#' Fu, L., Niu, B., Zhu, Z., Wu, S., Li, W. (2012). CD-HIT:
#' accelerated for clustering the next generation sequencing data.
#' \emph{Bioinformatics}, \bold{28} (23), 3150--3152.
#'
#' Li, W. and Godzik, A. (2006) Cd-hit: a fast program for clustering and
#' comparing large sets of protein or nucleotide sequences.
#' \emph{Bioinformatics}, \bold{22}, 1658--9.
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' testPG <- cdhitGrouping(testPG)
#'
#' @family grouping algorithms
#'
#' @export
#'
setGeneric(
  "cdhitGrouping",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("cdhitGrouping")
)
# Generic without a roxygen block or @export tag in this file.
setGeneric(
  "precluster",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("precluster")
)
#' Define gene grouping manually
#'
#' In cases where results from other algorithms are wished to be imported into
#' the FindMyFriends framework, this method ensures that the proper formatting
#' is done. The grouping can be defined as an integer vector with an element
#' for each gene. The value of each element is then used as the gene group
#' classifier. Alternatively groups can be defined by a list of integer vectors.
#' Each element of the list defines a group and the content of each element
#' refers to gene indexes.
#'
#' @param object A pgVirtual subclass
#'
#' @param groups Either a list or integer vector defining the grouping
#'
#' @return An object of the same class as 'object'.
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' # Load grouping data
#' groups <- system.file('extdata', 'examplePG', 'groupsWG.txt',
#' package='FindMyFriends'
#' )
#' groups <- scan(groups, what=integer(), quiet=TRUE)
#'
#' # Do the grouping
#' testPG <- manualGrouping(testPG, groups)
#'
#' @family grouping algorithms
#'
#' @export
#'
setGeneric(
  "manualGrouping",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, groups) standardGeneric("manualGrouping")
)
#' Calculate a similarity matrix based on kmers
#'
#' This method takes a pangenome and calculates a similarity matrix based on
#' cosine similarity of kmer feature vectors in an all-vs-all fashion. The
#' result can subsequently be used to group genes either using
#' \code{\link{graphGrouping}} or a homebrewed grouping scheme. In case of the
#' latter \code{\link{manualGrouping}} should be used to add the grouping back
#' to the pangenome.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return A matrix (sparse or normal) with cosine similarity for each gene pair
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' # Too heavy to include
#' \dontrun{
#' kmerSim <- kmerSimilarity(testPG, lowerLimit=0.75)
#' }
#'
#' @export
#'
setGeneric(
  "kmerSimilarity",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("kmerSimilarity")
)
#' Split gene groups by neighborhood synteny
#'
#' This function evaluates already created gene groups and splits the members
#' into new groups based on the synteny of the flanking genes and the similarity
#' of the sequences. In general the splitting is based on multiple stages that
#' all gene pairs must pass in order to remain in the same group. First the link
#' between the genes is removed if they are part of the same organism. Then the
#' synteny of the flanking genes is assessed and if it doesn't pass the
#' defined threshold the link between the gene pair is removed. Then the kmer
#' similarity of the two sequences is compared and if below a certain threshold
#' the link is removed. Lastly the lengths of the two sequences are compared and
#' if below a certain threshold the link is removed. Based on this new graph
#' cliques are detected and sorted based on the lowest within-clique sequence
#' similarity and neighborhood synteny. The cliques are then added as new groups
#' if the members are not already members of a new group until all members are
#' part of a new group. This approach ensures that all members of the new
#' groupings pass certain conditions when compared to all other members of the
#' same group. After the splitting a refinement step is done where gene groups
#' with high similarity and sharing a neighbor either up- or downstream are
#' merged together to avoid spurious errors resulting from the initial grouping.
#'
#' @param object A pgVirtualLoc subclass
#'
#' @param ... parameters passed on.
#'
#' @return An object with the same class as object containing the new grouping.
#'
#' @family group-splitting
#'
#' @examples
#' testPG <- .loadPgExample(geneLoc=TRUE, withGroups=TRUE)
#'
#' # Too heavy to run
#' \dontrun{
#' testPG <- neighborhoodSplit(testPG, lowerLimit=0.75)
#' }
#'
#' @export
#'
setGeneric(
  "neighborhoodSplit",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("neighborhoodSplit")
)
#' Link gene groups by homology
#'
#' This method allows the user to define a secondary grouping of genes by
#' linking gene groups based on sequence similarity (paralogues). A
#' representative for each gene group is used for the calculations and the
#' similarity is assessed using the kmer based cosine similarity.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on to the community detection algorithm.
#'
#' @return An object with the same class as object with linking between gene
#' groups.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # No paralogue links
#' hasParalogueLinks(testPG)
#'
#' # Create the links
#' testPG <- kmerLink(testPG)
#'
#' @export
#'
setGeneric(
  "kmerLink",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("kmerLink")
)
#' Add new organisms to an existing pangenome
#'
#' This method allows new genomes to be added to an already processed pangenome,
#' preserving existing grouping and adding new genes to their relevant groups.
#' This makes it possible to gradually grow the pangenome as new sequences
#' become available without redoing the grouping each time and losing the gene
#' group metadata.
#'
#' @param object A pgVirtual subclass to merge the new genomes into
#'
#' @param newSet An object of the same class as object containing the new
#' organisms to add. Grouping of the genes contained in this object can already
#' exist, if not it will be done automatically.
#'
#' @param ... parameters passed on.
#'
#' @return An object of the same class as object containing the new organisms
#' from newSet and possible new gene groups from genes with no orthologues in
#' the original pangenome.
#'
#' @examples
#' # Get base pangenome
#' pg <- .loadPgExample(geneLoc = TRUE, withGroups = TRUE,
#' withNeighborhoodSplit = TRUE)
#' # Get some additional genomes
#' location <- tempdir()
#' unzip(system.file('extdata', 'Mycoplasma.zip', package = 'FindMyFriends'),
#' exdir = location)
#' genomeFiles <- list.files(location, full.names = TRUE, pattern = '*.fasta')[6:10]
#' pg2 <- pangenome(genomeFiles, translated = TRUE, geneLocation = 'prodigal')
#'
#' # Combine the two (too computational heavy to include)
#' \dontrun{
#' pg3 <- addGenomes(pg, pg2, nsParam = list(lowerLimit = 0.8))
#' }
#'
#' @export
#'
setGeneric(
  "addGenomes",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, newSet, ...) standardGeneric("addGenomes")
)
#' Remove genes from a pangenome
#'
#' This method makes it possible to safely remove genes from a pangenome using a
#' variety of selection mechanisms depending on the supplied parameters. The
#' name parameter refers to the gene name, organism refers to either organism
#' name or index, group refers to either gene group name or index and ind refers
#' to the gene index. See examples for details of the different possibilities.
#'
#' @param object A pgVirtual subclass
#'
#' @param name A character vector of names of genes to remove
#'
#' @param organism Either an integer or character vector of organisms to remove
#' genes from. If neither name nor ind is given all genes in the organisms are
#' removed.
#'
#' @param group Either an integer or character vector of gene groups to remove
#' genes from. If ind is not given all genes in the groups are removed.
#'
#' @param ind Indexes of the selections to remove. If neither name, organism
#' nor group is given, it indexes into the raw gene index, otherwise it indexes
#' into the element defined by organism or group.
#'
#' @param ... parameters passed on (currently ignored).
#'
#' @return An object of the same class as object without the genes that should
#' be removed.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#' nGenes(testPG)
#'
#' # Remove gene number 5
#' removeGene(testPG, ind=5)
#'
#' # Remove all genes from organism 'AE017244'
#' removeGene(testPG, organism='AE017244')
#'
#' # Remove first gene in gene group 10
#' removeGene(testPG, group=10, ind=1)
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @export
#'
setGeneric(
  "removeGene",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, name, organism, group, ind, ...) {
    standardGeneric("removeGene")
  }
)
#' Merge paralogue gene groups into new gene groups
#'
#' This method allows for merging of paralogue gene groups defined using
#' \code{\link{kmerLink}} into new, bigger, gene groups.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on to metadata collapse function. For
#' combineInfo='merge' sep specifies the separator - sep='none' collapses
#' information into list elements instead of strings. For combineInfo='largest'
#' no additional arguments are given.
#'
#' @return An object of the same class as object with the new grouping.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE, withParalogues=TRUE)
#'
#' # Number of gene groups before collapse
#' nGeneGroups(testPG)
#'
#' # Number of gene groups after collapse
#' testPG <- collapseParalogues(testPG, combineInfo='largest')
#' nGeneGroups(testPG)
#'
#' @export
#'
setGeneric(
  "collapseParalogues",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("collapseParalogues")
)
#' Safely add group info
#'
#' This method allows for adding of group metadata by specifying the name of the
#' metadata and the gene groups it should be added to. It protects the user from
#' overwriting information that is derived from the data, and ensures the proper
#' formatting. Should be preferred to \code{\link{groupInfo<-}} for all but the
#' simplest cases.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return An object of the same class as object with the new gene group
#' information.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Create some info
#' info <- data.frame(nickname=c('Tessie', 'Johnny'), index=c(4, 500))
#'
#' # Add it to the object
#' testPG <- addGroupInfo(testPG, info=info, key='index')
#'
#' @family Metadata
#'
#' @export
#'
setGeneric(
  "addGroupInfo",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("addGroupInfo")
)
#' Safely add organism info
#'
#' This method allows for adding of organism metadata by specifying the name of
#' the metadata and the organisms it should be added to. It protects the user
#' from overwriting information that is derived from the data and ensures proper
#' formatting. Should be preferred to \code{\link{orgInfo<-}} for all but the
#' simplest cases.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return An object of the same class as object with the added organism
#' information.
#'
#' @examples
#' testPG <- .loadPgExample()
#'
#' # Create some information
#' info <- data.frame(location=c('Copenhagen', 'Paris', 'London'),
#' name=c('AE017243', 'AP012303', 'AE017244')
#' )
#'
#' # Add the information
#' testPG <- addOrgInfo(testPG, info=info, key='name')
#'
#' @family Metadata
#'
#' @export
#'
setGeneric(
  "addOrgInfo",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("addOrgInfo")
)
#' Calculate statistics about each gene group
#'
#' This method calculates a range of statistics and positional information
#' about each gene group. The information returned is: maximum number of genes
#' from the same organism (paralogues), shortest sequence length, longest
#' sequence length, standard deviation of sequence lengths, index of genes in
#' group, downstream and upstream gene groups.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return A list with an element for each gene group, each with the following
#' elements.
#' \describe{
#' \item{maxOrg}{The highest number of distinct genes from the same organism
#' present in the group. A number above 1 indicates the presence of paralogues.}
#' \item{minLength}{The length of the shortest sequence in the group.}
#' \item{maxLength}{The length of the longest sequence in the group.}
#' \item{sdLength}{The standard deviation of lengths in the group.}
#' \item{genes}{The index for the genes present in the group.}
#' \item{backward}{A character vector with gene groups separated by ';' that
#' lie downstream of the gene group. The number of gene groups for each gene
#' is controlled by the flankSize argument. If the contig stops before the
#' required number of flanking genes have been reached, NA will be added.
#' Downstream is defined in relation to the strand of the contig/chromosome,
#' and not the translational direction of the gene in question.}
#' \item{forward}{As above in the other direction.}
#' }
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' grStats <- groupStat(testPG)
#'
#' @export
#'
setGeneric(
  "groupStat",
  # The body is the bare standardGeneric() call: wrapping it in braces makes
  # setGeneric() build a nonstandardGenericFunction instead of a
  # standardGeneric.
  def = function(object, ...) standardGeneric("groupStat")
)
#' Calculate statistics about each organism
#'
#' This method, much like \code{\link{groupStat}}, calculates different
#' statistics for each organism in the pangenome. Depending on the parameters
#' the statistics are: number of genes, minimum length of gene, maximum length
#' of gene, standard deviation of gene lengths, residue frequency, number of
#' gene groups and number of paralogues.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... parameters passed on.
#'
#' @return A data.frame with a row per organism, with each statistic in a column
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' orgStats <- orgStat(testPG)
#'
#' @export
#'
setGeneric('orgStat', def = function(object, ...) {
standardGeneric('orgStat')
})
#' Calculate the panchromosome graph
#'
#' This method creates a graph representation of the panchromosome - the
#' complete set of gene groups linked together by chromosomal position. Each
#' vertex in the graph represents a gene group and each edge represents a
#' positional relation between two gene groups (neighboring each other).
#' Vertices are annotated with number of genes, organism names and strand while
#' edges are annotated with number of genes (as weight), and organism names.
#'
#' @param object A pgVirtualLoc subclass
#'
#' @param ... parameters passed on
#'
#' @return An igraph object
#'
#' @examples
#' testPG <- .loadPgExample(geneLoc=TRUE, withNeighborhoodSplit=TRUE)
#'
#' panchromosome <- pcGraph(testPG)
#'
#' @export
#'
setGeneric("pcGraph", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("pcGraph")
})
#' Detect regions of high variability in the panchromosome
#'
#' This method analyses the panchromosome and detects regions of local
#' non-linearity. These regions often correspond to areas with
#' insertion/deletions, frameshifts or general high plasticity. It works by
#' examining each vertex of the panchromosome with an out degree above 2 and
#' detecting cycles within the neighborhood of these vertices. Adjacent cycles
#' are then joined together to form bigger groups of high variability.
#'
#' @param object A pgVirtualLoc subclass
#'
#' @param ... parameters to pass on
#'
#' @return A list of variable regions. Each element contains the following
#' elements:
#' \describe{
#'  \item{type}{Either 'ins/del', 'frameshift', 'hub', 'plastic' or 'end'.
#'  ins/del are regions where the two outgoing vertices are directly connected.
#'  frameshift are regions where the two outgoing vertices are connected through
#'  two different routes, but not directly. hub are regions with more than two
#'  outgoing vertices. plastic are regions where the two outgoing vertices are
#'  connected through multiple different paths. end are regions with only one
#'  outgoing vertex.}
#'  \item{members}{The gene groups being part of the region.}
#'  \item{flank}{The outgoing vertices connecting the region to the rest of the
#'  panchromosome.}
#'  \item{connectsTo}{The gene group(s) each flank connects to outside of the
#'  region}
#'  \item{graph}{The subgraph of the panchromosome representing the region}
#' }
#'
#' @examples
#' testPG <- .loadPgExample(geneLoc=TRUE, withNeighborhoodSplit=TRUE)
#'
#' # Too heavy to include
#' \dontrun{
#' regions <- variableRegions(testPG)
#'
#' # Have a look at the first region
#' regions[[1]]
#' }
#'
#' @export
#'
setGeneric("variableRegions", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("variableRegions")
})
#' Extract a graph representation of a gene group neighborhood
#'
#' This method creates a graph representation of the immediate neighborhood of
#' a gene group. It is different from creating a subgraph of the panchromosome
#' in that only vertices and edges directly reachable from the gene group are
#' included. The vertices will be annotated with a centerGroup property
#' indicating whether or not the node is the queried gene group.
#'
#' @param object A pgVirtualLoc subclass
#'
#' @param ... Parameters passed on.
#'
#' @return An igraph object with gene groups as vertices and positional
#' connections as edges. The edges are weighted according to the number of
#' genes sharing the connection. All vertices have a centerGroup attribute,
#' which is FALSE for all but the center group.
#'
#' @examples
#' testPG <- .loadPgExample(geneLoc=TRUE, withNeighborhoodSplit=TRUE)
#'
#' # Look at the surroundings of group 10
#' neighborhood <- getNeighborhood(testPG, group=10)
#'
#' @seealso \code{\link{plotNeighborhood}} for nice plotting of the neighborhood
#'
#' @export
#'
setGeneric("getNeighborhood", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("getNeighborhood")
})
#' Plot (very) basic statistics on the pangenome
#'
#' This method plots the number of genes in each organism and, if gene groups
#' have been defined, the number of singleton, accessory and core gene groups.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Parameters passed on to color scale.
#'
#' @return This function is called for its side effects
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Should make a nice little plot
#' plotStat(testPG)
#'
#' @export
#'
setGeneric("plotStat", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("plotStat")
})
#' Plot the neighborhood of a gene group
#'
#' This method plots the neighborhood extracted using
#' \code{\link{getNeighborhood}} in a visually pleasing way. It is mainly a
#' wrapper around \code{\link[igraph]{plot.igraph}} to ensure the proper
#' information is visualised.
#'
#' @param object A pgVirtualLoc subclass
#'
#' @param ... Parameters passed on to igraph's plot method.
#'
#' @return Called for the side effect of creating a plot. Invisibly returns an
#' igraph object with all visual parameters set as node and edge attributes.
#'
#' @examples
#' testPG <- .loadPgExample(geneLoc=TRUE, withNeighborhoodSplit=TRUE)
#'
#' # Nice little overview of the neighborhood of gene group 30
#' plotNeighborhood(testPG, 30)
#'
#' @export
#'
setGeneric("plotNeighborhood", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("plotNeighborhood")
})
#' Plot the similarities of genes within a group
#'
#' This method plots a gene group with genes as vertices and cosine similarities
#' as weighted edges. Mildly informative at best :-)
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Parameters to be passed on to igraph's plotting method
#'
#' @return Called for the side effect of creating a plot. Invisibly returns an
#' igraph object with all visual parameters set as node and edge attributes.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' plotGroup(testPG, 10, lowerLimit=0.25)
#'
#' @export
#'
setGeneric("plotGroup", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("plotGroup")
})
#' Plot the evolution in gene groups
#'
#' This method constructs a plot showing how the number of singleton, accessory
#' and core gene groups evolves as the size of the pangenome increases.
#' Different ways of increasing the size of the pangenome are available.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Parameters to be passed on
#'
#' @return This function is called for its side effects
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Standard type - organisms ordered by their index in the pangenome
#' plotEvolution(testPG, ordering='none')
#'
#' # Bootstrapped with confidence intervals
#' plotEvolution(testPG, ordering='bootstrap')
#'
#' @export
#'
setGeneric("plotEvolution", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("plotEvolution")
})
#' Create a heatplot with similarities between all organisms
#'
#' This method creates a heatplot showing the similarity between all organisms
#' in the pangenome. The similarity can be derived either from the pangenome
#' matrix or from kmer calculations of the genes themselves.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Parameters to be passed on.
#'
#' @return This function is called for its side effects
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Use kmers
#' plotSimilarity(testPG, type='kmer')
#'
#' # Use pangenome matrix
#' plotSimilarity(testPG, type='pangenome')
#'
#' @seealso \code{\link{plotTree}} for a dendrogram plot of the same data.
#'
#' @export
#'
setGeneric("plotSimilarity", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("plotSimilarity")
})
#' Plot a dendrogram of the organisms in a pangenome
#'
#' This method plots a dendrogram of the relationship between the organisms in
#' the pangenome. It does not try to be phylogenetic in any way but merely
#' shows the relationship in data. As with \code{\link{plotSimilarity}} it can
#' be based on either the pangenome matrix or kmer feature vectors.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Parameters to be passed on.
#'
#' @return This function is called for its side effects
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' plotTree(testPG, type='pangenome', dist='binary', clust='ward.D2')
#'
#' # And now in a circle (type defaults to 'pangenome')
#' plotTree(testPG, circular=TRUE, dist='binary', clust='ward.D2')
#'
#' @seealso \code{\link{plotSimilarity}} for a heatmap plot of the same data.
#'
#' @export
#'
setGeneric("plotTree", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("plotTree")
})
#' Add gene grouping to pangenome
#'
#' This is an internal function, not meant to be called directly. To add gene
#' grouping manually see \code{\link{manualGrouping}}. This method is a
#' requirement for classes inheriting from pgVirtual and is not relevant for
#' everyday users.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Parameters to be passed on
#'
#' @return An object with the same class as object
#'
#' @note For internal use only. Required for extending the class system of
#' FindMyFriends
#'
#' @note Required for subclasses of pgVirtual in order to extend the class
#' system of FindMyFriends
#'
#' @rdname internalGroupGenes
#' @name internal-groupGenes
#' @aliases groupGenes
#' @keywords internal
#'
#' @export
#'
setGeneric("groupGenes", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("groupGenes")
})
#' Get gene-to-organism relationship
#'
#' This method returns the organism membership for each gene in the pangenome as
#' a vector of indices. Element 1 corresponds to gene 1 and its value is the
#' index of the corresponding organism.
#'
#' @param object A pgVirtual subclass
#'
#' @return An integer vector with an element for each gene in the pangenome.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Stored sequentially so the first will belong to organism 1
#' head(seqToOrg(testPG))
#'
#' @note Required for extending the class system of FindMyFriends
#'
#' @seealso \code{\link{seqToGeneGroup}} for gene-to-genegroup relationship
#'
#' @export
#'
setGeneric("seqToOrg", function(object) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("seqToOrg")
})
#' Get gene-to-genegroup relationship
#'
#' This method returns the group membership for each gene in the pangenome as
#' a vector of indices. Element 1 corresponds to gene 1 and its value is the
#' index of the corresponding gene group. If gene groups have yet to be defined
#' it returns a vector of length 0.
#'
#' @param object A pgVirtual subclass
#'
#' @return An integer vector with an element for each gene in the pangenome.
#'
#' @examples
#' testPG <- .loadPgExample(withGroups=TRUE)
#'
#' # Have a look at which groups the first six genes belong to
#' head(seqToGeneGroup(testPG))
#'
#' @note Required for extending the class system of FindMyFriends
#'
#' @seealso \code{\link{seqToOrg}} for gene-to-organism relationship
#'
#' @export
#'
setGeneric("seqToGeneGroup", function(object) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("seqToGeneGroup")
})
#' Add metadata to the pangenome
#'
#' These methods are only for internal use and not relevant for regular users.
#' They are required for subclasses of pgVirtual and allow FindMyFriends to add
#' and change metadata as part of the pipelines.
#'
#' @param object A pgVirtual subclass
#'
#' @param name The name of the metadata to set
#'
#' @param info A vector of metadata
#'
#' @param key The indexes the metadata in info pertains to
#'
#' @param ... Parameters to be passed on
#'
#' @return An object of the same class as object
#'
#' @note For internal use only. Use \code{\link{addGroupInfo}} and
#' \code{\link{addOrgInfo}} instead. Required for extending the class system of
#' FindMyFriends.
#'
#' @rdname internalMetadata
#' @name internal-metadata
#' @aliases setGroupInfo
#' @keywords internal
#'
#' @export
#'
setGeneric("setGroupInfo", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("setGroupInfo")
})
#' @rdname internalMetadata
#'
#' @export
#'
setGeneric("setOrgInfo", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("setOrgInfo")
})
#' Merge information from two pangenomes
#'
#' This method is for internal use only and should not be called directly. Use
#' \code{\link{addGenomes}} instead. It is required for subclasses of pgVirtual.
#'
#' @param pg1 A pgVirtual subclass
#'
#' @param pg2 An object of the same class as pg1
#'
#' @param geneGrouping The grouping of the genes in the merged pangenome.
#' Equivalent to calling seqToGeneGroup on the new object
#'
#' @param groupInfo The metadata on the gene groups in the merged pangenome.
#' Equivalent to calling groupInfo on the new object
#'
#' @param ... Parameters to be passed on
#'
#' @return An object of the same class as pg1
#'
#' @note For internal use only. Required for extending the class system of
#' FindMyFriends.
#'
#' @rdname internalMergePangenomes
#' @name internal-mergePangenomes
#' @aliases mergePangenomes
#' @keywords internal
#'
#' @export
#'
setGeneric("mergePangenomes", function(pg1, pg2, ...) {
  # Hand over to the method registered for the classes of 'pg1' and 'pg2'
  standardGeneric("mergePangenomes")
})
#' Split gene groups based on similarity
#'
#' This function splits up gene groups based on cosine similarity of kmer
#' feature vectors. It uses hard splitting based on a similarity cutoff where
#' unconnected components constitute new groups. Unlike
#' \code{\link{neighborhoodSplit}}, paralogues cannot be forced into separate
#' groups as the information needed for this is not present.
#'
#' @param object A pgVirtual subclass
#'
#' @param ... Arguments passed on
#'
#' @return A new pgVirtual subclass object of the same class as 'object'
#'
#' @family group-splitting
#'
#' @examples
#' # Get a grouped pangenome
#' pg <- .loadPgExample(withGroups = TRUE)
#'
#' \dontrun{
#' # Split groups by similarity (Too heavy to include)
#' pg <- kmerSplit(pg, lowerLimit = 0.8)
#' }
#'
#' @export
#'
setGeneric("kmerSplit", function(object, ...) {
  # Hand over to the method registered for the class of 'object'
  standardGeneric("kmerSplit")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.