Nothing
# make barebones msa obj
.makeObj.msa <- function() {
x <- list()
class(x) <- "msa"
x
}
##' Creates a copy of an MSA sequence
##'
##' If m is stored in R (as it is by default), then m2 <- copy.msa(m1)
##' is no different than m2 <- m1. But if it is stored as a pointer
##' to a C structure, this is the only way to make an explicit copy
##' of the MSA object.
##' @title MSA copy
##' @param x an MSA object
##' @return an MSA object which can be modified independently from the
##' original object
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
copy.msa <- function(x) {
if (is.null(x$externalPtr)) return(x)
rv <- .makeObj.msa()
rv$externalPtr <- .Call.rphast("rph_msa_copy", x$externalPtr)
rv
}
##' Check an MSA object
##' @param msa An object to tests
##' @return A logical indicating whether object is of type \code{msa}
##' @export
##' @keywords msa
##' @author Melissa J. Hubisz
is.msa <- function(msa) {
class(msa)=="msa"
}
##' Creates a new MSA object given sequences.
##'
##' Make a new multiple sequence alignment (MSA) object given a vector of
##' character strings. They can be optionally annotated with sample names.
##'
##' Each character string in seqs must be the same length, and number of
##' elements in names (if provided) must match the number of elements in
##' seqs.
##'
##' Alphabet generally does not have to be specified if working with
##' DNA alignments.
##'
##' About storing objects as pointers:
##' If \code{pointer.only==FALSE}, the MSA object will be stored in R and can be
##' viewed and modified by base R code as well as RPHAST functions.
##' Setting \code{pointer.only=TRUE} will cause the object to be stored by
##' reference, as an external pointer to an object created by C code. This
##' may be necessary to improve performance, but the object can then only
##' be viewed/manipulated via RPHAST functions. Furthermore, if an object
##' is stored as a pointer, then its value is liable to be changed when
##' passed as an argument to a function. All RPHAST functions which change
##' the value of an external pointer make a note of this in the help pages
##' for that function. For example, extract.feature.msa will alter an
##' alignment if it is passed in as an external pointer (the argument will
##' be changed into the return value). If this is undesireable, the copy.msa
##' function can be used: extract.feature.msa(copy.msa(align)) will preserve
##' the original alignment. Simple copying, ie, \code{align2->align1} of
##' objects stored as pointer will not behave like normal R objects: both objects
##' will point to the same C structure, and both will be changed if either one
##' is altered. Instead \code{align2 <- copy.msa(align1)} should be used.
##' @title MSA Objects
##' @param seqs a character vector containing sequences, one per sample
##' @param names a character vector identifying the sample name for each
##' sequence. If \code{NULL}, use "seq1", "seq2", ...
##' @param alphabet a character string containing valid non-missing character
##' states
##' @param is.ordered a logical indicating whether the alignment columns
##' are stored in order. If NULL, assume columns are ordered.
##' @param offset an integer giving the offset of coordinates for the
##' reference sequence from the beginning of the chromosome. The reference
##' sequence is assumed to be the first sequence. Not used
##' if is.ordered==FALSE.
##' @param pointer.only a boolean indicating whether returned alignment object
##' should be stored by reference (see Details)
##' @useDynLib rphast, .registration = TRUE
##' @keywords msa
##' @export msa
##' @example inst/examples/msa.R
##' @author Melissa J. Hubisz and Adam Siepel
msa <- function(seqs, names = NULL, alphabet="ACGT", is.ordered=TRUE,
offset=NULL, pointer.only=FALSE) {
#checks
seqlen <- unique(sapply(seqs, nchar))
if (length(seqlen) > 1L) {
stop("sequences should all have same length")
}
if (is.null(names))
names <- sprintf("seq%i", 1:length(seqs))
# check number of names
if (!is.null(names) && length(names) != length(seqs)) {
stop("number of names needs to match number of sequences")
}
check.arg(alphabet, "alphabet", "character")
check.arg(pointer.only, "pointer.only", "logical", null.OK=FALSE)
check.arg(is.ordered, "is.ordered", "logical", null.OK=TRUE)
check.arg(offset, "offset", "integer", null.OK=TRUE)
if (is.null(is.ordered)) is.ordered <- TRUE
if ( (!is.ordered) && (!is.null(offset)) && offset!=0)
offset <- NULL
# TODO: if alphabet non-null, check that seqs only contains those chars?
x <- .makeObj.msa()
if (pointer.only) {
x$externalPtr <- .Call.rphast("rph_msa_new",
seqsP=seqs,
namesP=names,
nseqsP=length(seqs),
lengthP=seqlen,
alphabetP=alphabet,
orderedP=is.ordered,
offsetP=offset)
} else {
x$seqs <- seqs
if (! is.null(names)) x$names <- names
if (! is.null(alphabet)) x$alphabet <- alphabet
x$is.ordered <- is.ordered
if (!is.null(offset)) x$offset <- offset
}
x
}
##' Returns the length of sequence in an MSA alignment.
##' @title MSA Sequence Length.
##' @param x an MSA object
##' @param refseq character vector giving name(s) of sequence whose
##' length to return. The default \code{NULL} implies the frame of
##' reference of the entire alignment.
##' @return an integer vector containing the length of the named sequences.
##' If refseq is NULL, returns the number of columns in the alignment.
##' @keywords msa
##' @seealso \code{\link{msa}}
##' @export
##' @export ncol.msa
##' @author Melissa J. Hubisz and Adam Siepel
##' @example inst/examples/ncol-msa.R
##' @method ncol msa
ncol.msa <- function(x, refseq=NULL) {
if (is.null(x$externalPtr) && is.null(refseq)) {
return(nchar(x$seqs[1]))
}
check.arg(refseq, "refseq", "character", null.OK=TRUE, min.length=1L,
max.length=NULL)
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
if (is.null(refseq))
return(.Call.rphast("rph_msa_seqlen", x$externalPtr, NULL))
result <- integer(length(refseq))
for (i in 1:length(refseq))
result[i] <- .Call.rphast("rph_msa_seqlen", x$externalPtr, refseq[i])
result
}
##' Obtain the range of coordinates in a MSA objects
##' @param x An object of type \code{msa}
##' @param refseq A character string identifying the reference sequence
##' (or NULL to use frame of reference of entire alignment)
##' @return A numeric vector of length 2 giving the smallest and highest
##' coordinate in the alignment. If refseq is the first sequence in alignment,
##' offset.msa(x) is added to the range, otherwise it is ignored.
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
coord.range.msa <- function(x, refseq=names.msa(x)[1]) {
if (!is.msa(x)) stop("x is not an MSA object")
numcol <- ncol.msa(x, refseq)
if (!is.null(refseq) && refseq==names.msa(x)[1]) {
off <- offset.msa(x)
return(c(off+1, off+numcol))
}
return(c(1, numcol))
}
##' Returns the dimensions of an msa object as (# of species, # of columns)
##' @param x An object of type \code{msa}
##' @return An integer vector of length two giving number of species and
##' number of columns in the alignment
##' @method dim msa
##' @export dim.msa
##' @export
##' @keywords msa
##' @author Melissa J. Hubisz and Adam Siepel
dim.msa <- function(x) {
c(nrow.msa(x), ncol.msa(x, NULL))
}
##' The number of informative columns in an alignment
##' @param x An object of type \code{msa}
##' @return The number of "informative" columns in the msa. An informative
##' column has at least two non-missing and non-gap characters.
##' @export
##' @keywords msa
##' @seealso \code{pairwise.diff.msa} To get differences per base
##' between pairs of sequences
##' @author Melissa J. Hubisz and Adam Siepel
ninf.msa <- function(x) {
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
.Call.rphast("rph_msa_ninformative_sites", x$externalPtr)
}
##' Returns the number of sequence in an MSA alignment.
##' @title MSA Number of Sequences
##' @param x an MSA object
##' @return an integer containing the number of sequences in an alignment.
##' @keywords msa
##' @seealso \code{\link{msa}}
##' @export
##' @export nrow.msa
##' @author Melissa J. Hubisz and Adam Siepel
##' @example inst/examples/nrow-msa.R
##' @method nrow msa
nrow.msa <- function(x) {
if (is.null(x$externalPtr)) {
return(length(x$seqs))
}
.Call.rphast("rph_msa_nseq", x$externalPtr)
}
##' Returns TRUE if the argument is a valid string describing a
##' multiple sequence alignment (MSA) format.
##'
##' Valid formats include "FASTA", "PHYLIP", "SS" (Sufficient statistics
##' format used by PHAST), "MPM" (format used by MultiPipMaker),
##' "LAV" (used by blastz), or "MAF" (Multiple Alignment Format used by
##' MULTIZ and TBA.
##' @title Check an MSA Format String
##' @param format a character vector of strings to test
##' @return a logical vector indicating whether each element of the
##' input parameter is a valid format string.
##' @keywords msa
##' @export
##' @example inst/examples/is-format-msa.R
##' @author Melissa J. Hubisz
is.format.msa <- function(format) {
if (is.null(format)) return(NULL)
result <- logical(length(format))
for (i in 1:length(format))
result[i] <- .Call.rphast("rph_msa_valid_fmt_str", format[i]);
result
}
##' Returns the offset of the first position in an alignment from
##' some reference sequence.
##' @title MSA Index Offset
##' @param x an MSA object
##' @return The difference between the first position in an alignment
##' from the beginning of a chromosome.
##' @keywords msa
##' @export
##' @example inst/examples/offset-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
offset.msa <- function(x) {
if (!is.null(x$externalPtr)) {
return(.Call.rphast("rph_msa_idxOffset", msaP=x$externalPtr))
}
if (is.null(x$offset)) return(0)
x$offset
}
##' Returns the alphabet used by an MSA object.
##' @title MSA Alphabet
##' @param x an MSA object
##' @return the valid non-missing-data characters for an MSA object.
##' @keywords msa
##' @export
##' @example inst/examples/alphabet-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
alphabet.msa <- function(x) {
if (!is.null(x$externalPtr)) {
return(.Call.rphast("rph_msa_alphabet", msaP=x$externalPtr))
}
x$alphabet
}
##' Determines if an MSA object represents an ordered alignment.
##' @title MSA is Ordered?
##' @param x an MSA object
##' @return a boolean indicating whether the columns are in order
##' @keywords msa
##' @export
##' @export is.ordered.msa
##' @author Melissa J. Hubisz and Adam Siepel
##' @example inst/examples/is-ordered-msa.R
##' @method is.ordered msa
is.ordered.msa <- function(x) {
if (!is.null(x$externalPtr)) {
return(.Call.rphast("rph_msa_isOrdered", msaP=x$externalPtr))
}
if (is.null(x$is.ordered)) return (TRUE)
x$is.ordered
}
##' Returns the sequence names for an MSA object.
##' @title MSA Sequence Names
##' @param x an MSA object
##' @return a character vector giving the names of the sequences, or
##' NULL if they are not defined
##' @keywords msa
##' @export
##' @export names.msa
##' @method names msa
##' @example inst/examples/names-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
names.msa <- function(x) {
if (!is.null(x$externalPtr)) {
return(.Call.rphast("rph_msa_seqNames", msaP=x$externalPtr))
}
x$names
}
##' Take an MSA stored by reference and return one stored in R
##' @title MSA From Pointer
##' @param src an MSA object stored by reference
##' @return an MSA object stored in R. If src is already stored in R,
##' returns a copy of the object.
##' @seealso \code{\link{msa}} for details on MSA storage options.
##' @keywords msa
##' @export
##' @example inst/examples/from-pointer-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
from.pointer.msa <- function(src) {
if (is.null(src$externalPtr)) return(src)
seqs <- .Call.rphast("rph_msa_seqs", src$externalPtr)
names <- .Call.rphast("rph_msa_seqNames", src$externalPtr)
alphabet <- .Call.rphast("rph_msa_alphabet", src$externalPtr)
ordered <- .Call.rphast("rph_msa_isOrdered", src$externalPtr)
offset <- .Call.rphast("rph_msa_idxOffset", src$externalPtr)
msa(seqs, names, alphabet, is.ordered=ordered,
offset=offset, pointer.only=FALSE)
}
##' Take an MSA stored in R and return one stored by reference
##' @title MSA To Pointer
##' @param src an MSA object stored by value in R
##' @return an MSA object stored by reference as a pointer to an object
##' created in C.
##' @seealso \code{\link{msa}} for details on MSA storage options.
##' @keywords msa
##' @export
##' @example inst/examples/as-pointer-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
as.pointer.msa <- function(src) {
if (!is.null(src$externalPtr)) return(src)
msa(seqs=src$seq,
names=src$names,
alphabet=src$alphabet,
is.ordered=src$is.ordered,
offset=src$offset,
pointer.only=TRUE)
}
##' Guess the format of an MSA file by looking at the file contents.
##' @title MSA Guess Format
##' @param filename A vector of file names
##' @param method Either "content" or "extension". "content" implies to
##' open the file and guess the format based on content; "extension" simply
##' guesses based on the extension on the file name (it does not open the
##' file). This argument will be recycled to the length of filename.
##' @return A character vector giving the format of each file
##' (one of "MAF", "FASTA", "LAV", "SS", "PHYLIP", "MPM", or "UNKNOWN").
##' @seealso is.format.msa
##' @keywords msa
##' @export
##' @example inst/examples/guess-format-msa.R
##' @author Melissa J. Hubisz
guess.format.msa <- function(filename, method="content") {
if (is.null(filename)) return(NULL)
result <- character(length(filename))
method <- rep(method, length.out=length(filename))
for (i in 1:length(filename)) {
if (method[i] == "content") {
result[i] <- .Call.rphast("rph_msa_format_for_content", filename[i])
} else if (method[i] == "extension") {
result[i] <- .Call.rphast("rph_msa_format_for_suffix", filename[i])
} else stop("guess.format.msa: unknown method ", method[i], "\n")
}
result
}
##' Writes a multiple sequence alignment (MSA) object to a file
##' in one of several formats.
##' @title Writing MSA Objects to Files
##' @param x an object of class msa
##' @param file File to write (will be overwritten). If NULL, output
##' goes to terminal.
##' @param format format to write MSA object. Valid values are "FASTA",
##' "PHYLIP", "MPM", or "SS".
##' @param pretty.print Whether to pretty-print alignment (turning
##' bases which match the first base in the same column to ".").
##' @note pretty.print does not work if format="SS".
##' @keywords msa FASTA PHYLIP MPM SS
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
##' @usage write.msa(x, file=NULL,
##' format=ifelse((f <- guess.format.msa(file, method="extension"))=="UNKNOWN", "FASTA", f),
##' pretty.print=FALSE)
write.msa <- function(x, file=NULL,
format=ifelse((f <- guess.format.msa(file, method="extension"))=="UNKNOWN", "FASTA", f),
pretty.print=FALSE) {
#checks
check.arg(file, "file", "character", null.OK=TRUE)
check.arg(format, "format", "character", null.OK=FALSE)
check.arg(pretty.print, "pretty.print", "logical", null.OK=FALSE)
if (! is.format.msa(format)) {
stop(paste("invalid MSA FORMAT \"", format, "\"", sep=""))
}
msa <- x
if (is.null(msa$externalPtr)) {
printMsa <- as.pointer.msa(msa)
} else {
printMsa <- msa
}
invisible(.Call.rphast("rph_msa_printSeq",
msaP=printMsa$externalPtr,
fileP=file,
formatP=format,
pretty.printP=pretty.print))
}
##' Prints a short description of an MSA (multiple sequence alignment)
##' object.
##'
##' @title MSA Summary
##' @param object an MSA object
##' @param ... additional arguments sent to \code{print}
##' @param print.seq whether to supress printing of the alignment
##' @param format to print sequence in if printing alignment
##' @param pretty.print whether to pretty.print pretty-print sequence if printing alignment
##' @keywords msa
##' @seealso \code{\link{print.msa}}
##' @export
##' @export summary.msa
##' @method summary msa
##' @example inst/examples/summary-msa.R
##' @author Melissa J. Hubisz
summary.msa <- function(object, ...,
print.seq=ncol.msa(object)<100 && nrow.msa(object)<30,
format="FASTA",
pretty.print=FALSE) {
msa <- object
check.arg(print.seq, "print.seq", "logical", null.OK=FALSE)
# format and pretty.print are checked in write.msa
cat(paste("msa object with", nrow.msa(msa), "sequences and",
ncol.msa(msa),"columns, stored"))
if (is.null(msa$externalPtr)) cat(" in R") else cat(" as a pointer to a C structure")
cat("\n")
if (!is.null(format) || pretty.print) {
print.seq=TRUE
}
pointer <- msa$externalPtr
names <- names.msa(msa)
alphabet <- alphabet.msa(msa)
is.ordered <- is.ordered.msa(msa)
offset <- offset.msa(msa)
printMsa <- list()
printed <- FALSE
if (!is.null(names)) printMsa$names <- names
if (!is.null(alphabet)) printMsa$alphabet <- alphabet
if (!is.null(is.ordered)) printMsa$is.ordered <- is.ordered
if (!is.null(offset) && offset!=0) printMsa$offset <- offset
if (print.seq && is.null(pointer) && is.null(format) && !pretty.print) {
printMsa$seq <- msa$seq
printed <- TRUE
}
print(printMsa, ...)
if (!printed && print.seq) {
cat("$seq\n")
if (is.null(format)) format <- "FASTA"
write.msa(msa, file=NULL, format, pretty.print)
cat("\n")
}
}
##' Prints an MSA (multiple sequence alignment) object.
##'
##' Valid formats for printing are "FASTA", "PHYLIP", "MPM", and "SS".
##' See \code{\link{is.format.msa}} for details on these formats.
##' If format is specified, the alignment is printed regardless of
##' print.seq.
##'
##' Pretty-printing will cause all characters in a column which match
##' the value in the first row to be printed as ".". It only works for
##' FASTA, PHYLIP, or MPM formats.
##'
##' If print.seq==TRUE, then the default printing format depends on whether
##' the sequence is stored by value (the default storage mode), or by
##' reference. If the MSA is stored by value, the default format is
##' as a R character vector. Otherwise, the default format is FASTA.
##'
##' @title Printing MSA objects
##' @param x an object of class msa
##' @param ... additional arguments sent to \code{print}
##' @param print.seq whether to supress printing of the alignment
##' @param format to print sequence in if printing alignment
##' @param pretty.print whether to pretty.print pretty-print sequence if printing alignment
##' @keywords msa
##' @export
##' @export print.msa
##' @method print msa
##' @example inst/examples/print-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
print.msa <- function(x, ..., print.seq=ifelse(ncol.msa(x)*nrow.msa(x) < 500, TRUE, FALSE),
format=NULL, pretty.print=FALSE) {
check.arg(print.seq, "print.seq", "logical", null.OK=FALSE)
# format and pretty.print are checked in write.msa
summary.msa(x, print.seq=print.seq, format=format, pretty.print=pretty.print, ...)
if (!print.seq && is.null(format)) {
cat("(alignment output suppressed)\n")
cat("\n")
}
}
##' Reads an MSA from a file.
##' @title Reading an MSA Object
##' @param filename The name of the input file containing an alignment.
##' @param format input file format: one of "FASTA", "MAF", "SS", "PHYLIP",
##' "MPM", must be correctly specified.
##' @param alphabet the alphabet of non-missing-data chraracters in the
##' alignment. Determined automatically from the alignment if not given.
##' @param features An object of type \code{feat}. If provided, the return
##' value will only
##' contain portions of the alignment which fall within a feature.
##' The alignment will not be ordered.
##' The loaded regions can be further constrained with the do.4d or
##' do.cats options. Note that if this object is passed as a pointer to a
##' structure stored in C, the values will be altered by this function!
##' @param do.4d Logical. If \code{TRUE}, the return value will contain only
##' the columns corresponding to four-fold degenerate sties. Requires
##' features to be specified.
##' @param ordered Logical. If \code{FALSE}, the MSA object may not retain
##' the original column order.
##' @param tuple.size Integer. If given, and if pointer.only is \code{TRUE},
##' MSA will be stored in sufficient statistics format, where each tuple
##' contains tuple.size consecutive columns of the alignment.
##' @param do.cats Character vector if features is provided; integer vector
##' if cats.cylce is provided. If given, only the types of features named
##' here will be represented in the (unordered) return alignment.
##' @param refseq Character string specifying a FASTA format file with a
##' reference sequence. If given, the reference sequence will be
##' "filled in" whereever missing from the alignment.
##' @param offset An integer giving offset of reference sequence from
##' beginning of chromosome. Not used for MAF or SS format.
##' @param seqnames A character vector. If provided, discard any sequence
##' in the msa that is not named here. This is only implemented efficiently
##' for MAF input files, but in this case, the reference sequence must be
##' named.
##' @param discard.seqnames A character vector. If provided, discard
##' sequenced named here. This is only implemented efficiently for MAF
##' input files, but in this case, the reference sequenced must NOT be
##' discarded.
##' @param pointer.only If \code{TRUE}, MSA will be stored by reference as
##' an external pointer to an object created by C code, rather than
##' directly in R memory. This improves performance and may be necessary
##' for large alignments, but reduces functionality. See
##' \code{\link{msa}} for more details on MSA object storage options.
##' @note If the input is in "MAF" format and features is specified, the
##' resulting alignment will be stripped of gaps in the reference (1st)
##' sequence.
##' @return an MSA object.
##' @keywords msa FASTA MAF PHYLIP SS
##' @seealso \code{\link{msa}}, \code{\link{read.feat}}
##' @export
##' @example inst/examples/read-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
read.msa <- function(filename,
format=c(guess.format.msa(filename), "FASTA")[1],
alphabet=NULL,
features=NULL,
do.4d=FALSE,
ordered=(do.4d==FALSE && is.null(features)),
tuple.size=(if(do.4d) 3 else NULL),
do.cats=NULL,
refseq=NULL,
offset=0,
seqnames=NULL, discard.seqnames=NULL,
pointer.only=FALSE) {
if (ordered && do.4d) {
warning("cannot keep order when reading 4d sites; returning un-ordered alignment")
ordered = FALSE
}
if (ordered && !is.null(features) && nrow.feat(features) > 1) {
warning("cannot keep order when subsetting by feature with more than one row; returning un-ordered alignment")
ordered = FALSE
}
cats.cycle <- NULL
check.arg(filename, "filename", "character", null.OK=FALSE)
check.arg(format, "format", "character", null.OK=FALSE)
check.arg(alphabet, "alphabet", "character", null.OK=TRUE)
check.arg(features, "feat", null.OK=TRUE, min.length=NULL, max.length=NULL)
check.arg(do.4d, "do.4d", "logical", null.OK=FALSE)
check.arg(ordered, "ordered", "logical", null.OK=FALSE)
check.arg(tuple.size, "tuple.size", "integer", null.OK=TRUE)
check.arg(cats.cycle, "cats.cycle", "integer", null.OK=TRUE)
if (! (is.null(features) || is.null(cats.cycle)))
stop("cannot provide both features and cats.cycle")
if (!is.null(features)) {
check.arg(do.cats, "do.cats", "character", null.OK=TRUE,
min.length=NULL, max.length=NULL)
} else if (!is.null(cats.cycle)) {
check.arg(do.cats, "do.cats", "integer", null.OK=TRUE,
min.length=NULL, max.length=NULL)
} else if (!is.null(do.cats)) {
warning("do.cats ignored unless features or cats.cycle provided")
}
check.arg(refseq, "refseq", "character", null.OK=TRUE)
check.arg(offset, "offset", "integer", null.OK=TRUE)
check.arg(seqnames, "seqnames", "character", null.OK=TRUE,
min.length=NULL, max.length=NULL)
check.arg(discard.seqnames, "discard.seqnames", "character", null.OK=TRUE,
min.length=NULL, max.length=NULL)
check.arg(pointer.only, "pointer.only", "logical", null.OK=FALSE)
if (!is.format.msa(format))
stop(paste("invalid format string", format))
if (!is.null(tuple.size) && tuple.size <= 0)
stop("tuple.size should be integer >= 1")
if (do.4d) {
if (!is.null(do.cats))
stop("should not specify do.cats if do.4d==TRUE")
if (is.null(features))
stop("features needs to be specified with do.4d")
if (tuple.size != 3)
stop("tuple.size must be 3 if do.4d==TRUE")
}
if (!is.null(do.cats) && is.null(features))
stop("features required with do.cats")
if (!is.null(features)) {
if (is.null(features$externalPtr)) {
features <- as.pointer.feat(features)
} else features <- copy.feat(features)
}
x <- .makeObj.msa()
x$externalPtr <- .Call.rphast("rph_msa_read", filename, format,
features$externalPtr, do.4d, alphabet,
tuple.size, refseq, ordered, cats.cycle,
do.cats, offset, seqnames,
discard.seqnames)
if (!pointer.only) x <- from.pointer.msa(x)
if (format != "MAF") { # if format is MAF we used seqnames on the fly
# sub.msa(seqnames, ...) doesn't convert msa to pointer so doing this
# after from.pointer.msa call is still efficient
if (!is.null(seqnames))
x <- sub.msa(x, seqnames, keep=TRUE)
if (!is.null(discard.seqnames))
x <- sub.msa(x, discard.seqnames, keep=FALSE)
}
x
}
##' DNA complement
##'
##' @title complement
##' @param x A character vector with DNA sequences to be complemented
##' @return The complement of the given sequence(s). Characters other
##' than A,C,G,T,a,c,g,t are unchanged.
##' @keywords msa
##' @export
##' @author Melissa J. Hubisz
complement <- function(x) {
chartr("ACGTacgt", "TGCAtgca", x)
}
##' Reverse complement a multiple sequence alignment
##' @param x An object of type \code{msa}.
##' @return The reverse complement of msa.
##' @note If x is stored as a pointer to an object in C, x will be changed to
##' its reverse complement. Use reverse.complement(copy.msa(x)) to avoid this
##' behavior. The return value will be a pointer if the input value was stored
##' as a pointer.
##' @keywords msa
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
reverse.complement.msa <- function(x) {
if (is.null(x$externalPtr)) {
pointer.only <- FALSE
x <- as.pointer.msa(x)
} else pointer.only <- TRUE
.Call.rphast("rph_msa_reverse_complement", x$externalPtr)
if (!pointer.only) x <- from.pointer.msa(x)
x
}
##' Get a subset of an alignment
##'
##' @title MSA Subset
##' @param x An object of type \code{msa}
##' @param seqs The sequence names to keep (or to remove if keep is
##' \code{FALSE})
##' @param keep Whether to keep the named sequences or remove them
##' @param start.col the first column to keep (columns indices start at 1)
##' @param end.col the last column to keep (inclusive)
##' @param refseq A character string naming the sequence in the alignment
##' which determines the
##' coordinates for start.col and end.col. If NULL, start.col and
##' end.col are column indices in the multiple alignment.
##' @param pointer.only If \code{TRUE}, return an msa object which is only
##' a pointer to a C structure (advanced use only).
##' @return A new MSA object containing a subset of the original MSA.
##' @note If x is stored as a pointer and represents an unordered alignment,
##' it may be ordered after this call. Otherwise it will not be changed.
##' @export
##' @keywords msa
##' @example inst/examples/sub-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
sub.msa <- function(x, seqs=NULL, keep=TRUE, start.col=NULL, end.col=NULL,
refseq=NULL, pointer.only=FALSE) {
check.arg(keep, "keep", "logical", null.OK=FALSE)
check.arg(seqs, "seqs", "character", null.OK=TRUE,
min.length=NULL, max.length=NULL)
check.arg(start.col, "start.col", "integer", null.OK=TRUE)
check.arg(end.col, "end.col", "integer", null.OK=TRUE)
check.arg(refseq, "refseq", "character", null.OK=TRUE)
check.arg(pointer.only, "pointer.only", "logical", null.OK=FALSE)
result <- .makeObj.msa()
if (is.null(x$externalPtr)) {
x <- as.pointer.msa(x)
}
result$externalPtr <- .Call.rphast("rph_msa_sub_alignment",
x$externalPtr, seqs, keep,
start.col, end.col, refseq)
if (!pointer.only)
result <- from.pointer.msa(result)
result
}
##' Strip gaps from an alignment.
##'
##' If strip.mode can be a vector of integers or a vector of character
##' strings. If it is a vector of integers, these are the indices of
##' the sequences from which to strip gaps.
##' If strip.mode is vector of character strings, each string names a
##' sequence from which to strip gaps.
##'
##' strip.mode can also be the string "all.gaps" or "any.gaps". The former
##' will strip columns containing only gaps, whereas the latter strips
##' columns containing even a single gap.
##'
##' @title MSA Strip Gaps
##' @param x MSA object
##' @param strip.mode Determines which gaps to strip. See Details
##' @return an MSA object, with gaps stripped according to strip.mode.
##' @note If x is passed as a pointer to a C structure (ie,
##' it was created with pointer.only=TRUE), then this function will directly
##' modify x. Use strip.gaps.msa(copy.msa(x)) to avoid this behavior. Also,
##' the return value will be stored as a pointer if x is stored as a pointer;
##' otherwise the return value will be stored in R.
##' @keywords msa
##' @export
##' @example inst/examples/strip-gaps-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
strip.gaps.msa <- function(x, strip.mode=1) {
names <- NULL
nseq <- NULL
if (is.null(x$externalPtr)) {
names <- names.msa(x)
nseq <- nrow.msa(x)
x <- as.pointer.msa(x)
pointer.only <- FALSE
} else pointer.only <- TRUE
for (s in strip.mode) {
if (s=="all.gaps" || s=="any.gaps") {
.Call.rphast("rph_msa_strip_gaps", x$externalPtr, 0, s)
} else {
if (!is.character(s)) {
if (is.null(nseq)) nseq <- nrow.msa(x)
if (as.integer(s) != s || s <=0 || s>nseq)
stop(cat("invalid sequence index", s))
w <- s
} else {
if (is.null(names))
names <- names.msa(x)
w <- which(names==s)
if (is.null(w))
stop(cat("no sequence with name", s))
}
x$externalPtr <- .Call.rphast("rph_msa_strip_gaps",
x$externalPtr, w, NULL)
}
}
if (!pointer.only)
x <- from.pointer.msa(x)
x
}
##' Extract, replace, reorder MSA
##'
##' Treat multiple sequence alignment as a matrix where each row
##' corresponds to a sequence for one species, and each column
##' is one position aligned across all species.
##'
##' The bracket notation can return a subset of the alignment,
##' or re-order rows and columns.
##' @param x An object of type \code{msa}
##' @param rows A numeric vector of sequence indices,
##' character vector (containing sequence name), or
##' logical vector (containing sequences to keep). If logical vector it
##' will be recycled as necessary to the same length as \code{nrow.msa(x)}.
##' @param cols A numeric vector of alignment columns, or a logical vector
##' containing columns to keep. If logical vector it will be recycled as
##' necessary to the same length as \code{ncol.msa(x)}. Note that these are
##' in coordinates with respect to the entire alignment. x$idx.offset
##' is ignored here.
##' @param pointer.only If \code{TRUE}, return an object which is only
##' a pointer to a structure stored in C (useful for large alignments;
##' advanced use only). In certain cases when the original alignment
##' is stored in R, it may be more efficient return an object in R, in which
##' case this argument will be ignored.
##' @seealso \code{\link{sub.msa}} which can subset columns based on genomic
##' coordinates, and \code{\link{extract.feature.msa}} which can subset based
##' on genomic coordinates denoted in a features object.
##' @usage \method{[}{msa}(x, rows, cols, pointer.only)
##' @method "[" msa
##' @note This function will not alter the value of x even if it is stored as
##' a pointer to a C structure.
##' @keywords msa
##' @export "[.msa"
##' @export
##' @rdname square-bracket-msa
##' @example inst/examples/square-bracket-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
"[.msa" <- function(x, rows, cols, pointer.only=FALSE) {
if (!missing(rows)) {
if (is.null(rows)) stop("rows cannot be empty")
} else rows=NULL
if (!missing(cols)) {
if (is.null(cols)) stop("cols cannot be empty")
} else cols=NULL
check.arg(pointer.only, "pointer.only", "logical", null.OK=FALSE)
if (!is.null(rows)) {
# if rows are given by names, convert to integer
if (is.character(rows)) {# names are given
names <- names.msa(x)
rows <- as.numeric(sapply(rows, function(x) {which(x == names)}))
if (sum(is.na(rows)) > 0L)
stop("unknown names in first dimension subset")
}
}
# check if arguments are given as logicals.
if (is.logical(rows)) {
rows <- which(rep(rows, length.out = nrow.msa(x)))
}
if (is.logical(cols))
cols <- which(rep(cols, length.out = ncol.msa(x)))
# if x is stored in R, sampling rows is easier and more efficient to do here
if (!is.null(rows) && is.null(x$externalPtr)) {
x$names <- x$names[rows]
x$seqs <- x$seqs[rows]
rows <- NULL
}
if (is.null(rows) && is.null(cols)) return(x)
check.arg(rows, "rows", "integer", null.OK=TRUE,
min.length=NULL, max.length=NULL)
check.arg(cols, "cols", "integer", null.OK=TRUE,
min.length=NULL, max.length=NULL)
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
rv <- .makeObj.msa()
rv$externalPtr <- .Call.rphast("rph_msa_square_brackets", x$externalPtr,
rows, cols)
if (!pointer.only)
rv <- from.pointer.msa(rv)
rv
}
##' Replace subsets of an alignment
##' @param x An object of type \code{msa}
##' @param rows A numeric vector of sequence indices, character vector
##' (containing sequence names), or logical vector. If logical vector,
##' it will be recycled as necessary to the length of \code{nrow.msa(x)}.
##' If not provided, all rows are selected.
##' @param cols A numeric vector of alignment columns, or a logical
##' vector. If logical vector it will be recycled to the same length
##' as \code{ncol.msa(x)}. Note that these are coordinates with respect
##' to the entire alignment. x$idx.offset is ignored here. If cols is not
##' provided, all columns are selected.
##' @param value The value to replace in the indicated rows/columns. Should
##' be a character representing a base (ie, "A", "C", "G", "T", "N", "-").
##' Can be a single value or a vector of values which match number of selected
##' cells. This value will be recycled to the necessary length, and an error
##' produced if the necessary length is not an even multiple of
##' \code{length(value)}. Can also give a single character string, in which
##' case it will be expanded into a vector using \code{strsplit}.
##' @return An object of type \code{msa} with the chosen rows/columns
##' replaced by value.
##' @note If \code{x} is stored as a pointer, x will be changed to the
##' return value.
##' @method "[<-" msa
##' @usage \method{[}{msa}(x, rows, cols) <- value
##' @export "[<-.msa"
##' @export
##' @rdname square-bracket-assign-msa
##' @example inst/examples/square-bracket-assign-msa.R
##' @author Melissa J. Hubisz
"[<-.msa" <- function(x, rows, cols, value) {
if (!is.msa(x))
stop("[<-.msa expects x to be an msa object")
isPointer <- !is.null(x$externalPtr)
x <- as.pointer.msa(x)
if (!is.character(value))
stop("value should be of type character")
if (length(value)==1L && nchar(value) > 1L)
value <- strsplit(value, "")[[1]]
if (unique(sapply(value, nchar)) != 1L)
stop("each element of value should be a single character")
if (!missing(rows)) {
if (is.null(rows)) stop("rows cannot be NULL")
} else rows=NULL
if (!missing(cols)) {
if (is.null(cols)) stop("cols cannot be NULL")
} else cols=NULL
if (!is.null(rows)) {
# if rows are given by names, convert to integer
if (is.character(rows)) {# names are given
names <- names.msa(x)
rows <- as.numeric(sapply(rows, function(x) {which(x == names)}))
if (sum(is.na(rows)) > 0L)
stop("unknown names in first dimension subset")
}
}
if (is.logical(rows)) {
if (nrow.msa(x) %% length(rows) != 0)
stop("number of rows in x is not a multiple of length(rows)")
rows <- which(rep(rows, length.out = nrow.msa(x)))
}
if (is.logical(cols)) {
if (ncol.msa(x) %% length(cols) != 0)
stop("number of cols in x is not a multiple of length(cols)")
cols <- which(rep(cols, length.out = ncol.msa(x)))
}
# now get value in the correct dimensions if it isn't already
if (is.null(rows)) numrow <- nrow.msa(x) else numrow <- length(rows)
if (is.null(cols)) numcol <- ncol.msa(x) else numcol <- length(cols)
if (numrow*numcol %% length(value) != 0)
stop("number of items to replace is not a multiple of replacement length")
if (sum(cols < 1 | cols > ncol.msa(x)) != 0)
stop("cols out of range")
if (sum(rows < 1 | rows > nrow.msa(x)) != 0)
stop("rows out of range")
# don't do the recycling here; save memory and recycle in C code
.Call.rphast("rph_msa_square_bracket_equals", x$externalPtr,
rows, cols, value)
if (!isPointer)
x <- from.pointer.msa(x)
x
}
##' Obtain posterior probilities of every state at every node
##' @param x An object of type \code{msa}
##' @param tm An object of type \code{tm}
##' @param every.site If \code{TRUE}, return probabilities for every site
##' rather than every site pattern (this may be very redundant and large
##' for a large alignment with few species).
##' @return An array giving the posterior probabilities of all states for
##' every unique site pattern, or for every site if every.site is
##' \code{TRUE}
##' @export
##' @example inst/examples/postprob-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
postprob.msa <- function(x, tm, every.site=FALSE) {
if (!is.msa(x))
stop("x is not an MSA object")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
tm <- as.pointer.tm(tm)
every.site <- check.arg(every.site, "every.site", "logical", null.OK=FALSE)
rphast.simplify.list(.Call.rphast("rph_msa_postprob",
x$externalPtr, tm$externalPtr,
every.site))
}
##' Obtain expected number of substitutions on each branch and site
##' @param x An object of type \code{msa}
##' @param tm An object of type \code{tm}
##' @return An array giving the expected number of substitutions on each
##' branch at each unique site pattern, summed across all types of
##' substitutions.
##' @export
##' @example inst/examples/expected-subs-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
expected.subs.msa <- function(x, tm) {
if (!is.msa(x))
stop("x is not an MSA object")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
tm <- as.pointer.tm(tm)
rphast.simplify.list(.Call.rphast("rph_msa_exp_subs",
x$externalPtr, tm$externalPtr))
}
##' Obtain expected number of substitutions of each type on each branch
##' @param x An object of type \code{msa}
##' @param tm An object of type \code{tm}
##' @return An array giving the expected number of substitutions on each
##' branch, for each type of substitution.
##' @export
##' @example inst/examples/total-expected-subs-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
total.expected.subs.msa <- function(x, tm) {
if (!is.msa(x))
stop("x is not an MSA object")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
tm <- as.pointer.tm(tm)
rphast.simplify.list(.Call.rphast("rph_msa_exp_tot_subs",
x$externalPtr, tm$externalPtr))
}
##' Obtain expected number of substitutions on each branch for each site pattern and each substitution type
##' @param x An object of type \code{msa}
##' @param tm An object of type \code{tm}
##' @return An array giving the expected number of substitutions on each
##' branch, for each distinct alignment column, for each type of substitution.
##' @export
##' @example inst/examples/col-expected-subs-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
col.expected.subs.msa <- function(x, tm) {
if (!is.msa(x))
stop("x is not an MSA object")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
tm <- as.pointer.tm(tm)
rphast.simplify.list(.Call.rphast("rph_msa_exp_col_subs",
x$externalPtr, tm$externalPtr))
}
##' Likelihood of an alignment given a tree model
##' @title MSA Likelihood
##' @param x An object of class \code{msa} representing the multiple alignment
##' @param tm An object of class \code{tm} representing the tree and model of
##' substitution
##' @param features A features object. If non-null, compute likelihoods
##' for each feature rather than the whole alignment.
##' @param by.column Logical value. If \code{TRUE}, return the log
##' likelihood for each alignment column rather than total log
##' likelihood.
##' Ignored if features is not NULL.
##' @return Either the log likelihood of the entire alignment (if
##' \code{by.column==FALSE && is.null(features)},
##' or a numeric vector giving the log likelihood of each feature
##' (if \code{!is.null(features)}), or a numeric vector giving the
##' log likelihood of each column (if \code{by.column==TRUE}).
##' @seealso \code{phyloFit}, \code{tm}
##' @keywords msa tm features
##' @export
##' @example inst/examples/likelihood-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
likelihood.msa <- function(x, tm, features=NULL, by.column=FALSE) {
if (is.null(features))
check.arg(by.column, "by.column", "logical", null.OK=FALSE)
else {
if (is.null(features$externalPtr))
features <- as.pointer.feat(features)
else features <- copy.feat(features) # if we don't make a copy features gets
# destroyed by re-mapping coordinates to msa
if (by.column) {
warning("by.column ignored when features is provided")
by.column <- FALSE
}
}
if (!is.msa(x))
stop("x is not an MSA object")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
if (by.column && !is.ordered.msa(x))
warning("by.column may not be a sensible option for unordered MSA")
tm <- as.pointer.tm(tm)
.Call.rphast("rph_msa_likelihood", x$externalPtr, tm$externalPtr,
features$externalPtr,
by.column)
}
##' Simulate a MSA given a tree model and HMM.
##'
##' Simulates a multiple sequence alignment of specified length. Deals
##' with base-substitution only, not indels. If one tree model is given,
##' simply simulates a sequence from this model. If an HMM is provided,
##' then the mod parameter should be a list of tree models with the same
##' length as the number of states in the HMM.
##' @param object An object of type \code{tm} (or a list of these objects)
##' describing the phylogenetic model from which to simulate. If it
##' is a list of tree models then an HMM should be provided to describe
##' transition rates between models. Currently only models of order zero
##' are supported, and if multiple models are given, they are currently
##' assumed to have the same topology.
##' @param nsim The number of columns in the simulated alignment.
##' @param seed A random number seed. Either \code{NULL} (the default;
##' do not re-seed random number generator), or an integer to be sent to
##' set.seed.
##' @param hmm an object of type HMM describing transitions between the
##' tree models across the columns of the alignment.
##' @param get.features (For use with hmm). If \code{TRUE}, return object will
##' be a list of length two. The first element will be the alignment, and the
##' second will be an object of type \code{feat} describing the path through
##' the phylo-hmm in the simulated alignment.
##' @param pointer.only (Advanced use only). If TRUE, return only a pointer
##' to the simulated alignment. Possibly useful for very (very) large
##' alignments.
##' @param ... Currently not used (for S3 compatibility)
##' @return An object of type MSA containing the simulated alignment.
##' @keywords msa hmm
##' @export
##' @export simulate.msa
##' @author Melissa J. Hubisz and Adam Siepel
##' @importFrom stats simulate
##' @method simulate msa
##' @example inst/examples/simulate-msa.R
##' @note Currently only supports HMMs in which the models for each state
##' have the same topologies.
simulate.msa <- function(object, nsim, seed=NULL, hmm=NULL, get.features=FALSE,
pointer.only=FALSE, ...) {
nsites <- nsim
mod <- object
check.arg(get.features, "get.features", "logical", null.OK=FALSE, min.length=1L,
max.length=1L)
if (!is.null(seed)) set.seed(seed)
nstate <- 1L
if (!is.null(hmm))
nstate <- nstate.hmm(hmm)
if (is.tm(mod)) {
tmlist <- list(mod)
} else tmlist <- mod
nmod <- length(tmlist)
if (nstate != nmod)
stop("number of states in HMM (", nstate, ") does not match number of models (", nmod,")")
for (i in 1:nmod) {
if (!is.tm(tmlist[[i]]))
stop("mod should be a list of tree models (one for every state of HMM)")
tmlist[[i]] <- (as.pointer.tm(tmlist[[i]]))$externalPtr
}
if (!is.null(hmm)) hmm <- (as.pointer.hmm(hmm))$externalPtr
if ((!is.null(hmm)) && get.features) {
result <- .Call.rphast("rph_msa_base_evolve", tmlist, nsites, hmm,
get.features)
if (!pointer.only) {
result$msa <- from.pointer.msa(result$msa)
}
result$feats <- as.data.frame.feat(result$feats)
return(result)
}
x <- .makeObj.msa()
x$externalPtr <- .Call.rphast("rph_msa_base_evolve", tmlist, nsites, hmm,
get.features)
if (pointer.only == FALSE)
x <- from.pointer.msa(x)
x
}
##' Sample columns from an MSA
##' @param x An object of type \code{msa}
##' @param size The number of columns to sample
##' @param replace Whether to sample with replacement
##' @param prob A vector of probability weights for sampling each column;
##' \code{prob=NULL} implies equal probability for all columns. Probabilities
##' need not sum to one but should be non-negative and can not all be zero.
##' @param pointer.only If \code{TRUE}, return only a pointer to an alignment
##' object stored in C (useful for large objects; advanced use only).
##' @return An object of type \code{msa} with columns randomly
##' re-sampled from the original
##' @note This function is implemented using R's sample function in
##' conjunction with "[.msa". It will not alter the value of x even if it
##' is stored as a pointer.
##' @method sample msa
##' @keywords msa
##' @export sample.msa
##' @export
##' @example inst/examples/sample-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
sample.msa <- function(x, size, replace=FALSE, prob=NULL, pointer.only=FALSE) {
check.arg(size, "size", "integer", null.OK=FALSE)
check.arg(replace, "replace", "logical", null.OK=FALSE)
if (!is.null(prob)) prob <- rep(prob, length.out=ncol.msa(x))
if (size > ncol.msa(x) && replace==FALSE)
stop("cannot sample more columns than in msa unless replace=TRUE")
x[,sample(1:ncol.msa(x), size, replace=replace, prob=prob),
pointer.only=pointer.only]
}
##' Extract fourfold degenerate sites from an MSA object
##' @param x An object of type \code{msa}
##' @param features an object of type \code{feat}. Should have defined coding regions
##' with feature type "CDS"
##' @return An unordered msa object containing only the sites which are
##' fourfold degenerate.
##' @note \itemize{
##' {If x is stored as a pointer, it will be
##' reduced to four-fold degenerate sites, so the original alignment will be
##' lost. Use get4d.msa(copy.msa(x), features) to avoid this behavior. The
##' return value will always be stored in R regardless of how the original
##' alignment was stored.}
##' \item{For very large MSA objects it is more efficient to use the do.4d option
##' in the read.msa function instead.}}
##' @export
##' @example inst/examples/get4d-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
get4d.msa <- function(x, features) {
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
if (is.null(features$externalPtr) && sum(features$feature=="CDS")==0L)
stop("features has no features labelled \"CDS\"... cannot extract 4d sites")
if (is.null(features$externalPtr)) {
features <- as.pointer.feat(features)
} else features <- copy.feat(features)
x$externalPtr <- .Call.rphast("rph_msa_reduce_to_4d",
x$externalPtr,
features$externalPtr)
from.pointer.msa(x)
}
##' Extract features from an MSA object
##'
##' Returns the subset of the MSA which appears in the features object.
##' @param x An object of type MSA
##' @param features An object of type \code{features} denoting the regions
##' of the alignment to extract.
##' @param do4d If \code{TRUE}, then some elements of features must have type "CDS", and only
##' fourfold-degenerate sites will be extracted.
##' @param pointer.only If \code{TRUE}, return only a pointer to an object
##' stored in C (useful for large alignments; advanced use only)
##' @return An msa object containing only the regions of x
##' appearing in the features object.
##' @note If x was loaded with \code{pointer.only==TRUE}, then x
##' will be modified to the return value of the function.
##' Use \code{extract.feature.msa(copy.msa(x), features,...)}
##' if you don't want this behavior!
##' @seealso \code{sub.msa}, \code{[.msa}
##' @keywords msa features
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
extract.feature.msa <- function(x, features, do4d=FALSE, pointer.only=FALSE) {
if (!is.ordered.msa(x))
stop("extract.feature.msa requires ordered alignment")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
if (do4d) {
if (sum(features$feature=="CDS")==0L)
stop("features has no elements of type \"CDS\"... cannot extract 4d sites")
rv <- get4d.msa(x, features)
} else {
if (is.null(features$externalPtr)) {
features <- as.pointer.feat(features)
} else features <- copy.feat(features)
rv <- .makeObj.msa()
rv$externalPtr <- .Call.rphast("rph_msa_extract_feature",
x$externalPtr,
features$externalPtr)
}
if (!is.null(rv$externalPtr)) {
if (!pointer.only)
rv <- from.pointer.msa(rv)
}
rv$is.ordered <- FALSE
rv
}
##' Concatenate msa objects
##'
##' If the MSAs do not contain the same set of sequences, the sequences
##' will be added to each MSA and filled with missing data. The order
##' of sequences is taken from the first MSA, and sequences are added to
##' this as necessary.
##' @param msas A list of MSA objects to concatenate together.
##' @param ordered If FALSE, disregard the order of columns in the combined
##' MSA.
##' @param pointer.only (Advanced use only, for very large MSA objects) If
##' TRUE, return object will be a pointer to an object stored in C.
##' @return An object of type MSA
##' @note None of the msas passed to this function will be altered, even if
##' they are stored as pointers to objects in C.
##' @keywords msa
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
concat.msa <- function(msas, ordered=FALSE, pointer.only=FALSE) {
# have to do a little dance to make sure this behaves OK if
# some msas are empty
if (!is.list(msas))
stop("concat.msa expects list of msa objects")
isZero <- logical(length(msas))
for (i in 1:length(msas)) {
if (is.null(msas[[i]]$externalPtr))
msas[[i]] <- as.pointer.msa(msas[[i]])
isZero[i] <- (ncol.msa(msas[[i]]) == 0L)
}
if (sum(isZero) > 0L)
msas[isZero] <- NULL
if (length(msas) == 0L) return(NULL)
aggMsa <- copy.msa(msas[[1]])
if (is.null(aggMsa$externalPtr))
aggMsa <- as.pointer.msa(aggMsa)
if (length(msas) >= 2L) {
for (i in 2:length(msas)) {
aggMsa$externalPtr <- .Call.rphast("rph_msa_concat",
aggMsa$externalPtr,
msas[[i]]$externalPtr)
}
}
if (pointer.only == FALSE)
aggMsa <- from.pointer.msa(aggMsa)
aggMsa
}
##' Split an MSA by feature
##' @param x An object of type \code{msa}
##' @param f An object of type \code{feat}
##' @param drop Not currently used
##' @param pointer.only If \code{TRUE}, returned list elements are pointers to
##' objects stored in C (advanced use only).
##' @param ... Not currently used
##' @return A list of msa objects, representing the sub-alignments for
##' each element in f
##' @note Neither x nor f will be altered by this function if they are stored
##' as pointers.
##' @keywords msa features
##' @method split by.feature.msa
##' @export split.by.feature.msa
##' @export
##' @example inst/examples/split-by-feature-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
split.by.feature.msa <- function(x, f, drop=FALSE, pointer.only=FALSE, ...) {
check.arg(pointer.only, "pointer.only", "logical", null.OK=FALSE)
if (is.null(x$externalPtr)) x <- as.pointer.msa(x)
if (is.null(f$externalPtr)) {
f <- as.pointer.feat(f)
} else f <- copy.feat(f)
rv <- .Call.rphast("rph_msa_split_by_gff", x$externalPtr,
f$externalPtr)
if (!pointer.only) {
for (i in 1:length(rv))
rv[[i]] <- from.pointer.msa(rv[[i]])
}
rv
}
##' Get informative regions of an alignment
##' @param x An object of type \code{msa}.
##' @param min.numspec The minimum number of species with non-missing data
##' required for an alignment column to be considered informative.
##' @param spec A character vector of species names, or an integer vector
##' of species indices. Only data in
##' the named species count towards deciding if a site is informative. The
##' default value of \code{NULL} implies use all species in the alignment.
##' @param refseq Defines the frame of reference for the return value. Should
##' be a character vector with the name of one of the sequences in the
##' alignment, or NULL to indicate use the frame of reference of the entire
##' alignment.
##' @param gaps.inf Logical value indicating whether a gap should be considered
##' informative. The default value of \code{FALSE} indicates that gaps as
##' well as missing data are not counted as informative.
##' @return An object of type \code{feat} indicating the regions of the
##' alignment which meet the informative criteria. Note that unless
##' \code{refseq==NULL}, columns with gaps in the reference
##' sequence will be ignored, and will fall in "informative" or "uninformative"
##' features based on the informativeness of neighboring columns.
##' @note \itemize{
##' \item{If the msa object has an idx.offset, it is assumed to be a coordinate
##' offset for the first species in the alignment. So the idx.offset will
##' be added to the coordinates in the returned features object only if
##' \code{refseq==names.msa(x)[1]}.}
##' \item{This function will not alter the value of x even if it is stored as
##' a pointer.}}
##' @keywords msa
##' @export
##' @example inst/examples/informative-regions-msa.R
##' @author Melissa J. Hubisz and Adam Siepel
informative.regions.msa <- function(x, min.numspec, spec=NULL,
refseq=names.msa(x)[1], gaps.inf=FALSE) {
numspec <- nrow.msa(x)
check.arg(min.numspec, "min.numspec", "integer", null.OK=FALSE)
check.arg(gaps.inf, "gaps.inf", "logical", null.OK=FALSE)
check.arg(refseq, "refseq", "character", null.OK=TRUE)
if (min.numspec <= 0L || min.numspec > numspec)
stop("min.numspec expected to be between 1 and ", numspec)
if (!is.null(spec)) {
if (is.integer(spec)) {
check.arg(spec, "spec", "integer", null.OK=TRUE, min.length=1L,
max.length=numspec)
if (sum(spec <= 0 | spec > numspec) > 0L)
stop("expected spec values between 1 and ", numspec)
} else {
check.arg(spec, "spec", "character", null.OK=TRUE, min.length=1L,
max.length=nrow.msa(x))
intspec <- as.integer(sapply(spec, function(s) {which(s==names(x))}))
if (sum(is.na(intspec)) > 0L)
stop("don't know species names ", spec[is.na(intspec)])
spec <- intspec
}
}
# compute index of refseq to use in .Call.rphast function below
if (is.null(refseq)) {
refseq <- 0
} else {
intrefseq <- which(refseq==names(x))
if (length(intrefseq) == 0L)
stop("don't know refseq name ", refseq)
refseq <- intrefseq
}
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
feats <- .makeObj.feat(TRUE)
feats$externalPtr <- .Call.rphast("rph_msa_informative_feats",
x$externalPtr, min.numspec, spec, refseq,
gaps.inf)
as.data.frame.feat(feats)
}
##' Clean an alignment for codon analysis
##' @param x An object of type \code{msa}
##' @param refseq The name of the reference sequence to be used. If given,
##' strip all columns which contain gaps in refseq. Once this is done,
##' alignment should be in frame. If \code{refseq==NULL} then alignment
##' should be in frame as it is sent in (no gaps are stripped).
##' @param strand Either "+" or "-". If "-", reverse complement the
##' alignment.
##' @return An object of type \code{msa}. It will be the same as the
##' original msa, with the following modifications:
##' \itemize{
##' \item If refseq is not NULL, columns with gaps in refseq will be stripped.
##' \item If strand is "-", the new msa will be the reverse complement of
##' the original.
##' \item After the gap stripping and reverse complementing steps, each
##' sequence is searched for stop codons. If encountered, the stop codon
##' and the rest of the sequence to follow is converted to missing data. The
##' resulting msa has a length equal to the longest remaining sequence (end
##' columns with all missing data are removed).
##' }
##' @note If the input msa (x) is stored as a pointer, its value will be
##' changed to the return value of the function.
##' @export
##' @author Melissa J. Hubisz
codon.clean.msa <- function(x, refseq=NULL, strand="+") {
check.arg(refseq, "refseq", "character", null.OK=TRUE, min.length=1L,
max.length=1L)
if (!is.msa(x)) stop("x should be object of type msa")
check.arg(strand, "strand", "character", null.OK=FALSE, min.length=1L,
max.length=1L)
if (is.null(x$externalPtr)) {
x <- as.pointer.msa(x)
pointer.only <- FALSE
} else pointer.only <- TRUE
.Call.rphast("rph_msa_codon_clean", x$externalPtr, refseq, strand)
if (pointer.only == FALSE) {
x <- from.pointer.msa(x)
}
x
}
##' Get the observed frequencies of states in an alignment
##' @param align An object of type \code{msa}.
##' @param mod An object of type \code{tm} representing a tree model.
##' @return A numeric vector giving the observed frequencies of each state
##' in the model
##' @export
##' @author Melissa J. Hubisz and Adam Siepel
state.freq.msa <- function(align, mod) {
if (!is.msa(align)) stop("align should be object of type msa")
if (is.null(align$externalPtr))
align <- as.pointer.msa(align)
mod <- as.pointer.tm(mod)
.Call.rphast("rph_msa_get_base_freqs_tuples", align$externalPtr,
mod$externalPtr)
}
##' Get the frequencies of characters in an alignment
##' @param x An object of type \code{msa}
##' @param seq A vector of character strings identifying the sequence(s)
##' to get base frequencies for. If \code{NULL}, use all sequences.
##' @param ignore.missing If TRUE, ignore missing data characters ("N" and "?").
##' Must be TRUE if seq is stored as a pointer.
##' @param ignore.gaps If TRUE, ignore gaps. Must be TRUE if seq is stored
##' as a pointer.
##' @return A data frame with one row for each unique state (usually
##' "A", "C", "G", "T", and possibly "N", "?", "-", counts for
##' each state, and overall frequency of each state.
##' @seealso \code{statfreq.msa}, which gets observed frequencies of states
##' in an alignment with respect to a substitution model, and works for
##' pointers.
##' @export
##' @author Melissa J. Hubisz
base.freq.msa <- function(x, seq=NULL, ignore.missing=TRUE,
ignore.gaps=TRUE) {
if (!is.msa(x)) stop("x should be object of type msa")
check.arg(seq, "seq", "character", null.OK=TRUE, min.length=1L,
max.length=NULL)
check.arg(ignore.missing, "ignore.missing", "logical", null.OK=FALSE,
min.length=1L, max.length=1L)
check.arg(ignore.gaps, "ignore.gaps", "logical", null.OK=FALSE,
min.length=1L, max.length=1L)
if (!is.null(x$externalPtr)) {
if (!is.null(seq)) x <- sub.msa(x, seq, pointer.only=TRUE)
if (ignore.missing != TRUE)
stop("ignore.missing must be TRUE in base.freq.msa if x is stored as a pointer")
if (ignore.gaps != TRUE)
stop("ignore.gaps must be TRUE in base.freq.msa if x is stored as a pointer")
return(rphast.simplify.list(.Call.rphast("rph_msa_base_freq",
x$externalPtr)))
}
if (is.null(x$seqs)) stop("x does not have element named seqs")
chars <- NULL
for (i in 1:nrow.msa(x)) {
if (is.null(seq) || is.element(names(x)[i], seq))
chars <- c(chars, strsplit(x$seqs[i], split='')[[1]])
}
if (ignore.missing) chars <- chars[chars != "?" & chars != "N"]
if (ignore.gaps) chars <- chars[chars != "-"]
if (length(chars) == 0L)
stop("No sequences found")
result <- table(chars)
rv <- data.frame(states=names(result), counts=as.integer(result))
rv <- data.frame(rv, freq=rv$counts/sum(rv$counts))
rv
}
##' Get codon frequencies based on 3x4 model
##' @param x An object of type msa. It is assumed to represent in-frame codons.
##' Length should be a multiple of 3.
##' @return A vector of length 64 corresponding to the 64 codon frequencies.
##' The frequencies corresponding to stop codons should be 0.
##' @author Melissa J. Hubisz
##' @export
freq3x4.msa <- function(x) {
if (!is.msa(x)) stop("x should be object of type msa")
if (is.null(x$externalPtr))
x <- as.pointer.msa(x)
.Call.rphast("rph_msa_freq3x4", x$externalPtr)
}
##' Get the fraction of G's and C's in an alignment
##' @param x An object of type \code{msa}
##' @param seq A vector of character strings identifying the sequence(s)
##' to use in the base frequency tabulation. If \code{NULL}, use all
##' sequences.
##' @param ignore.missing If \code{FALSE}, count missing data in the
##' denominator.
##' @param ignore.gaps If \code{TRUE}, count gaps in the denominator.
##' @return The fraction of bases which are C's and G's
##' @export
##' @author Melissa J. Hubisz
gc.content.msa <- function(x, seq=NULL, ignore.missing=TRUE,
ignore.gaps=TRUE) {
df <- base.freq.msa(x, seq, ignore.missing, ignore.gaps)
sum(df[df$states=="C" | df$states=="c" | df$states=="G" | df$states=="g","freq"])
}
##' Get pairwise differences per site between sequences
##' @param x An object of type \code{msa}
##' @param seq1 A character vector or integer index indicating seq1 (see Value)
##' @param seq2 A character vector of integer index indicating seq2. Can only be
##' provided if seq1 is provided.
##' @param ignore.missing A logical value indicating whether to compare
##' sites where either sequence has missing data.
##' @param ignore.gaps A logical value indicating whether to compare sites
##' where either sequence contains a gap.
##' @return If seq1 and seq2 are provided, returns a numeric value giving
##' the fraction of sites in the alignment where seq1 and seq2 differ (or
##' zero if there are no sites to compare). If seq1 is provided and seq2
##' is NULL, returns a numeric vector giving this value for seq1 compared
##' to every sequence (including itself; order of results is same as order of
##' sequences in alignment). If both seq1 and seq2 are NULL, returns a matrix
##' giving this value for every sequence compared with every other sequence.
##' @export
##' @seealso \code{ninf.msa} To count the number of non-gap and non-missing character
##' @author Melissa J. Hubisz
pairwise.diff.msa <- function(x, seq1=NULL, seq2=NULL, ignore.missing=TRUE,
ignore.gaps=TRUE) {
if (!is.msa(x)) stop("x should be an object of type msa")
check.arg(ignore.missing, "ignore.missing", "logical", null.OK=FALSE)
check.arg(ignore.gaps, "ignore.gaps", "logical", null.OK=FALSE)
if ((!is.null(seq2)) && is.null(seq1))
stop("seq2 can only be provided if seq1 is provided")
if (!is.null(seq1)) {
if (length(seq1) != 1L) stop("seq1 should have length 1")
if (is.character(seq1)) {
newseq1 <- which(names(x) == seq1)
if (length(newseq1) == 0L) stop("no sequence named ", seq1, " in alignment")
seq1 <- newseq1
}
}
if (!is.null(seq2)) {
if (length(seq2) != 1L) stop("seq2 should have length 1")
if (is.character(seq2)) {
newseq2 <- which(names(x) == seq2)
if (length(newseq2) == 0L) stop("no sequence named ", seq2, " in alignment")
seq2 <- newseq2
}
}
if (is.null(x$externalPtr)) x <- as.pointer.msa(x)
rphast.simplify.list(.Call.rphast("rph_msa_fraction_pairwise_diff",
x$externalPtr, seq1, seq2,
ignore.missing, ignore.gaps))
}
##' Get amino acid sequences from an alignment
##' @param m An object of type \code{msa} representing the alignment. The
##' alignment is assumed to be coding sequence, already in frame.
##' @param one.frame A logical value indicating whether to use the same frame for
##' all species in the alignment, or a separate frame for each species. If
##' \code{one.frame==TRUE} then every three columns of the alignment is translated
##' into a codon, regardless of gaps within the alignment. If
##' \code{one.frame==FALSE}, gaps will shift the frame in the species where they
##' occur. In this case, the length of the seqeunces returned may not all be the
##' same.
##' @param frame An integer specifying an offset from the first column of the
##' alignment where the coding region starts. The default 1 means start at
##' the beginning. If \code{one.frame==FALSE}, frame can be a vector of integers,
##' one for each species. Otherwise it should be a single value.
##' @return A vector of character strings representing the translated alignment.
##' The characters are amino acid codes, with '$' representing a stop codon,
##' and '*' denoting missing data or a codon with 1 or 2 gaps, and '-' denoting
##' a codon with all gaps.
##' @author Melissa J. Hubisz
##' @example inst/examples/translate-msa.R
##' @export
translate.msa <- function(m, one.frame=TRUE, frame=1) {
if (!is.msa(m)) stop("m is not MSA object")
one.frame <- check.arg(one.frame, "one.frame", "logical", null.OK=FALSE)
frame <- check.arg(frame, "frame", "integer", null.OK=FALSE, min.length=1L,
max.length=ifelse(one.frame, 1, nrow.msa(m)))
if (sum(frame <= 0 | frame >= ncol.msa(m)) > 0L)
stop("frame should only contain values between 1 and ncol.msa(m)-1")
if (!one.frame) frame <- rep(frame, length.out=nrow.msa(m))
if (is.null(m$externalPtr))
m <- as.pointer.msa(m)
.Call.rphast("rph_msa_translate", m$externalPtr, one.frame, as.integer(frame-1))
}
#TODO :implement pretty option
# new options not implemented in plot.msa yet:
## @param color.nonsyn If not \code{NULL}, use this color for codons that
## do not match the codon in the first sequence. strand and frame.start
## should be set appropriately.
## @param strand (For use with color.nonsyn) Either "+" or "-", indicating
## the strand to be used for translating DNA and determining amino acids.
## @param frame.start (For use with color.nonsyn) An integer from 1-3,
## indicating the frame of the first base in the plot (1==first codon
## position, 2=second codon position, 3=third). If strand=="-",
## frame.start is the frame of the base with the highest coordinate.
##' Plot an alignment
##' @param x An object of type \code{msa}
##' @param refseq A character string naming the reference sequence to use
##' (NULL implies frame of reference of entire alignment).
##' @param add If \code{TRUE}, add to the current plot
##' @param xlim (Only used when \code{add==FALSE}. A vector of length 2
##' giving the coordinate range to plot in
##' terms of refseq coordinates. If NULL use entire range of alignment.
##' @param ylim (Only used when \code{add==TRUE}. The limits to use on the
##' y-axis.
##' @param pretty If \code{TRUE}, display bases as dots which are in 2nd or
##' higher row and are identical to corresponding base in 1st row.
##' @param min.char.size The smallest value (in inches) that a character can
##' be. If characters need to be smaller than this, skip the plot.
##' @param nuc.text If not NULL, can be a vector of character strings. Each
##' character string should be the same length as the MSA with respect to refseq.
##' Each string will be displayed in its own row along with the alignment.
##' @param nuc.text.pos If nuc.text is not NULL, can be either "top" or "bottom"
##' to indicate where to place nuc.text relative to the alignment. Will be recycled
##' to the length of nuc.text.
##' @param nuc.text.col If nuc.text is not NULL, color to be used for printing nuc.text. Will
##' be recycled to the length of nuc.text.
##' @param ... Additional arguments to be passed to plot()
##' @method plot msa
##' @export plot.msa
##' @export
##' @example inst/examples/plot-msa.R
##' @author Melissa J. Hubisz
plot.msa <- function(x, refseq=names.msa(x)[1],
xlim=NULL, ylim=c(0,1),
add=FALSE, pretty=FALSE, min.char.size=0.05,
nuc.text=NULL, nuc.text.pos="bottom",
nuc.text.col="black",
...) {
if (!is.msa(x)) stop("first argument to plot.msa should be msa object")
# if (!is.null(color.nonsyn)) {
# color.nonsyn <- check.arg(color.nonsyn, "character", null.OK=FALSE)
# strand <- check.arg(strand, "character", null.OK=FALSE)
# if (strand != "+" && strand != "-") stop("strand should be \"+\" or \"-\"")
# frame.start <- check.arg(frame.start, "integer", null.OK=FALSE)
# if (frame.start < 1L || frame.start > 3L)
# stop("frame.start should be 1, 2, or 3")
# }
coordRange <- coord.range.msa(x, refseq)
if (add) {
xlim <- par("usr")[1:2]
}
if (is.null(xlim)) {
xlim <- coordRange + c(-0.5,.5)
xrange <- coordRange
} else {
xrange <- c(max(floor(xlim[1]), coordRange[1]),
min(ceiling(xlim[2]), coordRange[2]))
}
if (!add) {
plot(c(0), c(0), type="n", xlim=xlim, ylim=ylim, yaxt="n", ylab="",
xlab=sprintf("Corodinate with respect to %s ",
ifelse(is.null(refseq), "alignment", refseq)),
xaxs="i",
...)
}
yrange <- par("usr")[3:4] #y coordinate min and max
width <- par("pin")[1] # width of plot window in inches
height <- par("pin")[2]/(yrange[2]-yrange[1])*(ylim[2]-ylim[1]) # height of window in inches
numseq <- length(names.msa(x))
numrow <- numseq
if (!is.null(nuc.text)) {
exp.length <- ncol.msa(x, refseq=refseq)
for (i in 1:length(nuc.text)) {
if (nchar(nuc.text[i]) != exp.length)
stop(nuc.text[i], " should be length ", exp.length, " got length ",
nchar(nuc.text[i]))
nuc.text[i] <- substr(nuc.text[i], xrange[1]-coordRange[1]+1,
xrange[2] - coordRange[1]+1)
}
nuc.text.pos <- rep(nuc.text.pos, length.out=length(nuc.text))
if (sum(nuc.text.pos != "top" & nuc.text.pos != "bottom") > 0L)
stop("all elements of nuc.text.pos should be \"top\" or \"bottom\"")
nuc.text.col <- rep(nuc.text.col, length.out=length(nuc.text))
numrow <- numrow + length(nuc.text)
}
x <- sub.msa(x, start.col=xrange[1], end.col=xrange[2],
refseq=refseq)
numch <- ncol.msa(x, refseq=refseq)
chWidth <- width/numch # amount of available space per character horizontal
chHeight <- height/(numrow) # amount of available space per character vertical
if (chWidth < min.char.size || chHeight < min.char.size) {
warning("plot.msa could not plot alignment; characters too small")
return(invisible(NULL))
}
cexHeight <- chHeight/(par("cex")*par("cin")[2]) # this value of cex would make characters fill up all space vertically
cexHeight <- cexHeight*0.75 # leave some vertical space between lines
cexWidth <- chWidth/(par("cex")*par("cin")[1]) # this value of cex would mkae characters fill up all space horizontally
cexWidth <- cexWidth*0.95 # leave a little space between characters
textCex <- min(c(cexHeight, cexWidth)) #use the minimum between the two
# cexHeight <- textCex*par("cex")*par("cin")[2]/3
if (!is.null(refseq)) {
# to-do: make strip.gaps return features with location/lengths of
# gaps in refseq and plot those?
x <- strip.gaps.msa(x, strip.mode=refseq)
}
#vertical spacing
# first try allowing half a character height between each line
# if that doesn't fit then space evenly
y <- (1:numrow)*textCex*1.25*par("cex")*par("cin")[2] # this is in inches
if (max(y)-min(y) < height) { # the optimal spacing fits vertically
y <- y/height*(ylim[2] - ylim[1]) # convert from inches to coordinates
y <- y - mean(y) + (ylim[2] - ylim[1])/2 + ylim[1]
} else {
y <- seq(from=ylim[1], to=ylim[2], length.out=numrow+2)[2:(numseq+1)]
}
y <- rev(y)
longestName <- max(nchar(names.msa(x)))
marSize <- par("mai")[2] #margin size on left side
nameCex <- min(marSize/(par("cin")[1]*longestName), cexHeight)
yidx <- 1
if (!is.null(nuc.text) && sum(nuc.text.pos=="top") > 0L) {
f <- nuc.text.pos=="top"
for (i in 1:sum(f)) {
text(x=seq(from=xrange[1], to=xrange[2], by=1), y=y[yidx],
labels=strsplit(nuc.text[f][i], "")[[1]], cex=textCex,
col=nuc.text.col[i])
yidx <- yidx+1
}
}
for (i in 1:numseq) {
chars <- strsplit(x$seq[i], "")[[1]]
if (pretty) {
if (i == 1L) {
firstChars <- chars
} else {
chars[chars == firstChars] <- "."
}
}
text(x=seq(from=xrange[1], to=xrange[2], by=1),
y=y[yidx],
labels=chars, cex=textCex)
mtext(names.msa(x)[i], line=0.5, side=2, at=y[yidx], las=1, cex=nameCex, ...)
yidx <- yidx + 1
}
if (!is.null(nuc.text) && sum(nuc.text.pos=="bottom") > 0L) {
f <- nuc.text.pos=="bottom"
for (i in 1:sum(f)) {
text(x=seq(from=xrange[1], to=xrange[2], by=1), y=y[yidx],
labels=strsplit(nuc.text[f][i], "")[[1]], cex=textCex,
col=nuc.text.col[f][i])
yidx <- yidx+1
}
}
invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.