Nothing
#' Writes out a morphological #NEXUS data file
#'
#' @description
#'
#' Writes out a morphological data file in #NEXUS format.
#'
#' @param cladistic_matrix The cladistic matrix in the format imported by \link{read_nexus_matrix}.
#' @param file_name The file name to write to. Should end in \code{.nex}.
#'
#' @details
#'
#' Writes out a #NEXUS (Maddison et al. 1997) data file representing the distribution of characters in a set of taxa. Data must be in the format created by importing data with \link{read_nexus_matrix}.
#'
#' @author Graeme T. Lloyd \email{graemetlloyd@@gmail.com}
#'
#' @seealso \link{write_tnt_matrix}
#'
#' @references
#'
#' Maddison, D. R., Swofford, D. L. and Maddison, W. P., 1997. NEXUS: an extensible file format for systematic information. \emph{Systematic Biology}, \bold{46}, 590-621.
#'
#' @seealso
#'
#' \link{build_cladistic_matrix}, \link{compactify_cladistic_matrix}, \link{prune_cladistic_matrix}, \link{read_nexus_matrix}, \link{safe_taxonomic_reduction}, \link{write_tnt_matrix}
#'
#' @examples
#'
#' # Write out Michaux 1989 to current working directory:
#' write_nexus_matrix(cladistic_matrix = michaux_1989, file_name = "michaux_1989.nex")
#'
#' # Remove file when finished:
#' file.remove(file1 = "michaux_1989.nex")
#' @export write_nexus_matrix
write_nexus_matrix <- function(cladistic_matrix, file_name) {
# Check cladistic_matrix has class cladisticMatrix and stop and warn user if not:
if (!inherits(x = cladistic_matrix, what = "cladisticMatrix")) stop("cladistic_matrix must be an object of class \"cladisticMatrix\".")
# Subfunction to convert matrices back to symbols, missing and gap characters:
convert_matrix <- function(data_matrix) {
# If there are missing characters replace with missing symbol:
if (any(is.na(data_matrix$matrix))) data_matrix$matrix[is.na(data_matrix$matrix)] <- data_matrix$characters$missing
# If there are gap characters replace with gap symbol:
if (sum(as.vector(data_matrix$matrix) == "") > 0) data_matrix$matrix[data_matrix$matrix == ""] <- data_matrix$characters$gap
# If there are symbols (i.e., non-continuous data):
if (length(x = data_matrix$characters$symbols) > 0) {
# In reverse order go through numbers:
for (i in rev(data_matrix$characters$symbols)) {
# Replace current number with appropriate symbol:
if (length(x = grep(as.character(which(x = data_matrix$characters$symbols == i) - 1), data_matrix$matrix)) > 0) data_matrix$matrix <- gsub(pattern = as.character(which(x = data_matrix$characters$symbols == i) - 1), replacement = i, x = data_matrix$matrix)
}
}
# If there are uncertainties:
if (length(x = grep("/", data_matrix$matrix)) > 0) {
# Find cells that have uncertainties:
uncertainties <- grep("/", data_matrix$matrix)
# Repalce with all possible values in curly braces:
data_matrix$matrix[uncertainties] <- paste("{", unlist(x = lapply(X = strsplit(data_matrix$matrix[uncertainties], split = "/"), paste, collapse = "")), "}", sep = "")
}
# If there are polymorphisms:
if (length(x = grep("&", data_matrix$matrix)) > 0) {
# Find cells with polymorphsims:
polymorphisms <- grep("&", data_matrix$matrix)
# Repale with values inside parentheses:
data_matrix$matrix[polymorphisms] <- paste("(", unlist(x = lapply(X = strsplit(data_matrix$matrix[polymorphisms], split = "&"), paste, collapse = "")), ")", sep = "")
}
# Get equal length taxon names (with added spaces):
taxon_names_with_spaces <- paste(rownames(x = data_matrix$matrix), unlist(x = lapply(X = lapply(X = as.list(x = (max(nchar(x = rownames(x = data_matrix$matrix))) + 2) - nchar(x = rownames(x = data_matrix$matrix))), rep, x = " "), paste, collapse = "")), sep = "")
# If block is continuous:
if (data_matrix$datatype == "CONTINUOUS") {
# Format rows with spaces between values:
data_matrix$matrix <- paste(taxon_names_with_spaces, (apply(data_matrix$matrix, 1, paste, collapse = " ")), sep = "")
# If block is non-continuous:
} else {
# Format rows without spaces between values:
data_matrix$matrix <- paste(taxon_names_with_spaces, (apply(data_matrix$matrix, 1, paste, collapse = "")), sep = "")
}
# Return just the newly formatted matrix (now a vector):
return(data_matrix$matrix)
}
# Subfunction for collapsing strings of character numbers (i.e., for weight and ordering in assumptions block):
zip_string <- function(x) {
# Set up empty zipped version of strin:
zipped_string <- vector(mode = "character")
# As long as there are zip-able parts to x remaining:
while (any(diff(x) == 1)) {
# Find start of first zippable part:
zip_start <- which(x = diff(x) == 1)[1]
# Find length of zippable part:
zip_length <- rle(diff(x))$lengths[which(x = rle(diff(x))$values == 1)[1]]
# Use length and start to find end of zip:
zip_end <- zip_start + zip_length
# Add zippable part to string (and any preceding unzippable parts):
zipped_string <- c(zipped_string, setdiff(x = x[1:zip_end], y = x[zip_start:zip_end]), paste(x[zip_start], x[zip_end], sep = "-"))
# Cut x down ready to find next zippable part:
x <- x[-(1:zip_end)]
}
# If there are characters left (or there are no zippable parts):
if (length(x = x) > 0) zipped_string <- c(zipped_string, x)
# Collapse to a single string with spaces:
zipped_string <- paste(zipped_string, collapse = " ")
# Return zipped string:
return(zipped_string)
}
# Isolate just data blocks (i.e., cladistic_matrix without topper):
data_blocks <- cladistic_matrix[2:length(x = cladistic_matrix)]
# Get block names:
block_names <- unlist(x = lapply(X = data_blocks, "[[", "block_name"))
# Get data_types:
data_types <- unlist(x = lapply(X = data_blocks, "[[", "datatype"))
# Get number of taxa:
n_taxa <- unname(unlist(x = lapply(X = lapply(X = data_blocks, "[[", "matrix"), nrow))[1])
# Get number of characters:
n_characters <- unlist(x = lapply(X = lapply(X = data_blocks, "[[", "matrix"), ncol))
# Get symbols strings:
symbols <- unlist(x = lapply(X = lapply(X = lapply(X = data_blocks, "[[", "characters"), "[[", "symbols"), paste, collapse = " "))
# Get missing value:
missing <- unlist(x = lapply(X = lapply(X = data_blocks, "[[", "characters"), "[[", "missing"))
# Get gap symbol:
gap <- unlist(x = lapply(X = lapply(X = data_blocks, "[[", "characters"), "[[", "gap"))
# Conver matrices to vectors of text strings:
data_block_strings <- lapply(X = data_blocks, convert_matrix)
# Set up header block (returns empty string if nothing there):
header_block <- ifelse(nchar(x = cladistic_matrix$topper$header) > 0, paste("[", cladistic_matrix$topper$header, "]\n\n", sep = ""), "")
# Set up taxa block (only required if multiple matrix blocks as sets number of taxa, will be empty string otherwise):
taxa_block <- ifelse(length(x = data_blocks) > 1, paste("BEGIN TAXA;\n\tDIMENSIONS NTAX=", n_taxa, ";\n\tTAXLABELS\n\t\t", paste(rownames(x = cladistic_matrix$matrix_1$matrix), collapse = " "), "\n;\nEND;\n\n", sep = ""), "")
# Set up data block (only required if a single matrix block):
data_block <- ifelse(length(x = data_blocks) == 1, paste("BEGIN DATA;\n\tDIMENSIONS NTAX=", n_taxa, " NCHAR=", n_characters, " ;\n\tFORMAT DATATYPE=", data_types, " SYMBOLS=\" ", symbols, "\" MISSING=", missing, " GAP=", gap, " ;\n", sep = ""), "")
# Set up character block (including MATRIX that will begin data):
character_block <- ifelse(rep(length(x = data_blocks), length(x = data_blocks)) > 1, paste("BEGIN CHARACTERS;\n\t", ifelse(nchar(x = block_names) > 0, paste("TITLE ", block_names, ";\n", sep = ""), ""), "\tDIMENSIONS NCHAR=", n_characters, ";\n\tFORMAT DATATYPE=", ifelse(data_types == "CONTINUOUS", "CONTINUOUS ", paste(data_types, " SYMBOLS=\" ", symbols, "\" ", sep = "")), "MISSING=", missing, " GAP=", gap, " ;\nMATRIX\n\n", sep = ""), "MATRIX\n\n")
# Take character block and meld with matri(ces) into matrix block(s):
matrix_block <- paste(paste(character_block, unlist(x = lapply(X = data_block_strings, paste, collapse = "\n")), "\n;\nEND;\n\n", sep = ""), collapse = "")
# Make sure costmatrices are a list if null:
if (!is.list(cladistic_matrix$topper$costmatrices)) cladistic_matrix$topper$costmatrices <- list(NULL)
# Create costmatrix block:
costmatrix_block <- paste(
ifelse(
test = !unlist(
x = lapply(
X = cladistic_matrix$topper$costmatrices,
FUN = is.null
)
),
yes = paste(
paste(
"\tUSERTYPE ",
names(cladistic_matrix$topper$costmatrices),
" STEPMATRIX = ",
unlist(
x = lapply(
X = cladistic_matrix$topper$costmatrices,
FUN = function(x) ncol(x = x$costmatrix)
)
),
" ",
unlist(
x = lapply(
X = lapply(
X = as.list(x = names(x = cladistic_matrix$topper$costmatrices)),
FUN = function(x) {
block_costmatrix_appears_in <- which(
x = unlist(
x = lapply(
X = cladistic_matrix[2:length(x = cladistic_matrix)],
FUN = function(y) any(x = y$ordering == x))))[1]
cladistic_matrix[[(1 + block_costmatrix_appears_in)]]$characters$symbols[
1:cladistic_matrix$topper$costmatrices[[x]]$size
]
}
),
FUN = paste,
collapse = ""
)
),
"\n",
sep = ""
),
"\t",
unlist(x = lapply(X = lapply(X = lapply(X = cladistic_matrix$topper$costmatrices, function(x) {
diag(x = x$costmatrix) <- "."
if(any(x$costmatrix == Inf)) x$costmatrix[rownames(x = x$costmatrix), colnames(x = x$costmatrix)] <- gsub(pattern = Inf, replacement = "i", x = x$costmatrix)
return(x$costmatrix)
}), apply, 1, paste, collapse = " "), paste, collapse = "\n\t")),
"\n\t;\n",
sep = ""
),
no = ""
),
collapse = ""
)
# Get ordering of all characters in sequence:
ordering <- unlist(x = lapply(X = data_blocks, "[[", "ordering"))
# Get weights of all characters in sequence:
character_weights <- unlist(x = lapply(X = data_blocks, "[[", "character_weights"))
# Create options block (if no block names):
if (all(is.na(block_names))) options_block <- paste(ifelse(all(ordering == "unordered"), "\tOPTIONS DEFTYPE=unord PolyTcount=MINSTEPS ;\n", ifelse(all(ordering == "ordered"), "\tOPTIONS DEFTYPE=ord PolyTcount=MINSTEPS ;\n", "\tOPTIONS DEFTYPE=unord PolyTcount=MINSTEPS ;\n")), ifelse(length(x = unique(x = ordering)) == 1 && length(x = setdiff(x = unique(x = ordering), y = c("ordered", "unordered"))) == 0, "", paste("\tTYPESET * UNTITLED = ", paste(paste(sort(x = unique(x = ordering)), unlist(x = lapply(X = lapply(X = lapply(X = as.list(x = sort(x = unique(x = ordering))), "==", ordering), which), zip_string)), sep = ": "), collapse = ", "), ";\n", sep = "")), sep = "")
# Create options block (if there are block names):
if (!all(is.na(unlist(x = block_names)))) options_block <- paste(paste("\tTYPESET * UNTITLED (CHARACTERS = ", block_names, ") = ", unlist(x = lapply(X = lapply(X = data_blocks, "[[", "ordering"), function(x) paste(paste(paste(sort(x = unique(x = x)), unlist(x = lapply(X = lapply(X = lapply(X = as.list(x = sort(x = unique(x = x))), "==", x), which), zip_string)), sep = ": "), collapse = ", "), sep = ""))), ";\n", sep = ""), collapse = "")
# Replace cont with Squared if continuous characters present:
if (length(x = grep(" continuous: ", options_block)) > 0) options_block <- gsub(pattern = " continuous: ", replacement = " Squared: ", x = options_block)
# Replace ordered with ord if discrete characters present:
if (length(x = grep("ordered: ", options_block)) > 0) options_block <- gsub(pattern = "ordered: ", replacement = "ord: ", x = options_block)
# Convert continuous character weights to one before making weights block:
character_weights[ordering == "continuous"] <- 1
# Create weights block (if no block names):
if (all(is.na(block_names))) weights_block <- ifelse(all(character_weights == 1), "", paste("\tWTSET * UNTITLED = ", paste(paste(sort(x = unique(x = character_weights)), unlist(x = lapply(X = lapply(X = lapply(X = as.list(x = sort(x = unique(x = character_weights))), "==", character_weights), which), zip_string)), sep = ": "), collapse = ", "), ";\n", sep = ""))
# Create weights block (if there are block names):
if (!all(is.na(unlist(x = block_names)))) weights_block <- paste(paste("\tWTSET * UNTITLED (CHARACTERS = ", block_names, ") = ", unlist(x = lapply(X = lapply(X = data_blocks, "[[", "character_weights"), function(x) paste(paste(paste(sort(x = unique(x = x)), unlist(x = lapply(X = lapply(X = lapply(X = as.list(x = sort(x = unique(x = x))), "==", x), which), zip_string)), sep = ": "), collapse = ", "), sep = ""))), ";\n", sep = ""), collapse = "")
# Build assumptions block:
assumptions_block <- paste("BEGIN ASSUMPTIONS;\n", costmatrix_block, options_block, weights_block, "END;\n", sep = "")
# Build full string with all blocks together:
full_string <- paste("#NEXUS\n\n", header_block, taxa_block, data_block, matrix_block, assumptions_block, sep = "")
# Write to file:
write(full_string, file_name)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.