"read.nexus.data" <- function (file)
{
# Simplified NEXUS data parser.
#
# Version: 09/13/2006 01:01:59 PM CEST
#
# By: Johan Nylander, nylander @ scs.fsu.edu
#
# WARNING: This is parser reads a restricted nexus format,
# see README for details.
#
# Argument (x) is a nexus formatted data file.
#
# Returns (Value) a list of data sequences each made of a single
# vector of mode character where each element is a character.
#
# TODO: Error checking, gap/missing, find.datatype, etc.
#------------------------------------------------------------------
"find.ntax" <- function (x)
{
for (i in 1:NROW(x)) {
if(any(f <- grep("\\bntax", x[i], ignore.case = TRUE))) {
ntax <- as.numeric(sub("(.+?)(ntax\\s*\\=\\s*)(\\d+)(.+)",
"\\3", x[i], perl = TRUE, ignore.case = TRUE))
break
}
}
ntax
}
"find.nchar" <- function (x)
{
for (i in 1:NROW(x)) {
if(any(f <- grep("\\bnchar", x[i], ignore.case = TRUE))) {
nchar <- as.numeric(sub("(.+?)(nchar\\s*\\=\\s*)(\\d+)(.+)",
"\\3", x[i], perl = TRUE, ignore.case = TRUE))
break
}
}
nchar
}
"find.matrix.line" <- function (x)
{
for (i in 1:NROW(x)) {
if(any(f <- grep("\\bmatrix\\b", x[i], ignore.case = TRUE))) {
matrix.line <- as.numeric(i)
break
}
}
matrix.line
}
"trim.whitespace" <- function (x)
{
gsub("\\s+", "", x)
}
"trim.semicolon" <- function (x)
{
gsub(";", "", x)
}
if(file.access(file, mode = 4)) {
stop("file could not be found")
}
X <- scan(file = file, what = character(), sep = "\n",
quiet = TRUE, comment.char = "[", strip.white = TRUE)
ntax <- find.ntax(X)
nchar <- find.nchar(X)
matrix.line <- find.matrix.line(X)
start.reading <- matrix.line + 1
Obj <- list()
length(Obj) <- ntax
i <- 1
pos <- 0
tot.nchar <- 0
tot.ntax <- 0
for (j in start.reading:NROW(X)) {
Xj <- trim.semicolon(X[j])
if(Xj == "") {
break
}
if(any(jtmp <- grep("\\bend\\b", X[j], perl = TRUE, ignore.case = TRUE))) {
break
}
ts <- unlist(strsplit(Xj, "(?<=\\S)(\\s+)(?=\\S)", perl = TRUE))
if (length(ts) > 2) {
stop("nexus parser does not handle spaces in sequences or taxon names (ts>2)")
}
if (length(ts) !=2) {
stop("nexus parser failed to read the sequences (ts!=2)")
}
Seq <- trim.whitespace(ts[2])
Name <- trim.whitespace(ts[1])
nAME <- paste(c("\\b", Name, "\\b"), collapse = "")
if (any(l <- grep(nAME, names(Obj)))) {
tsp <- strsplit(Seq, NULL)[[1]]
for (k in 1:length(tsp)) {
p <- k + pos
Obj[[l]][p] <- tsp[k]
chars.done <- k
}
}
else {
names(Obj)[i] <- Name
tsp <- strsplit(Seq, NULL)[[1]]
for (k in 1:length(tsp)) {
p <- k + pos
Obj[[i]][p] <- tsp[k]
chars.done <- k
}
}
tot.ntax <- tot.ntax + 1
if (tot.ntax == ntax) {
i <- 1
tot.ntax <- 0
tot.nchar <- tot.nchar + chars.done
if (tot.nchar == nchar*ntax) {
print("ntot was more than nchar*ntax")
break
}
pos <- tot.nchar
}
else {
i <- i + 1
}
}
if (tot.ntax != 0) {
cat("ntax:",ntax,"differ from actual number of taxa in file?\n")
stop("nexus parser did not read names correctly (tot.ntax!=0)")
}
for (i in 1:length(Obj)) {
if (length(Obj[[i]]) != nchar) {
cat(names(Obj[i]),"has",length(Obj[[i]]),"characters\n")
stop("nchar differ from sequence length (length(Obj[[i]])!=nchar)")
}
}
Obj <- lapply(Obj, tolower)
Obj
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.