Nothing
# function to import monospaced commuity tables
read.verbatim <- function (file, colnames, layers, replace = c("|", "-", "_"), species.only = FALSE, vertical = TRUE, verbose = FALSE) {
if (missing(file))
stop("plaese supply a path to a file")
if (!missing(layers)) {
if (!is.list(layers) & !is.character(layers) & !is.logical(layers)) {
stop("layers must be a list or character vector", call. = FALSE)
} else {
if (is.list(layers)) {
stopifnot(length(names(layers)) == length(layers))
l <- rep(names(layers), lapply(layers, function (x) diff(x) + 1))
stop("use @ assignment in inputfile", call. = FALSE)
} else {
if (is.character(layers)) {
if (length(layers) == 1)
at <- "@"
else
l <- layers
}
paste.layers <- TRUE # add layers to taxa
with.layers <- TRUE # prune layer from taxa
}
}
} else {
with.layers <- FALSE
}
# read file
txt <- readLines(file.path(file))
# test argument colnames
test <- str_detect(txt, colnames)
if (!any(test)) {
stop("Did not find matches for argument colnames: ", colnames, ". Maybe misspelled?")
}
# get position of keywords
hb <- grep("BEGIN HEAD", txt)
he <- grep("END HEAD", txt)
tb <- grep("BEGIN TABLE", txt)
te <- grep("END TABLE", txt)
kw <- c(hb, he, tb, te)
# test for completeness
test <- length(hb) != 1 | length(he) != 1 | length(tb) != 1 | length(te) != 1
if (test) stop("did not find all keywords!", call. = FALSE)
# test tabs
if (length(grep("\t", txt) > 0))
stop("detected tab characters, please review your data.", call. = FALSE)
# replace
if (length(replace) > 0) {
for (i in 1:length(replace)) {
txt <- sapply(txt,
function (x) gsub(replace[ i ], " ", x, fixed = TRUE),
USE.NAMES = FALSE)
}
}
# find empty lines
el <- sapply(txt, nchar, USE.NAMES = FALSE)
# find also uncomplete lines not representing data
# for example, lines consisting only of spaces
ul <- el < median(el) & el != 0
# disregard keywords
ul[kw] <- FALSE
el <- el == 0
el[ul] <- TRUE
# trim over long lines to the right
txt <- gsub("\\s+$", "", txt)
# select elements with species abundances and header entries
# based on position of keywords and empty lines
ti <- hi <- rep(FALSE, length(txt))
ti[ (tb + 1) : (te - 1) ] <- TRUE # species data
hi[ (hb + 1) : (he - 1) ] <- TRUE # header data
ti[ el ] <- hi[ el ] <- FALSE # omit empty lines
# we now subset the sub-tables as a vector
# of strings for each line
t.txt <- txt[ ti ] # species
h.txt <- txt[ hi ] # header
# test what we have done so far
# header may have trimming errors
nc <- nchar(h.txt)
test <- nc < median(nc)
if (any(test)) h.txt[ test ] <- str_pad(h.txt[ test ], max(nc), side = "right")
# there might still remain inconsistencies in taxa block
test0 <- nchar(t.txt)
test1 <- table(test0)
test2 <- which(names(test1)[min(test1)] == test0)
error1 <- "length of characters is not the same on all lines!"
error2 <- " check line(s) in taxa block: "
if (length(test1) > 1) {
stop(error1, error2,
test2, str(t.txt[test2]),
call. = FALSE)
}
# test header
test2 <- length(unique(nchar(h.txt))) > 1
error <- "no success in pruning header section"
if (test2) {
stop(error, which(nchar(h.txt) != median(nchar(h.txt))), call. = FALSE)
} #else {
#if (!inherits(h.txt, "matrix")) {
# h.txt <- matrix(h.txt)
#}
#}
# the species
# if passed all test proceed to cast to a mono spaced type matrix,
# where each cell is a single glyph, space or dot
t.m <- matrix(" ", ncol = length(t.txt),
nrow = max(sapply(t.txt, nchar)))
vals <- sapply(t.txt, function (x) {
sapply(1:nchar(x), function (y) substring(x, y, y), USE.NAMES = FALSE)
}, simplify = FALSE, USE.NAMES = FALSE)
t.m[] <- unlist(vals)
t.m <- t(t.m)
# search for the beginning of the block of occurences values
# crude and prone to error!
a <- apply(t.m, 2, function (x) length(grep("[[:alpha:]]", x)))
d <- apply(t.m, 2, function (x) length(grep("[[:punct:]]", x)))
jj <- which(c(d - a) > 0)[1]
# layer assigments at the end of taxa string
if (with.layers) {
jat <- which(apply(t.m, 2, function (x) all(x == at)))
if (length(jat) == 0) stop("could not resolve layer with symbol ", at, call. = FALSE)
# next blank
t.at <- t.m[ , jat:ncol(t.m) ]
s <- apply(t.at, 2, function (x) length(grep("[[:space:]]", x)))
jj <- min(which(s > 0)) + jat
}
# split species (txa) and data blocks (val)
txa <- str_trim(apply(t.m[, 1:(jj -1)], 1,
function (x) paste(x, collapse = "")), side = "right")
val <- t.m[ , jj: ncol(t.m) ]
# prune layer from taxa if present
if (with.layers) {
if (!paste.layers) {
lay <- paste(" ", l, sep = "") # add two! leading spaces
lay <- sapply(lay, function (x) grep(x, txa, fixed = TRUE))
names(lay) <- layers
if (length(unlist(lay)) != length(txa))
stop("did not find all layer codes in all rows", call. = FALSE)
for (i in layers)
txa <- gsub(paste(" ", i, sep = ""), "", txa, fixed = TRUE)
l <- rep(names(lay), sapply(lay, length))[ order(unlist(lay)) ]
txa <- str_trim(txa, side = "right")
} else {
if (length(layers) == 1) {
l <- str_trim(sapply(strsplit(txa, at), "[[", 2), side = "right")
txa <- str_trim(sapply(strsplit(txa, at), "[[", 1), side = "right")
}
}
}
# check for spaces as seperators
n.space <- apply(val, 2,
function (x) sum(sapply(x, function (y) y == " ")) )
# additional check for data integrity
test <- which(n.space != nrow(t.m) & n.space != 0)
if (verbose) cat("found ", nrow(t.m), " species")
if (length(test) > 0) {
message("some mono type character columns deviate from the expected pattern")
message("please review your data and apply necessary changes")
for (i in test) {
# missing dot
if (length(grep(".", val[,i], fixed = TRUE)) > 0) {
error1 <- which(val[,i] == ".")
error2 <- which(val[,i] == " ")
stop("missing dot in species ",
ifelse(length(error1) > length(error2), txa[error2], txa[error1]),
" in column ", i + (jj - 1), call. = FALSE)
} else {
stop("misplaced value in species",
txa[which(val[,i] != " ")],
"in column", i + (jj - 1), call. = FALSE)
}
}
} else {
if (verbose) {
cat("\nfound no obvious errors in species data block",
"\nskip",
ifelse(is.na(table(n.space)[2]), 0, table(n.space)[2]),
"columns of blanks",
"and retain", dim(val[, n.space == 0])[2], "columns of data")
}
}
# strip of rows of blanks
if (any(n.space == 0)) val <- val[, n.space == 0]
if (verbose)
cat("\ncharacters found in the species data block:",
sort(unique(as.vector(val))), "\n")
# we are now ready to build the species matrix
x <- data.frame(abbr = txa, val, stringsAsFactors = FALSE)
# the header
# test if any header data exept plot names are present
test <- length(h.txt)
if (test < 2) {
if (verbose) cat("no header data found, just plot names")
# duplicate entry and name it dummy
test <- str_detect(h.txt, colnames)
if (test) {
if (nchar("dummy") < nchar(colnames)) {
d.txt <- gsub(colnames, str_pad("dummy", width = nchar(colnames), side = "right"), h.txt)
} else {
d.txt <- gsub(colnames, substring("dummy", 1, nchar(colnames)), h.txt)
}
} else {
}
h.txt <- c(h.txt, d.txt)
}
# take care about vertical mode
if (!vertical) {
# we use object 'jj' here and hope that fits?
par <- str_trim(substring(h.txt, 1, jj -1), "right")
val <- substring(h.txt, jj, unique(nchar(h.txt)))
val <- sapply(val, function (x) strsplit(x, "[[:blank:]]"))
val <- t(sapply(val, function (x) x[x != ""]))
dimnames(val) <- NULL
# the header
if (nrow(val) > 1) {
y <- data.frame(par, val, stringsAsFactors = FALSE)
} else {
y <- data.frame(par, X1 = as.vector(val), stringsAsFactors = FALSE)
}
attr <- vector("list", length = length(par))
names(attr) <- par
if (ncol(x) != ncol(y)) stop("error parsing header with option vertical = ", vertical, call. = FALSE)
}
if (vertical) { # the default
ne <- sapply(h.txt, nchar)
# groome strings if needed
if (any(ne < median(ne))) {
h.txt[ne < median(ne)] <-
str_pad(h.txt[ne < median(ne)], width = median(ne), side = "right")
}
# split variables (var) and data blocks (val)
h.m <- matrix(" ", ncol = length(h.txt),
nrow = max(sapply(h.txt, nchar)))
vals <- sapply(h.txt, function (x) {
sapply(1:nchar(x), function (y) substring(x, y, y), USE.NAMES = FALSE)
}, simplify = FALSE, USE.NAMES = FALSE)
h.m[] <- unlist(vals)
h.m <- t(h.m)
par <- str_trim(apply(h.m[, 1:(jj -1)], 1,
function (x) paste(x, collapse = "")), side = "right")
val <- h.m[, jj: ncol(h.m)]
# check for spaces as seperators
# hopefully less typos as in species matrix
n.space <- apply(val, 2, function (x) sum(sapply(x, function (y) y == " ")) )
# and omit
# needs a second condition because there
# might be a blank in each column
if (any(n.space == 0) | any(n.space == nrow(val)))
val <- val[, n.space != nrow(val)]
# for sanity
if (dim(val)[2] != (dim(x)[2] - 1)) {
message("maybe set vertical = FALSE")
stop("\nplease check your header data for misspelling or formatting", call. = FALSE)
}
# header blocking variable
for (i in 1:length(par)) {
# assume that the frist element is not empty
# last will be assigend at i == 1 and recycled
if (par[i] == "") par[i] <- last else last <- par[i]
}
# we are now ready to build the header matrix
y <- data.frame(par, val, stringsAsFactors = FALSE)
attr <- vector("list", length = length(unique(par)))
names(attr) <- unique(par)
}
# assign to attributes list,
# perform string grooming and type convert
for (i in unique(par)) {
test <- rle(par)$values
if (any(duplicated(test))) {
error <- test[duplicated(test)]
stop("duplicates in header entries: ", error, call. = FALSE)
}
tmp <- str_trim(apply(y[y[, 1] == i, -1, drop = FALSE], 2, paste, collapse = ""))
# remove dots and drop leading zeros,
# but not for colnames (we preserve what is in the file)
if (i == colnames)
attr[[i]] <- tmp
else
attr[[i]] <- type.convert(gsub(".", "", tmp, fixed = TRUE))
}
# finally assign abbr to rownames and turn into matrix
# test if rownames can be assigned
if (length(unique(x[, 1])) != nrow(x) & !with.layers & !species.only) {
warning("\nspecies are not unique.",
" is the data structured in layers?",
"\nreturn vector of species instead of matrix")
x <- txa
} else {
if (species.only) {
x <- txa
} else {
# paste layers
if (with.layers) {
rownames(x) <- paste(x[, 1], l, sep = "@")
}
else {
rownames(x) <- x[, 1]
}
x <- x[, -1, drop = FALSE]
x <- as.matrix(x)
# assign header as attribute
# assign plot ids
if (!missing(colnames)) {
cn <- attr[[colnames]]
if (any(duplicated(cn))) {
stop("duplicated colnames not allowed: ",
paste(as.character(cn[duplicated(cn)]), collapse = " & "), call. = FALSE)
}
dimnames(x)[[2]] <- as.character(cn)
attributes(x) <- c(attributes(x), attr)
}
else {
dimnames(x)[[2]] <- NULL
attributes(x) <- c(attributes(x), attr)
}
class(x) <- c("VegsoupVerbatim", "matrix")
}
}
return(x)
}
print.VegsoupVerbatim <- function (x) {
print(as.data.frame(x))
}
# function to append to class VegsoupVerbatim
read.verbatim.append <- function (x, file, mode = c("plots", "species", "layers"), collapse = ",", abundance) {
message("read.verbatim.append is depreciated, use castFooter instead")
if (!inherits(x, "VegsoupVerbatim")) {
stop("plaese supply an object of class VegsoupVerbatim")
}
if (missing(file)) {
stop("please supply a path to a file")
}
if (missing(mode)) {
mode = "plots"
message("missing mode, but set mode to ", mode)
}
else {
MODES <- c("plots", "species", "layers")
mode <- match.arg(mode, MODES)
if (mode == "layers") {
stop("mode \"layers\" not yet implemented!", call. = FALSE)
}
}
if (!missing(abundance)) {
stopifnot(length(abundance) == 1)
if (mode == "species") {
abundance <- as.character(abundance)
}
if (mode == "plots") {
if (!is.logical(abundance)) {
abundance <- as.logical(abundance)
}
abundance <- ifelse(is.na(abundance), TRUE, FALSE)
}
}
else {
if (mode == "species") {
abundance = "+"
}
if (mode == "plots") {
abundance = TRUE
}
}
# save attributes
attr <- attributes(x)
txt <- readLines(file.path(file))
txt <- sapply(txt, function (x) {
strsplit(x, ":", fixed = TRUE)
}, USE.NAMES = FALSE)
# plain and simple uses dummy abundance
if (mode == "species") {
rn <- str_trim(unlist(lapply(txt, "[[", 1)))
xx <- gsub("[[:blank:]]", "", unlist(lapply(txt, "[[", 2)))
xx <- strsplit(xx, collapse, fixed = TRUE)
names(xx) <- rn
y <- matrix(".", nrow = length(xx), ncol = ncol(x))
colnames(y) <- colnames(x)
rownames(y) <- rn
for (i in 1:length(xx)) {
# i = 1
ii <- match(names(xx)[i], rownames(y))
stopifnot(length(ii) == 1)
jj <- match(xx[[i]], colnames(y))
y[ii, jj] <- abundance
}
}
# more structred, uses given abundance, but no layers
if (mode == "plots") {
cn <- str_trim(unlist(lapply(txt, "[[", 1)))
xx <- str_trim(unlist(lapply(txt, "[[", 2)))
xx <- sapply(strsplit(xx, collapse, fixed = TRUE), str_trim)
tmp <- vector("list", length = length(xx))
names(tmp) <- type.convert(cn) # handle leading zeros as in read.verbatim!
for (i in 1:length(xx)) {
ll <- sapply(xx[[i]], nchar)
tmp[[i]] <- list(
str_trim(substring(xx[[i]], 1, ll - 1)),
substring(xx[[i]], ll, ll))
}
rn <- unique(unlist(lapply(tmp, "[[", 1)))
y <- matrix(".", nrow = length(rn), ncol = ncol(x))
colnames(y) <- colnames(x)
rownames(y) <- rn
for (i in 1:length(tmp)) {
# i = 1
for (j in seq(along = tmp[[ i ]][[1]])) {
# j = 1
ii <- match(tmp[[ i ]][[1]][ j ], rownames(y))
jj <- match(names(tmp)[[ i ]], colnames(y))
y[ii, jj] <- tmp[[ i ]][[2]][j]
}
}
}
test <- intersect(rownames(x), rownames(y))
if (length(test) != 0) {
stop("some species in file are already present in object x: ", test)
}
x <- rbind(x, y)
class(x) <- c("VegsoupVerbatim", "matrix")
attributes(x) <- c(attributes(x), attr[-c(1:2)])
return(x)
}
# accesor to get header data from VegsoupVerbatim objects
header <- function (x) {
stopifnot(inherits(x, "VegsoupVerbatim"))
r <- data.frame(attributes(x)[- c(1:2, length(attributes(x)))])
rownames(r) <- colnames(x)
return(r)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.