# format data.frame : 2 columns, header: darwinCOre, 2 columns: 1.: species
# name, higherGeography: presence/absence: 1_ present, 0 = absent, NA = no
# info prefix = group name as prefix for outputfiles
CladeByTrait <- function(x, tree, prefix, min_clade_size, max_clade_size, monophyly_threshold,
summary = FALSE) {
## Data preparation
if (is.data.frame(x)) {
geo <- data.frame(trait = x[, 2], row.names = x[, 1])
}
dat.tre <- suppressWarnings(geiger::treedata(tree, geo))
# records from the tree with no occrrences
nodata <- tree$tip.label[!tree$tip.label %in% row.names(geo)]
nodatatab <- data.frame(rep(NA, length(nodata)), row.names = nodata)
# records not occurring in the study area
outsidetab <- data.frame(subset(dat.tre$data, dat.tre$data[, 1] == 0))
outside <- rownames(outsidetab)
# standardize names
names(outsidetab) <- names(nodatatab) <- names(data.frame(trait = dat.tre$data))
# combine species with data and species in the phylogeny but without data
all.classified <- rbind(data.frame(trait = dat.tre$dat), nodatatab)
# species not in the phylogeny and not in the study area
others2 <- data.frame(subset(geo, geo[, 1] == 0)[, 1], row.names = rownames(subset(geo,
geo[, 1] == 0)))
names(others2) <- names(geo)[1]
# create summary files if desired
if (summary) {
# A modified input table, taking into account the phylogeny
write.table(all.classified, paste(prefix, "_area_classification_used.txt",
sep = ""), quote = FALSE, col.names = FALSE, sep = "\t")
# the tree with the area classification as tip colour
colo <- geo[match(tree$tip.label, rownames(geo)), ]
colo <- as.character(factor(colo, levels = c(0, 1), labels = c("blue",
"red")))
colo[is.na(colo)] <- "grey"
pdf(paste(prefix, "_phylogeny+traitdata.pdf", sep = ""), height = max(round(c(length(tree$tip.label)/30,
0), 11)), width = 8, paper = "special")
plot(tree, tip.color = colo, cex = 0.5)
legend("topleft", bg = "white", fill = c("blue", "red", "grey"), legend = c("Outside",
"Inside", "No occurrence data"), cex = 0.5)
dev.off()
}
## finding trait-exclusive clades, this part was written by R. SCharn
tr <- dat.tre$phy
# create matrix for each node on state 1 ('not checked')
nodenr <- matrix(nrow = length((length(tr$tip.label) + 1):max(tr$edge)),
ncol = 2)
# sets all nodes to 1 ie all can be checked
nodenr[, 1] <- 1
nodenr[, 2] <- 0
rownames(nodenr) <- (length(tr$tip.label) + 1):max(tr$edge)
colnames(nodenr) <- c("checked_monophyletic", "clade_size_mono")
# check for clades monophylethic for area 'X' skip nodes identified with 0
# (not monophyletic for a given area OR descendent of monophyletic clade) __
# # monophyly_threshold is NUMBER of species not in area of interest __
# V=geo[tips(tr,i),area] __ if (length(V[V == 0]) > monophyly_threshold ){
# __ nodenr[as.character(i),1]=0 __ } else{ monophyly_threshold is
# percentage of species not in area of interest OUT OF CLADE SIZE
for (i in (length(tr$tip.label) + 1):max(tr$edge)) {
if (nodenr[as.character(i), 1] == 1) {
V <- geo[geiger::tips(tr, i), "trait"]
if (length(V[V == 0])/length(V) >= monophyly_threshold || length(geiger::tips(tr,
i)) > max_clade_size) {
nodenr[as.character(i), 1] <- 0
} else {
nodenr[as.character(i), 2] <- length(geiger::tips(tr, i)) # number of tips in the clade
nodes <- .getDescend(tr, i)
nodenr[as.character(nodes[nodes > length(tr$tip.label)]), 1] <- 0 # set all descendent noeds to zero (skip check)
nodenr[as.character(i), 1] <- 2 # indentifier of monophyletic group
}
}
}
# removes from table all nodes which are not of interest clades TABLE:
# column 1: 0-> discarded (there shouldn't be any); 1-> untested (there
# shouldn't be any at the end of the loop); 2-> positive hit CALL RUUD for
# bug reports or if you want to have a good time (ruud.scharn@gmail.com)
# column 2: size of the clade
clades <- subset(nodenr, nodenr[, 2] >= min_clade_size)
## Write output
# pdf with visualization of the subtrees and write subtrees as enxus files
lengtab <- as.vector(0)
subtreelist <- as.list(0)
pdf(paste(prefix, "_subtrees.pdf", sep = ""), height = 11, width = 8, paper = "special")
for (i in 1:length(clades[, 2])) {
node <- as.numeric(row.names(clades)[i])
subtree <- ape::extract.clade(tr, node, root.edge = 0, interactive = FALSE)
lengtab[i] <- max(picante::node.age(subtree)$ages)
ape::write.tree(subtree, file = paste(prefix, "clade_", i, ".tre", sep = ""))
colo <- geo[match(subtree$tip.label, rownames(geo)), ]
colo <- gsub(1, "red", colo)
colo <- gsub(0, "blue", colo)
colo[is.na(colo)] <- "grey"
plot(subtree, tip.color = colo, cex = 0.5, main = paste("Clade", i,
sep = "_"))
legend("topleft", bg = "white", fill = c("blue", "red", "grey"), legend = c("Outside",
"Inside", "No occurrence data"), cex = 0.7)
ape::add.scale.bar()
}
dev.off()
# log file and log to screen
descwrite <- c("date", "input_prefix", "min_clade_size", "max_clade_size",
"monophyly_threshold", "Species in Phylogeny", "Species with traits data",
"Species in trait and phylogeny", "Species with no trait information",
"Number of clades returned", "Mean_tip_number_per_tree", "Median tip age")
logwrite <- c(as.character(Sys.Date()), prefix, min_clade_size, max_clade_size,
monophyly_threshold, ape::Ntip(tree), nrow(geo), ape::Ntip(tree) - length(nodata),
length(nodata), length(clades[, 2]), round(mean(clades[, 2]), digits = 2),
round(median(lengtab), 1))
screenout <- data.frame(logwrite, row.names = descwrite)
print(screenout)
if (summary) {
# write log file
if (!"GetAreasClades_log.txt" %in% list.files()) {
write.table(as.vector(data.frame(t(descwrite))), "CladesByTrait_log.txt",
col.names = FALSE, row.names = FALSE, quote = FALSE, sep = "\t")
}
write.table(as.vector(as.data.frame(t(logwrite))), col.names = FALSE, row.names = FALSE,
"CladesByTrait_log.txt", append = TRUE, quote = FALSE, sep = "\t")
# plot summary graphs
pdf(paste(prefix, "_summary_graphs.pdf", sep = "")[1], paper = "a4")
par(mar = c(12, 3, 3, 3))
barplot(as.numeric(logwrite[6:11]), names.arg = paste(descwrite[6:11],
" (", logwrite[6:10], ")", sep = ""), las = 2, cex.names = 0.7,
col = "brown", main = paste(prefix, " species numbers", sep = "")[1])
box("plot")
hist(lengtab, main = "Node age", xlab = "Node age", col = "brown")
hist(clades[, 2], main = "Number of tips per clade", xlab = "Number of tips per clade",
col = "brown")
dev.off()
# summary table with indeces for each clade
clades_write <- data.frame(clades, node_nr = as.numeric(rownames(clades)),
row.names = 1:length(clades[, 1]))[, c(3, 1, 2)] #add node number as column
write.table(clades_write, paste("clade_identity_and_size_", area, ".txt",
sep = ""), sep = "\t")
# log to screen
warning(paste(descwrite, logwrite, rep("\n", length(logwrite))))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.