#' localscalarPP
#'
#' A function that takes the output of a kappa, lambda, delta, VRates etc. RJ bayesTraits run and runs post-processing on it.
#' @param rjlog The RJ output of the run - typically suffixed with .VarRates.txt
#' @param tree The tree the analysis was run on
#' @param burnin The burnin (if required) for the mcmc (generally worked out from the other logfile)
#' @param thinning Thinning parameter for the MCMC output - again, worked out from the raw MCMC output logfile.
#' @param returnscales Return full distributions of all scalar values that hit each node?
#' @param returnorigins Return a list of the values of each scalar that originates at each node per iteration?
#' @import phytools
#' @export
#' @name localscalarPP
localscalarPP <- function(rjlog, rjtrees, tree, burnin = 0, thinning = 1, returnscales = FALSE,
returnorigins = FALSE) {
# load the sample of trees.
extree <- ladderize(tree)
print("Loading log file.")
rjout <- loadRJ(rjlog, burnin = burnin, thinning = thinning)
rj_output <- rjout$rj_output
subtrees <- subtrees
rjtaxa <- rjout$taxa
print("Loading posterior trees.")
posttrees <- read.nexus(rjtrees)
posttrees <- posttrees[burnin:length(posttrees)]
# Make a list to store descriptions of each scalar present in each iteration.
ratesperit <- vector(mode = "list", length = nrow(rj_output))
print("Searching for scalars...")
pb <- txtProgressBar(min = 0, max = nrow(rj_output), style = 3)
for (i in 1:nrow(rj_output)) {
lastrates <- rj_output[i, !is.na(rj_output[i, ])]
# If the number of columns is seven, there are no scalars applied this generation.
if (ncol(lastrates) == 7) {
node <- NA
scale <- NA
created <- NA
nodebranchdelta <- NA
} else {
int <- lastrates[8:length(lastrates)]
node <- unlist(c(int[grep("NodeID*", names(int))]))
scale <- unlist(c(int[grep("Scale*", names(int))]))
created <- unlist(c(int[grep("CreatIt*", names(int))]))
nodebranchdelta <- unlist(c(int[grep("NodeBranch*", names(int))]))
}
scalars <- data.frame(node = node, scale = scale, created = created, nodebranchdelta = nodebranchdelta)
ratesperit[[i]] <- scalars
setTxtProgressBar(pb, i)
}
close(pb)
counts <- matrix(ncol = 68, nrow = (nrow(extree$edge) + 1))
colnames(counts) <- c("branch", "ancNode", "descNode", "nTips", "start", "end", "mid", "orgBL", "meanBL", "medianBL", "quart25", "quart75",
"itersScaled", "itersRatescaled", "itersDelta", "itersKappa", "itersLambda",
"pScaled", "pRate", "pDelta", "pKappa", "pLambda",
"nScalar", "nRate", "nDelta", "nKappa", "nLambda",
"nOrgnScalar", "nOrgnNRate", "nOrgnBRate", "nOrgnDelta", "nOrgnKappa", "nOrgnLambda",
"mnSpI", "mnRpI", "mnDpI", "mnKpI", "mnLpI",
"mnSpE", "mnRpE", "mnDpE", "mnKpE", "mnLpE",
"rangeRate", "lqRate", "uqRate", "meanRate", "medianRate", "modeRate",
"rangeDelta", "lqDelta", "uqDelta", "meanDelta", "medianDelta", "modeDelta",
"rangeKappa", "lqKappa", "uqKappa", "meanKappa", "medianKappa", "modeKappa",
"rangeLambda", "lqLambda", "uqLambda", "meanLambda", "medianLambda", "modeLambda", "species")
counts[ , "branch"] <- c(0:nrow(extree$edge))
counts[ , "ancNode"] <- c(0, extree$edge[ , 1])
counts[ , "descNode"] <- c((length(tree$tip.label) + 1), extree$edge[ , 2])
counts[ , "orgBL"] <- c(0, extree$edge.length)
print("Calculating mean branch lengths.")
meanbl <- meanBranches(reftree = extree, trees = rjtrees, burnin = burnin, thinning = thinning, pbar = TRUE)
counts[ , "meanBL"] <- c(0, meanbl$meanbranches)
counts[ , "medianBL"] <- c(0, meanbl$medianbranches)
counts[ , "quart25"] <- c(0, meanbl$quart25)
counts[ , "quart75"] <- c(0, meanbl$quart75)
hts <- nodeHeights(extree)
hts <- round(abs(hts - max(hts)), 4)
counts[ , "start"] <- c(0, hts[ , 1])
counts[ , "end"] <- c(0, hts[ , 2])
counts <- as.data.frame(counts)
# Deal with the root
descs <- getDescs(extree, node = counts[1, "descNode"])
counts[1, "nTips"] <- sum(descs <= length(tree$tip.label))
counts[1, "mid"] <- 0
counts[1, "species"] <- paste0(extree$tip.label[order(extree$tip.label)], collapse = ",")
for (i in 2:nrow(counts)) {
descs <- getDescs(extree, node = counts[i, "descNode"])
counts[i, "nTips"] <- sum(descs <= length(tree$tip.label))
if (counts[i, "nTips"] == 0) {
counts[i, "nTips"] <- 1
}
if (counts[i, "descNode"] <= length(extree$tip.label)) {
counts[i, "species"] <- extree$tip.label[counts[i, "descNode"]]
} else {
tips <- getDescs(extree, counts[i, "descNode"])
tips <- tips[tips <= length(extree$tip.label)]
tips <- extree$tip.label[tips]
counts[i, "species"] <- paste0(sort(tips), collapse = ",")
}
counts[i, "mid"] <- mean(c(hts[(i - 1), 1], hts[(i - 1), 2]))
}
counts[ , c(13:67)] <- 0
rates <- matrix(nrow = nrow(counts), ncol = length(ratesperit))
rownames(rates) <- counts[ , "branch"]
deltas <- vector(mode = "list", length = nrow(counts))
kappas <- vector(mode = "list", length = nrow(counts))
lambdas <- vector(mode = "list", length = nrow(counts))
names(rates) <- c("root", extree$edge[ , 2])
names(deltas) <- c("root", extree$edge[ , 2])
names(kappas) <- c("root", extree$edge[ , 2])
names(lambdas) <- c("root", extree$edge[ , 2])
# make lists for the origins of deltas etc.
.tmp <- rep(1, length(ratesperit))
node_origins <- replicate(nrow(counts), as.numeric(paste(.tmp)), simplify = FALSE)
branch_origins <- replicate(nrow(counts), as.numeric(paste(.tmp)), simplify = FALSE)
delta_origins <- replicate(nrow(counts), as.numeric(paste(.tmp)), simplify = FALSE)
lambda_origins <- replicate(nrow(counts), as.numeric(paste(.tmp)), simplify = FALSE)
kappa_origins <- replicate(nrow(counts), as.numeric(paste(.tmp)), simplify = FALSE)
names(node_origins) <- c("root", extree$edge[ , 2])
names(branch_origins) <- c("root", extree$edge[ , 2])
names(delta_origins) <- c("root", extree$edge[ , 2])
names(lambda_origins) <- c("root", extree$edge[ , 2])
names(kappa_origins) <- c("root", extree$edge[ , 2])
print("Processing scalars...")
pb <- txtProgressBar(min = 0, max = length(ratesperit), style = 3)
for (i in 1:length(ratesperit)) {
scalars <- ratesperit[[i]]
# make a list of all the branches, as defined by descendent node, with a column of zeroes for any scalar, rate, delta,
# kappa and lambda. These become ones when a scalar is place.
if (!all(is.na(ratesperit[[i]]))) {
# Generate the table to store the current scalar of the current iteration.
comptable <- matrix(ncol = 6, nrow = (nrow(extree$edge) + 1))
comptable[1, 1] <- length(tree$tip.label) + 1
comptable[c(2:nrow(comptable)) , 1] <- extree$edge[ , 2]
comptable[ , c(2:6)] <- 0
colnames(comptable) <- c("descNode", "totalscalar", "rate", "delta", "kappa", "lambda")
iteration_rates <- rep(1, nrow(counts))
for (j in 1:nrow(scalars)) {
# Get the node the scalar applies to, and the type of scalar.
currentnode <- scalars[j, "node"]
currenttrans <- scalars[j, "nodebranchdelta"]
currentscale <- as.numeric(as.character(scalars[j, "scale"]))
# Find the taxa numbers that descend from the node that the scalar is applied too.
taxa <- rjout$subtrees[rjout$subtrees$node == currentnode, ]
taxa <- taxa[ , !is.na(taxa)]
taxa <- taxa[c(4:length(taxa))]
# Find the MRCA of those taxa - now the node number (mrca) is in terms of a phylogeny as stored in ape.
# If the taxa is a single tip, make that MRCA (this can only apply to branch scalars.)
if (length(taxa) == 1) {
mrca <- which(tree$tip.label == rjout$taxa[rjout$taxa[ , 1] %in% taxa, 2])
} else {
mrca <- getMRCA(extree, rjout$taxa[rjout$taxa[ , 1] %in% taxa, 2])
}
# Find out what type of scalar is currently under consideration.
if (currenttrans == "Node") {
# If it's a node, the descendents of that node, plus the branch leading to it, need to be considered.
descs <- c(getDescs(extree, mrca), mrca)
# Then find branches from descs and count.
counts[counts[, "descNode"] %in% descs , "nScalar"] <- counts[counts[, "descNode"] %in% descs , "nScalar"] + 1
counts[counts[, "descNode"] %in% descs , "nRate"] <- counts[counts[, "descNode"] %in% descs , "nRate"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnScalar"] <- counts[counts[, "descNode"] == mrca, "nOrgnScalar"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnNRate"] <- counts[counts[, "descNode"] == mrca, "nOrgnNRate"] + 1
rws <- comptable[ , 1] %in% descs
comptable[rws & comptable[ , "totalscalar"] == 0, "totalscalar"] <- 1
comptable[rws & comptable[ , "rate"] == 0, "rate"] <- 1
# Finally record the rates.
if (mrca == length(extree$tip.label) + 1) {
node_origins$root[i] <- currentscale
} else {
node_origins[[which(names(node_origins) == mrca)]][i] <- currentscale
}
for (k in which(rws)) {
iteration_rates[k] <- iteration_rates[[k]] * currentscale
}
}
if (currenttrans == "Branch") {
# Just use the branch that ends in mrca.
descs <- mrca
rws <- comptable[ , 1] %in% descs
counts[counts[, "descNode"] == mrca , "nScalar"] <- counts[counts[, "descNode"] == mrca , "nScalar"] + 1
counts[counts[, "descNode"] %in% descs , "nRate"] <- counts[counts[, "descNode"] %in% descs , "nRate"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnScalar"] <- counts[counts[, "descNode"] == mrca, "nOrgnScalar"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnBRate"] <- counts[counts[, "descNode"] == mrca, "nOrgnBRate"] + 1
comptable[rws & comptable[ , "totalscalar"] == 0, "totalscalar"] <- 1
comptable[rws & comptable[ , "rate"] == 0, "rate"] <- 1
# Finally record the rates.
if (mrca == length(extree$tip.label) + 1) {
branch_origins$root[i] <- currentscale
} else {
branch_origins[[which(names(branch_origins) == mrca)]][i] <- currentscale
}
for (k in which(rws)) {
iteration_rates[k] <- iteration_rates[[k]] * currentscale
}
}
if (currenttrans == "Delta") {
# Define the branches (as descendent nodes) that were scaled.
descs <- c(getDescs(extree, mrca), mrca)
# Get rows of the table that need to have the sount increased.
rws <- comptable[ , 1] %in% descs
# Increase the number of deltas for each node in this scalar by 1, in the counts table, and
# the total number of scalars.
counts[counts[, "descNode"] %in% descs , "nScalar"] <- counts[counts[, "descNode"] %in% descs , "nScalar"] + 1
counts[counts[, "descNode"] %in% descs , "nDelta"] <- counts[counts[, "descNode"] %in% descs , "nDelta"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnScalar"] <- counts[counts[, "descNode"] == mrca, "nOrgnScalar"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnDelta"] <- counts[counts[, "descNode"] == mrca, "nOrgnDelta"] + 1
# Then adjust the comptable to a) record yes or no for being scaled at all, and b) record
# yes or no for each of the scalars.
comptable[rws & comptable[ , "totalscalar"] == 0, "totalscalar"] <- 1
comptable[rws & comptable[ , "delta"] == 0, "delta"] <- 1
# Finally record the deltas.
if (mrca == length(extree$tip.label) + 1) {
delta_origins$root[i] <- currentscale
} else {
delta_origins[[which(names(delta_origins) == mrca)]][i] <- currentscale
}
for (k in which(rws)) {
deltas[[k]] <- c(deltas[[k]], currentscale)
}
}
if (currenttrans == "Kappa") {
descs <- c(getDescs(extree, mrca), mrca)
rws <- comptable[ , 1] %in% descs
counts[counts[, "descNode"] %in% descs , "nScalar"] <- counts[counts[, "descNode"] %in% descs , "nScalar"] + 1
counts[counts[, "descNode"] %in% descs , "nKappa"] <- counts[counts[, "descNode"] %in% descs , "nKappa"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnScalar"] <- counts[counts[, "descNode"] == mrca, "nOrgnScalar"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnKappa"] <- counts[counts[, "descNode"] == mrca, "nOrgnKappa"] + 1
comptable[rws & comptable[ , "totalscalar"] == 0, "totalscalar"] <- 1
comptable[rws & comptable[ , "kappa"] == 0, "kappa"] <- 1
# Finally record the kappas.
if (mrca == length(extree$tip.label) + 1) {
kappa_origins$root[i] <- currentscale
} else {
kappa_origins[[which(names(kappa_origins) == mrca)]][i] <- currentscale
}
for (k in which(rws)) {
kappas[[k]] <- c(kappas[[k]], currentscale)
}
}
if (currenttrans == "Lambda") {comptable
descs <- c(getDescs(extree, mrca), mrca)
rws <- comptable[ , 1] %in% descs
counts[counts[, "descNode"] %in% descs , "nScalar"] <- counts[counts[, "descNode"] %in% descs , "nScalar"] + 1
counts[counts[ , "descNode"] %in% descs , "nLambda"] <- counts[counts[, "descNode"] %in% descs , "nLambda"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnScalar"] <- counts[counts[, "descNode"] == mrca, "nOrgnScalar"] + 1
counts[counts[, "descNode"] == mrca, "nOrgnLambda"] <- counts[counts[, "descNode"] == mrca, "nOrgnLambda"] + 1
comptable[rws & comptable[ , "totalscalar"] == 0, "totalscalar"] <- 1
comptable[rws & comptable[ , "lambda"] == 0, "lambda"] <- 1
# Finally record the lambdas.
if (mrca == length(extree$tip.label) + 1) {
lambda_origins$root[i] <- currentscale
} else {
lambda_origins[[which(names(lambda_origins) == mrca)]][i] <- currentscale
}
for (k in which(rws)) {
lambdas[[k]] <- c(lambdas[[k]], currentscale)
}
}
}
rates[ , i] <- iteration_rates
# Now put the yes/no data from comptable into counts in the times scaled/rate, delta etc. This counts the number of
# ITERATIONS when scaling occurred .
tmp <- counts[ , "descNode"] %in% comptable[comptable[ , "totalscalar"] == 1, "descNode"]
counts[tmp, "itersScaled"] <- counts[tmp, "itersScaled"] + 1
tmp <- counts[ , "descNode"] %in% comptable[comptable[ , "rate"] == 1, "descNode"]
counts[tmp, "itersRatescaled"] <- counts[tmp, "itersRatescaled"] + 1
tmp <- counts[ , "descNode"] %in% comptable[comptable[ , "delta"] == 1, "descNode"]
counts[tmp, "itersDelta"] <- counts[tmp, "itersDelta"] + 1
tmp <- counts[ , "descNode"] %in% comptable[comptable[ , "kappa"] == 1, "descNode"]
counts[tmp, "itersKappa"] <- counts[tmp, "itersKappa"] + 1
tmp <- counts[ , "descNode"] %in% comptable[comptable[ , "lambda"] == 1, "descNode"]
counts[tmp, "itersLambda"] <- counts[tmp, "itersLambda"] + 1
}
setTxtProgressBar(pb, i)
}
close(pb)
# Finally generate the descriptive stuff for the scalar values, and remove columns that are all zero.
# Before all this it is pretty straight forward to calculate the probs for each scalar.
counts[ , "pScaled"] <- counts[ , "itersScaled"] / length(ratesperit)
counts[ , "pRate"] <- counts[ , "itersRatescaled"] / length(ratesperit)
counts[ , "pDelta"] <- counts[ , "itersDelta"] / length(ratesperit)
counts[ , "pKappa"] <- counts[ , "itersKappa"] / length(ratesperit)
counts[ , "pLambda"] <- counts[ , "itersLambda"] / length(ratesperit)
counts[ , "mnSpI"] <- counts[ , "nScalar"] / length(ratesperit)
counts[ , "mnRpI"] <- counts[ , "nRate"] / length(ratesperit)
counts[ , "mnDpI"] <- counts[ , "nDelta"] / length(ratesperit)
counts[ , "mnKpI"] <- counts[ , "nKappa"] / length(ratesperit)
counts[ , "mnLpI"] <- counts[ , "nLambda"] / length(ratesperit)
counts[ , "mnSpE"] <- counts[ , "nScalar"] / counts[ , "itersScaled"]
counts[ , "mnRpE"] <- counts[ , "nRate"] / counts[ , "itersRatescaled"]
counts[ , "mnDpE"] <- counts[ , "nDelta"] / counts[ , "itersDelta"]
counts[ , "mnKpE"] <- counts[ , "nKappa"] / counts[ , "itersKappa"]
counts[ , "mnLpE"] <- counts[ , "nLambda"] / counts[ , "itersLambda"]
# This now works through a table, not a list...
for (i in 1:nrow(rates)) {
counts[i, "rangeRate"] <- max(rates[i, ]) - min(rates[i, ])
counts[i, "lqRate"] <- quantile(rates[i, ])[2]
counts[i, "uqRate"] <- quantile(rates[i, ])[4]
counts[i, "meanRate"] <- mean(rates[i, ])
counts[i, "medianRate"] <- median(rates[i, ])
if (length(unique(rates[i, ])) == 1){
counts[i, "modeRate"] <- unique(rates[i, ])
} else {
dens <- density(rates[i, ])
counts[i, "modeRate"] <- dens$x[which(dens$y == max(dens$y))]
}
}
for (i in 1:length(deltas)) {
if (is.null(deltas[[i]])) {
deltas[[i]] <- 1
}
counts[i, "rangeDelta"] <- max(deltas[[i]]) - min(deltas[[i]])
counts[i, "lqDelta"] <- quantile(deltas[[i]])[2]
counts[i, "uqDelta"] <- quantile(deltas[[i]])[4]
counts[i, "meanDelta"] <- mean(deltas[[i]])
counts[i, "medianDelta"] <- median(deltas[[i]])
if (length(deltas[[i]]) > 1) {
dens <- density(deltas[[i]])
if (length(dens$x[which(dens$y == max(dens$y))]) != 1) {
dens_poss <- dens$x[which(dens$y == max(dens$y))]
counts[i, "modeDelta"] <- sample(dens_poss, 1)
} else {
counts[i, "modeDelta"] <- dens$x[which(dens$y == max(dens$y))]
}
} else {
counts[i, "modeDelta"] <- deltas[[i]]
}
}
for (i in 1:length(kappas)) {
if (is.null(kappas[[i]])) {
kappas[[i]] <- 1
}
counts[i, "rangeKappa"] <- max(kappas[[i]]) - min(kappas[[i]])
counts[i, "lqKappa"] <- quantile(kappas[[i]])[2]
counts[i, "uqKappa"] <- quantile(kappas[[i]])[4]
counts[i, "meanKappa"] <- mean(kappas[[i]])
counts[i, "medianKappa"] <- median(kappas[[i]])
if (length(kappas[[i]]) > 1) {
dens <- density(kappas[[i]])
if (length(dens$x[which(dens$y == max(dens$y))]) != 1) {
dens_poss <- dens$x[which(dens$y == max(dens$y))]
counts[i, "modeKappa"] <- sample(dens_poss, 1)
} else {
counts[i, "modeKappa"] <- dens$x[which(dens$y == max(dens$y))]
}
} else {
counts[i, "modeKappa"] <- kappas[[i]]
}
}
for (i in 1:length(lambdas)) {
if (is.null(lambdas[[i]])) {
lambdas[[i]] <- 1
}
counts[i, "rangeLambda"] <- max(lambdas[[i]]) - min(lambdas[[i]])
counts[i, "lqLambda"] <- quantile(lambdas[[i]])[2]
counts[i, "uqLambda"] <- quantile(lambdas[[i]])[4]
counts[i, "meanLambda"] <- mean(lambdas[[i]])
counts[i, "medianLambda"] <- median(lambdas[[i]])
if (length(lambdas[[i]]) > 1) {
dens <- density(lambdas[[i]])
if (length(dens$x[which(dens$y == max(dens$y))]) != 1) {
dens_poss <- dens$x[which(dens$y == max(dens$y))]
counts[i, "modeLambda"] <- sample(dens_poss, 1)
} else {
counts[i, "modeLambda"] <- dens$x[which(dens$y == max(dens$y))]
}
} else {
counts[i, "modeLambda"] <- lambdas[[i]]
}
}
# Finally remove zero columns.
keeps <- NULL
for (i in 1:ncol(counts)) {
if (all(is.na(counts[ , i]))) {
counts[ , i] <- 0
}
if (all(counts[ , i] == 1)) {
counts[ , i] <- 0
}
}
for (i in 1:ncol(counts)) {
if (any(counts[ , i] != 0)) {
keeps <- c(keeps, i)
}
}
counts <- counts[ , keeps]
meantree <- extree
meantree$edge.length <- counts[c(2:nrow(counts)) , "meanBL"]
res <- list(data = counts, meantree = meantree, niter = length(ratesperit))
if (returnscales) {
scalars <- list(rate = rates, delta = deltas, kappa = kappas, lambda = lambdas)
res <- c(res, list(scalars = scalars))
}
if (returnorigins) {
origins <- list(nodes = node_origins, branches = branch_origins, delta = delta_origins, kappa = kappa_origins, lambda = lambda_origins)
res <- c(res, list(origins = origins))
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.