clock.space | R Documentation |
clock.space takes in the output from the function collect.clocks, and places rates data in a modified Euclidean space using MDS or PCA. Additionally, it performs tests of several clock hypotheses.
clock.space(ratesmat, sptr, pca = T, mds = F, sp.time.tree = T, log.branches = F, pca.permutations = 100, mean.scaling.brlen = 0.05, ncore = 1, sammon.correction = F, verbose = T, pdf.file = NULL)
ratesmat |
A list as generated by the function collect.clocks, containing the raw branch lengths collected from loci trees, and a vector of the number of loci that contain each of the branches of the species tree. |
sptr |
A phylogenetic tree of class 'phylo'. |
pca |
A logical indicating whether PCA analysis on rates space should be performed. |
mds |
A logical indicating whether MDS analysis on rates space should be performed. |
sp.time.tree |
A logical indicating whether the species tree is a time-tree. |
log.branches |
A logical indicating whether branches are to be log-transformed. |
pca.permutations |
A numeric of the number of permutations for tests on the PCA space. |
mean.scaling.brlen |
A numeric of the scaling parameter passed to bsd.dist, only used when performing MDS. |
ncore |
A numeric value of the number of computer cores to be used for calculating the bsd matrix. |
sammon.correction |
A logical indicating whether to perform sammon correction to reduce distortion of the MDS space. |
verbose |
A logical indicating whether to provide details during the run. |
pdf.file |
A character of the prefix to be used for making default figures of two dimensions of = the spaces produced. Can be NULL if no figure is desired. |
The function imputes any missing data of rates and provides figures that summarise the support for the first two dimensions. In the case of PCA, the function performs tests of the clock space and the figures include vectors for the ten most important branches in describing the variation in the first two principal components.
An extended list of class 'clockstarx' from that produced by collect.clocks.
David Duchene
collect.clocks
set.seed(12345)
sptr <- rcoal(20)
loctrs <- lapply(1:20, function(x) rtree(20))
crx.clocks <- collect.clocks(loctrs, sptr)
crx.space <- clock.space(crx.clocks, sptr)
crx.space
## The function is currently defined as
function (ratesmat, sptr, pca = T, mds = F, sp.time.tree = T,
log.branches = F, pca.permutations = 100, mean.scaling.brlen = 0.05,
ncore = 1, sammon.correction = F, verbose = T, pdf.file = NULL)
{
if (sp.time.tree) {
rootlineages <- lapply(Descendants(sptr, getMRCA(sptr,
sptr$tip.label), type = "children"), function(x) sptr$tip.label[Descendants(sptr,
x, type = "tips")[[1]]])
if (is.rooted(sptr))
sptr <- unroot(sptr)
rootbr <- which(apply(sptr$edge, 1, function(x) all(c(mrca.phylo(sptr,
rootlineages[[1]]), mrca.phylo(sptr, rootlineages[[2]]))
x)))
}
else if (is.rooted(sptr)) {
sptr <- unroot(sptr)
}
raw.rates.matrix <- apply(ratesmat$raw.rates.matrix, 2, function(x) {
if (log.branches) {
if (any(x == 0, na.rm = T))
x[x == 0] <- 1e-06
x <- log(x)
}
if (sum(is.na(x)) < (length(x) - 2) & any(x < 0, na.rm = T))
x <- x + abs(min(x, na.rm = T))
return(x)
})
meanlen <- mean(as.numeric(raw.rates.matrix), na.rm = T)
impudats <- apply(raw.rates.matrix, 2, function(x) {
nas <- is.na(x)
if (any(nas)) {
if (all(nas))
return(rep(meanlen, length(x)))
if (sum(nas) == (length(x) - 1))
x[nas] <- rep(meanlen, (length(x) - 1))
else x[nas] <- mean(x, na.rm = T)
return(x)
}
else {
return(x)
}
})
if (sp.time.tree)
impudats[, rootbr] <- meanlen
impudats.weighted <- weigh.clocks(impudats)
if (mds) {
cat("Making pairwise comparisons of clocks across loci",
fill = T)
rawmatfun <- function(y) {
trs <- apply(y, 1, function(x) {
trtemp <- sptr
trtemp$edge.length <- x
return(trtemp)
})
class(trs) <- "multiPhylo"
trtempKF <- KF.dist(trs)
return(trtempKF)
}
rawmat <- rawmatfun(impudats)
bsdmat <- bsd.matrix(impudats, sptr, mean.brlen = mean.scaling.brlen,
ncore = ncore)
bsdmats.weighted <- bsd.matrix(impudats.weighted, sptr,
mean.brlen = mean.scaling.brlen, ncore = ncore)
cat("Finished making pairwise comparisons", fill = T)
require(MASS)
mdsdata.bsd <- if (sammon.correction)
sammon(as.dist(bsdmat))
else cmdscale(as.dist(bsdmat), eig = T)
mdsdata.weighted <- if (sammon.correction)
sammon(as.dist(bsdmats.weighted))
else cmdscale(as.dist(bsdmats.weighted), eig = T)
}
if (pca) {
pcadata.raw <- pca.perm.test(impudats, perm = pca.permutations)
pcadata.weighted <- pca.perm.test(impudats.weighted,
perm = pca.permutations)
}
if (!is.null(pdf.file)) {
if (mds) {
pdf(paste0(pdf.file, ".MDS.basic.pdf"), width = 8,
height = 4, useDingbats = F)
par(mfrow = c(1, 2))
plot(mdsdata.bsd$points, pch = 19, xlab = "Dim 1",
ylab = "Dim 2", main = "Relative rates (MDS)")
plot(mdsdata.weighted$points, pch = 19, xlab = "Dim 1",
ylab = "Dim 2", main = "Residual rates (MDS)")
dev.off()
}
if (pca) {
pdf(paste0(pdf.file, ".PCA.scree.pdf"), width = 16,
height = 4, useDingbats = F)
par(mfrow = c(1, 4))
lam <- pcadata.raw[[1]]$sdev * sqrt(nrow(pcadata.raw[[1]]$x))
topload <- head(order(apply(pcadata.raw[[1]]$rotation[,
1:2]^2, 1, function(x) sqrt(sum(x))), decreasing = T),
10)
lx <- cbind(t(t(pcadata.raw[[1]]$rotation[topload,
1]) * lam[1]))
ly <- cbind(t(t(pcadata.raw[[1]]$rotation[topload,
2]) * lam[2]))
rownames(lx) <- rownames(ly) <- gsub("V", "br ",
rownames(lx))
if (any(abs(c(lx, ly)) < 1e-04)) {
nonzeroloads <- which(abs(lx) > 1e-04 & abs(ly) >
1e-04)
lx <- cbind(lx[nonzeroloads, ])
ly <- cbind(ly[nonzeroloads, ])
}
plot(pcadata.raw[[1]]$x[, 1:2], pch = 19, xlab = paste0("PC 1 (",
round(summary(pcadata.raw[[1]])$importance[2] *
100, 1), "
100, 1), "
range(pcadata.raw[[1]]$x[, 1]))), max(c(range(lx),
range(pcadata.raw[[1]]$x[, 1])))), ylim = c(min(c(range(ly),
range(pcadata.raw[[1]]$x[, 2]))), max(c(range(ly),
range(pcadata.raw[[1]]$x[, 2])))))
abline(v = 0, lty = 2, col = "grey50")
abline(h = 0, lty = 2, col = "grey50")
arrows(x0 = 0, x1 = lx, y0 = 0, y1 = ly, length = 0.1,
lwd = 1)
text(lx, ly, labels = rownames(lx), pos = c(3, 1,
3, 1), offset = 0.3, cex = 1)
nulldat <- rbind(cbind(as.vector(pcadata.raw[[2]]),
rep(1:length(pcadata.raw[[1]]$sdev), each = pca.permutations)),
cbind(summary(pcadata.raw[[1]])$importance[2,
], 1:length(pcadata.raw[[1]]$sdev)))
plot(nulldat[, c(2, 1)], pch = c(rep(20, nrow(nulldat) -
length(pcadata.raw[[1]]$sdev)), rep(17, length(pcadata.raw[[1]]$sdev))),
col = c(rep("darkgrey", nrow(nulldat) - length(pcadata.raw[[1]]$sdev)),
rep("red", length(pcadata.raw[[1]]$sdev))),
main = "Residual rates PCA scree", ylab = "Proportion variance explained",
xlab = "Principal component")
lines(tail(nulldat[, c(2, 1)], length(pcadata.raw[[1]]$sdev)),
col = 2)
legend("topright", pch = c(17, 20), col = c("red",
"darkgrey"), legend = c("Empirical", "Permuted"))
lamW <- pcadata.weighted[[1]]$sdev * sqrt(nrow(pcadata.weighted[[1]]$x))
topload <- head(order(apply(pcadata.weighted[[1]]$rotation[,
1:2]^2, 1, function(x) sqrt(sum(x))), decreasing = T),
10)
lxW <- cbind(t(t(pcadata.weighted[[1]]$rotation[topload,
1]) * lamW[1]))
lyW <- cbind(t(t(pcadata.weighted[[1]]$rotation[topload,
2]) * lamW[2]))
rownames(lxW) <- rownames(lyW) <- gsub("V", "br ",
rownames(lxW))
if (any(abs(c(lxW, lyW)) < 1e-04)) {
nonzeroloads <- which(abs(lxW) > 1e-04 & abs(lyW) >
1e-04)
lxW <- cbind(lxW[nonzeroloads, ])
lyW <- cbind(lyW[nonzeroloads, ])
}
plot(pcadata.weighted[[1]]$x[, 1:2], pch = 19, xlab = paste0("PC 1 (",
round(summary(pcadata.weighted[[1]])$importance[2] *
100, 1), "
100, 1), "
xlim = c(min(c(range(lxW), range(pcadata.weighted[[1]]$x[,
1]))), max(c(range(lxW), range(pcadata.weighted[[1]]$x[,
1])))), ylim = c(min(c(range(lyW), range(pcadata.weighted[[1]]$x[,
2]))), max(c(range(lyW), range(pcadata.weighted[[1]]$x[,
2])))))
abline(v = 0, lty = 2, col = "grey50")
abline(h = 0, lty = 2, col = "grey50")
arrows(x0 = 0, x1 = lxW, y0 = 0, y1 = lyW, length = 0.1,
lwd = 1)
text(lxW, lyW, labels = rownames(lxW), pos = c(3,
1, 3, 1), offset = 0.1, cex = 1)
nulldat <- rbind(cbind(as.vector(pcadata.weighted[[2]]),
rep(1:length(pcadata.weighted[[1]]$sdev), each = pca.permutations)),
cbind(summary(pcadata.weighted[[1]])$importance[2,
], 1:length(pcadata.weighted[[1]]$sdev)))
plot(nulldat[, c(2, 1)], pch = c(rep(20, nrow(nulldat) -
length(pcadata.weighted[[1]]$sdev)), rep(17,
length(pcadata.weighted[[1]]$sdev))), col = c(rep("darkgrey",
nrow(nulldat) - length(pcadata.weighted[[1]]$sdev)),
rep("red", length(pcadata.weighted[[1]]$sdev))),
main = "Residual rates PCA scree", ylab = "Proportion variance explained",
xlab = "Principal component")
lines(tail(nulldat[, c(2, 1)], length(pcadata.weighted[[1]]$sdev)),
col = 2)
legend("topright", pch = c(17, 20), col = c("red",
"darkgrey"), legend = c("Empirical", "Permuted"))
dev.off()
}
}
reslist <- c(ratesmat, list(imputed.clocks = impudats, weighted.imputed.clocks = impudats.weighted))
if (mds)
reslist <- c(reslist, list(bsd.pairwise.matrix = bsdmat,
weighted.bsd.pairwise.matrix = bsdmats.weighted,
bsd.clock.space = mdsdata.bsd, weighted.bsd.clock.space = mdsdata.weighted))
if (pca)
reslist <- c(reslist, list(pca.clock.space = pcadata.raw,
weighted.pca.clock.space = pcadata.weighted))
if (verbose) {
cat("Output includes:", fill = T)
for (i in 1:length(reslist)) cat(paste0(i, ". ", names(reslist)[i]),
fill = T)
}
class(reslist) <- "clockstarx"
return(reslist)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.