data-raw/LinTests.R

library('TreeTools', quietly = TRUE, warn.conflicts = FALSE)
library('TreeDist')
library('TreeDistData')

# Lin use nTrees = 100L, nTip = 100L, replicates = 1000L
# k1 = 40, 50, 60, 70
# k2 = 10, 20, 30, 40

nTrees = 50 # Quadratic effect on runtime
nTip = 40 # Hyperexponential effect on runtime
replicates = 500 # Linear effect on runtime
message("Running tests on ", nTrees, ' ', nTip, "-leaf trees; ",
        replicates, " replicates.")

LinTestOneSet <- function (nTip, k, nTrees) {
  skeleton <- RandomTree(seq_len(k), root = TRUE)
  structure(lapply(seq_len(nTrees), function (XX) {
    tr <- skeleton
    for (i in k + seq_len(nTip - k))
      tr <- AddTip(tr, label = i)
    tr
  }), class = 'multiPhylo')
}

LinTestTwoSet <- function (nTip, k, nTrees) {
  startTree <- ape::rtree(nTip, br = NULL)

  SwapTwo <- function (x, length.x = length(x)) {
    swapsies <- sample.int(length.x, 2)
    x[swapsies] <- x[rev(swapsies)]
    x
  }

  RepeatLLI <- function (tr, k) {
    labels <- tr$tip.label
    for (i in seq_len(k)) labels <- SwapTwo(labels)
    tr$tip.label <- labels
    tr
  }

  structure(lapply(seq_len(nTrees), function (XX) RepeatLLI(startTree, k)),
            class = 'multiPhylo')
}

LinTestSPRSet <- function (nTip, k, nTrees) {
  startTree <- ape::rtree(nTip, br = NULL)

  RepeatSPR <- function (tr, k) {
    for (i in seq_len(k)) tr <- TreeSearch::SPR(tr)
    tr
  }

  structure(lapply(seq_len(nTrees), function (XX) RepeatSPR(startTree, k)),
            class = 'multiPhylo')
}

SpectralClustering <- function (dat, nClusters) {
  # More efficient version of anocva::spectralClustering
  n <- ncol(dat)
  L <- diag(rowSums(dat)) - dat
  eigenVectors <- eigen(L, symmetric = TRUE)$vectors
  cluster::pam(x = eigenVectors[, n - seq_len(nClusters) + 1L],
               k = nClusters, cluster.only = TRUE)
}


LinTest <- function(k, TestSet = LinTestOneSet, nTip = 100L, nTrees = 100L,
                    i = 1L) {
  cat (".")
  if (i %% 50L == 0L) cat(' ', i, "\n")
  trees <- c(TestSet(nTip, k, nTrees), TestSet(nTip, k, nTrees))
  comparison <- CompareAllTrees(trees, slow = TRUE, verbose = FALSE)
  # Too slow to compute
  # comparison$mast <- NULL
  # comparison$masti <- NULL
  # comparison$qd <- NULL

  # NAs not supported
  comparison$nni_t <- NULL

  ClusterOK <- function (Func, ...) apply(
    vapply(comparison, Func, FUN.VALUE = integer(nTrees + nTrees), ...), 2L,
    identical, y = rep(1:2, each = nTrees))

  HClusters <- function (dat, method) {
    clusters <- hclust(as.dist(dat), method)
    cutree(clusters, k = 2L)
  }

  SClusters <- function (dat) {
    if (!is.null(dat)) {
      if (max(dat) <= 1L) dat <- 1 - dat else dat <- max(dat) - dat
      SpectralClustering(as.matrix(dat), 2L)
    } else {
      rep(0L, nTrees + nTrees)
    }
  }

  cbind(spc = ClusterOK(SClusters),
        pam = ClusterOK(cluster::pam, k = 2L, diss = TRUE, cluster.only = TRUE),
        h.cmp = ClusterOK(HClusters, method = 'complete'),
        h.sng = ClusterOK(HClusters, method = 'single'),
        h.avg = ClusterOK(HClusters, method = 'average')
        )
}

compAllMethods <- c('rf', 'icrf',
                    'jnc2', 'jnc4', 'jco2', 'jco4',
                    'pid', 'msid', 'cid', 'qd', 'nye',
                    'ms', 'mast', 'masti',
                    'nni_l', 'nni_L', 'nni_U', 'nni_u', 'spr',
                    'tbr_l', 'tbr_u', 'path', 'es', 'kc')
linTestReturn <- matrix(FALSE, nrow = length(compAllMethods), ncol = 5L,
                        dimnames = list(compAllMethods,
                                        c('spc', 'pam', 'h.cmp', 'h.sng', 'h.avg')))
runLinTestReturn <- t(0 * linTestReturn)

RunLinTest <- function (k, TestSet = LinTestOneSet,
                        nTip = 100L, nTrees = 100L, replicates= 1000L) {
  message("\n  k = ", k)
  colSums(aperm(vapply(seq_len(replicates), function (XX)
    LinTest(k * nTip / 100, TestSet, nTip, nTrees, XX), linTestReturn)),
                c(3, 1, 2))
}

message("Lin et al. (2012) test one")
linTestOneResults <-
vapply(seq(30L, 70L, by = 10L), RunLinTest, TestSet = LinTestOneSet,
       nTip = nTip, nTrees = nTrees, replicates = replicates,
       FUN.VALUE = runLinTestReturn)
usethis::use_data(linTestOneResults, compress = 'xz', overwrite = TRUE)

message("Lin et al. (2012) test two")
linTestTwoResults <-
vapply(seq(10L, 40L, by = 10L), RunLinTest, TestSet = LinTestTwoSet,
       nTip = nTip, nTrees = nTrees, replicates = replicates,
       FUN.VALUE = runLinTestReturn)
usethis::use_data(linTestTwoResults, compress = 'xz', overwrite = TRUE)

message("SPR cluster recovery test")
linTestSPRResults <-
vapply(seq(30L, 70L, by = 10L), RunLinTest, TestSet = LinTestSPRSet,
       nTip = nTip, nTrees = nTrees, replicates = replicates,
       FUN.VALUE = runLinTestReturn)
usethis::use_data(linTestSPRResults, compress = 'xz', overwrite = TRUE)
ms609/TreeDistData documentation built on May 21, 2021, 6:53 a.m.