tests/testthat/test-bk_method.R

# library(testthat)
# library(dendextend)

context("Bk method (FM Index) between two trees")


test_that("sort_2_clusters_vectors works", {
  suppressWarnings(RNGversion("3.5.0"))
  set.seed(23235)
  ss <- sample(1:150, 4)
  hc1 <- hclust(dist(datasets::iris[ss, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[ss, -5]), "single")
  # dend1 <- as.dendrogram(hc1)
  # dend2 <- as.dendrogram(hc2)
  #    cutree(dend1)

  A1_clusters <- cutree(hc1, k = 3)
  A2_clusters <- sample(cutree(hc1, k = 3))

  sorted_As <- sort_2_clusters_vectors(A1_clusters, A2_clusters, assume_sorted_vectors = TRUE) # no sorting
  expect_false(identical(sorted_As[[1]], sorted_As[[2]]))

  sorted_As <- sort_2_clusters_vectors(A1_clusters, A2_clusters, assume_sorted_vectors = FALSE) # Sorted
  expect_true(identical(sorted_As[[1]], sorted_As[[2]]))
  
  # test edge case warnings
  A2_clusters <- A2_clusters[1:3]
  expect_warning(
     result <- sort_2_clusters_vectors(A1_clusters, A2_clusters, warn = T)
  )
  expect_identical(A1_clusters, result[[1]])
  expect_identical(A2_clusters, result[[2]])
  
  names(A2_clusters) <- NULL
  expect_warning(
     result <- sort_2_clusters_vectors(A1_clusters, A2_clusters, warn = T)
  )
  expect_identical(A1_clusters, result[[1]])
  expect_identical(A2_clusters, result[[2]])
  
  # create custom object with different length of names as length of object to trigger warning
  custom_object <- list(a = 1, b = 2, c = 3) 
  class(custom_object) <- "custom_class"
  names.custom_class <<- function(x) {
     return(c("Name1", "Name2", "Name3", "Name4", "Name5"))
  }
  expect_warning(sort_2_clusters_vectors(custom_object, custom_object, warn = T))
})




test_that("FM_index_R works", {
  suppressWarnings(RNGversion("3.5.0"))

  #    set.seed(23235)
  ss <- TRUE # sample(1:150, 10 )
  hc1 <- hclust(dist(datasets::iris[ss, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[ss, -5]), "single")
  # dend1 <- as.dendrogram(hc1)
  # dend2 <- as.dendrogram(hc2)
  #    cutree(dend1)

  # FM index of a cluster with himself is 1:
  expect_equivalent(FM_index_R(cutree(hc1, k = 3), cutree(hc1, k = 3)), 1)
  # removing the attr - it is EXACTLY 1: (but NOT 1L)
  expect_identical(as.vector(FM_index_R(cutree(hc1, k = 3), cutree(hc1, k = 3))), 1)

  # sorting of the clusters based on their names:
  set.seed(1341)
  expect_false(identical(
    as.vector(
      FM_index_R(cutree(hc1, k = 3),
        sample(cutree(hc1, k = 3)),
        assume_sorted_vectors = TRUE
      )
    ), 1
  ))
  # It actually becomes: 0.38037
  # but if we leave sorting as TRUE, we will get 1:

  expect_true(identical(
    as.vector(FM_index_R(cutree(hc1, k = 3),
      sample(cutree(hc1, k = 3)),
      assume_sorted_vectors = FALSE
    )), 1
  ))
  # it is actually the default:
  expect_true(identical(
    as.vector(FM_index_R(
      cutree(hc1, k = 3),
      sample(cutree(hc1, k = 3))
    )), 1
  ))


  # we can get a range of FM inexes using the following:
  fo <- function(k) FM_index_R(cutree(hc1, k), cutree(hc2, k))
  #    dput(round(unname(sapply(1:4, fo)),2))

  expect_identical(
    round(unname(sapply(1:4, fo)), 2),
    c(1, 0.71, 0.81, 0.75)
  )
  #    ks <- 1:150
  #    plot(sapply(ks, fo)~ ks, type = "b", main = "Bk plot for the datasets::iris dataset")
  #

  set.seed(234234)
  tmp_index <- FM_index_R(cutree(hc1, k = 3), sample(cutree(hc1, k = 3)), assume_sorted_vectors = TRUE)
  expect_identical(round(as.vector(tmp_index), 2), 0.38)
  expect_identical(round(attr(tmp_index, "E_FM"), 2), 0.37)
  expect_identical(round(sqrt(attr(tmp_index, "V_FM")), 3), 0.008)
  
  # test case where a cluster with an NA value is passed in
  hc1_with_na = cutree(hc1, k = 3)
  hc1_with_na[1] = NA
  expect_warning(
     FM_index_R(hc1_with_na, cutree(hc1, k = 3), warn = T)
  )
  
  # create custom class to pass in which triggers error otherwise not possible
  x <<- c(1, 2, 3, 4, 5) 
  class(x) <<- "x"
  length.x <<- function(x) 1
  y <<- c(1, 2, 3, 4, 5) 
  class(y) <<- "y"
  length.y <<- function(y) 1
  expect_error(
     FM_index_R(x, y, assume_sorted_vectors = T)
  )

})



test_that("FM_index works", {
  suppressWarnings(RNGversion("3.5.0"))
  #    set.seed(23235)
  ss <- TRUE # sample(1:150, 10 )
  hc1 <- hclust(dist(datasets::iris[ss, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[ss, -5]), "single")
  # dend1 <- as.dendrogram(hc1)
  # dend2 <- as.dendrogram(hc2)
  #    cutree(dend1)

  # FM index of a cluster with himself is 1:
  expect_equivalent(FM_index(cutree(hc1, k = 3), cutree(hc1, k = 3)), 1)
  # removing the attr - it is EXACTLY 1: (but NOT 1L)
  expect_identical(as.vector(FM_index(cutree(hc1, k = 3), cutree(hc1, k = 3))), 1)

  # sorting of the clusters based on their names:
  set.seed(1341)
  expect_false(identical(
    as.vector(
      FM_index(cutree(hc1, k = 3),
        sample(cutree(hc1, k = 3)),
        assume_sorted_vectors = TRUE
      )
    ), 1
  ))
  # It actually becomes: 0.38037
  # but if we leave sorting as TRUE, we will get 1:

  expect_true(identical(
    as.vector(FM_index(cutree(hc1, k = 3),
      sample(cutree(hc1, k = 3)),
      assume_sorted_vectors = FALSE
    )), 1
  ))
  # it is actually the default:
  expect_true(identical(
    as.vector(FM_index(
      cutree(hc1, k = 3),
      sample(cutree(hc1, k = 3))
    )), 1
  ))


  # we can get a range of FM inexes using the following:
  fo <- function(k) FM_index(cutree(hc1, k), cutree(hc2, k))
  #    dput(round(unname(sapply(1:4, fo)),2))

  expect_identical(
    round(unname(sapply(1:4, fo)), 2),
    c(1, 0.71, 0.81, 0.75)
  )
  #    ks <- 1:150
  #    plot(sapply(ks, fo)~ ks, type = "b", main = "Bk plot for the datasets::iris dataset")
  
})







test_that("FM_index_permutation works", {
  set.seed(23235)
  ss <- TRUE # sample(1:150, 10 )
  hc1 <- hclust(dist(datasets::iris[ss, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[ss, -5]), "single")
  # dend1 <- as.dendrogram(hc1)
  # dend2 <- as.dendrogram(hc2)
  #    cutree(dend1)

  A1_clusters <- cutree(hc1, k = 3)
  A2_clusters <- A1_clusters

  R <- 10
  set.seed(414130)
  FM_index_H0 <- replicate(R, FM_index_permutation(A1_clusters, A2_clusters)) # can take 10 sec

  expect_identical(
    round(mean(FM_index_H0), 2),
    0.37
  )
  expect_identical(
    round(sd(FM_index_H0), 2),
    0.01
  )
})








test_that("Bk works", {
  set.seed(23235)
  ss <- TRUE # sample(1:150, 10 )
  hc1 <- hclust(dist(datasets::iris[ss, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[ss, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)
  #    cutree(tree1)

  expect_identical(
    Bk(hc1, hc2, k = 3),
    Bk(dend1, dend2, k = 3, )
  )

  expect_identical(
    Bk(hc1, hc2, k = 3),
    Bk(dend1, dend2, k = 3)
  )

  expect_identical(
    round(as.numeric(Bk(hc1, hc2, k = 3)), 3),
    0.806
  )
  expect_identical(
    round(as.numeric(Bk(dend1, dend2, k = 3)), 3),
    0.806
  )


  set.seed(23234535)
  expect_identical(
    round(
      Bk_permutations(dend1,
        dend2,
        k = 3, R = 2
      )[[1]], 2
    ),
    c(.45, .44)
  )

  # test case where k is not specified
  expect_identical(
    Bk(dend1, dend2),
    Bk(dend1, dend2, k = 2:(nleaves(dend1) - 1))
  )

  # test case where different number of items are provided
  hc1 <- hclust(dist(datasets::iris[1:149, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[1:150, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)
  expect_error(Bk(dend1, dend2, warn = T))
  
  # test case where different labels are provided
  hc1 <- hclust(dist(datasets::iris[1:149, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[2:150, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)
  expect_error(Bk(dend1, dend2, warn = T))
  
  # make computationally easier for permutations
  hc1 <- hclust(dist(datasets::iris[1:15, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[1:15, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)
  
  # test case where k is not specified
  expect_no_error(
     Bk_permutations(dend1, dend2)
  )
  
  # test case where different number of items are provided
  hc1 <- hclust(dist(datasets::iris[1:29, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[1:30, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)
  expect_error(Bk_permutations(dend1, dend2, warn = T))
  
  # test case where different labels are provided
  hc1 <- hclust(dist(datasets::iris[1:29, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[2:30, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)
  expect_error(Bk_permutations(dend1, dend2, warn = T))
})


test_that("Bk_plot works", {

  ss <- TRUE
  hc1 <- hclust(dist(datasets::iris[1:15, -5]), "com")
  hc2 <- hclust(dist(datasets::iris[1:15, -5]), "single")
  dend1 <- as.dendrogram(hc1)
  dend2 <- as.dendrogram(hc2)

  # simply check if the function can run without error
  expect_no_error(
    Bk_plot(dend1, dend2, try_cutree_hclust = T, add_E = T, rejection_line_asymptotic = T, rejection_line_permutation = T, p.adjust.methods = "bonferroni")
  )
  expect_no_error(
    Bk_plot(dend1, dend2, try_cutree_hclust = T, add_E = F, rejection_line_asymptotic = T, rejection_line_permutation = T)
  )
  
  # case where object which cant be converted to phylo is passed in
  x <- matrix(1:4, nrow = 2)
  expect_error(
    Bk_plot(x, dend2, try_cutree_hclust = T, add_E = F, rejection_line_asymptotic = T, rejection_line_permutation = T)
  )
  expect_error(
    Bk_plot(dend1, x, try_cutree_hclust = T, add_E = F, rejection_line_asymptotic = T, rejection_line_permutation = T)
  )
})

Try the dendextend package in your browser

Any scripts or data that you put into this service are public.

dendextend documentation built on April 4, 2025, 4:40 a.m.