tests/testthat/test-cpr_rand_test.R

# Make objects used across multiple tests ----

# keep in a list for easy cleanup
dat <- list()

# comm in tibble format
dat$comm_tbl <- biod_example$comm |>
  tibble::rownames_to_column("site") |>
  tibble::as_tibble()

# comm in tibble format, non-default site name
dat$comm_tbl_2 <- biod_example$comm |>
  tibble::rownames_to_column("sample") |>
  tibble::as_tibble()

# comm in matrix format
dat$comm_mat <- as.matrix(biod_example$comm)

# For testing that results are same regardless of input type
# need to have same random seed each time
set.seed(12345)
dat$res_from_df <- cpr_rand_test(
  biod_example$comm, biod_example$phy,
  null_model = "swap", n_reps = 10, quiet = TRUE
)
set.seed(12345)
dat$res_from_tbl <- cpr_rand_test(
  dat$comm_tbl, biod_example$phy,
  null_model = "swap", n_reps = 10, tbl_out = FALSE, quiet = TRUE
)
set.seed(12345)
dat$res_from_mat <- cpr_rand_test(
  dat$comm_mat, biod_example$phy,
  null_model = "swap", n_reps = 10, quiet = TRUE
)

# For testing output format
dat$res_tbl_from_df <- cpr_rand_test(
  biod_example$comm, biod_example$phy,
  null_model = "swap", n_reps = 1, metrics = "pd", tbl_out = TRUE, quiet = TRUE
)
dat$res_tbl_from_tbl <- cpr_rand_test(
  dat$comm_tbl, biod_example$phy,
  null_model = "swap", n_reps = 1, metrics = "pd", quiet = TRUE
)
dat$res_df_from_tbl <- cpr_rand_test(
  dat$comm_tbl, biod_example$phy,
  null_model = "swap", n_reps = 1, metrics = "pd", tbl_out = FALSE, quiet = TRUE
)

# Run tests ----

#' @srrstats {G5.2, G5.2a, G5.2b, UL7.0} tests failure if input is not valid and
#' checks warning messages
#' @srrstats {G5.0} Use standard dataset for testing (Biodiverse dataset)
test_that("Input is valid", {
  expect_error(
    cpr_rand_test(
      biod_example$comm, biod_example$phy,
      n_reps = -10, null_model = "swap", quiet = TRUE
    ),
    "'n_reps' must be > 0"
  )
  expect_error(
    cpr_rand_test(
      biod_example$comm, biod_example$phy,
      n_iterations = -10, null_model = "swap", quiet = TRUE
    ),
    "'n_iterations' must be > 0"
  )
  expect_error(
    cpr_rand_test(
      10, biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "'comm' must be of class 'data\\.frame' or 'matrix'"
  )
  expect_error(
    cpr_rand_test(
      biod_example$comm, NA,
      metrics = "pd", null_model = "swap", quiet = TRUE
    ),
    "'phy' must be a list of class 'phylo'"
  )
  expect_error(
    cpr_rand_test(
      biod_example$comm, biod_example$phy,
      metrics = "pg", null_model = "swap", quiet = TRUE
    ),
    "'metrics' may only include 'pd', 'rpd', 'pe', or 'rpe'"
  )
  # input data too small
  comm_small <- biod_example$comm[1:3, ]
  set.seed(12345)
  comm_small <- matrix(
    data = sample(c(0, 1), replace = TRUE, size = length(unlist(comm_small))),
    nrow = nrow(comm_small),
    ncol = ncol(comm_small)
  )
  rownames(comm_small) <- rownames(biod_example$comm[1:3, ])
  colnames(comm_small) <- colnames(biod_example$comm[1:3, ])
  comm_small <- comm_small[, colSums(comm_small) > 0]
  expect_error(
    cpr_rand_test(
      comm_small, biod_example$phy,
      null_model = "swap", quiet = TRUE
    ),
    "'comm' must include at least 5 sites"
  )
  expect_error(
    cpr_rand_test(
      biod_example$comm,
      ape::keep.tip(biod_example$phy, c("sp1", "sp2")),
      null_model = "swap", quiet = TRUE
    ),
    "'phy' and 'comm' must share at least 5 species in common"
  )
  # comm with numeric species names
  comm_num_names <- biod_example$comm
  colnames(comm_num_names) <- seq_len(dim(comm_num_names)[2])
  phy_num_names <- biod_example$phy
  phy_num_names$tip.label <- colnames(comm_num_names) # nolint
  expect_error(
    cpr_rand_test(as.matrix(comm_num_names),
      phy_num_names,
      metrics = "pd", null_model = "swap"
    ),
    "Column names of 'comm' changed after conversion from matrix to dataframe\\. Do any column names start with a number" # nolint
  )
  # comm with duplicated species names
  comm_dup_names <- biod_example$comm
  colnames(comm_dup_names)[2] <- "sp1"
  expect_error(
    cpr_rand_test(comm_dup_names,
      biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "'comm' must have unique column names"
  )
  # phy with duplicated species names
  phy_dup_names <- biod_example$phy
  phy_dup_names$tip.label[2] <- "sp1"
  expect_error(
    cpr_rand_test(
      biod_example$comm, phy_dup_names,
      null_model = "swap",
      quiet = TRUE
    ),
    "All tip labels in 'phy' must be unique"
  )
  # comm with non-numeric data
  comm_non_numeric <- biod_example$comm
  comm_non_numeric[1, 1] <- "a"
  expect_error(
    cpr_rand_test(
      comm_non_numeric, biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "All columns of 'comm' must be numeric"
  )
  # comm with NA data
  comm_with_na <- biod_example$comm
  comm_with_na[1, 1] <- NA
  expect_error(
    cpr_rand_test(
      comm_with_na, biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "No missing values allowed in 'comm'"
  )
  # comm with negative data
  comm_with_negative <- biod_example$comm
  comm_with_negative[1, 1] <- -10
  expect_error(
    cpr_rand_test(
      comm_with_negative,
      biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "No negative values allowed in 'comm'"
  )
  #' @srrstats {G2.11} non-vector inputs
  comm_with_nonvec <- data.frame(biod_example$comm)
  attributes(comm_with_nonvec[, 1]) <- list(bar = "foo")
  expect_false(
    is.vector(comm_with_nonvec[, 1])
  )
  expect_error(
    cpr_rand_test(
      comm_with_nonvec, biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "All columns of 'comm' must be numeric"
  )
  #' @srrstats {G2.16} infinite values
  comm_with_infinite <- biod_example$comm
  comm_with_infinite[1, 1] <- Inf
  expect_error(
    cpr_rand_test(
      comm_with_infinite,
      biod_example$phy,
      metrics = "pd", null_model = "swap"
    ),
    "No infinite values allowed in 'comm'"
  )
  expect_error(
    cpr_rand_test(
      dat$comm_tbl,
      biod_example$phy,
      metrics = "pd", site_col = "sample", null_model = "swap"
    ),
    "'site_col' must be one of the column names of 'comm'"
  )
  #' @srrstats {UL1.4} Check assumptions made with regard to input data
  # (negative branch lengths)
  phy_neg <- biod_example$phy
  phy_neg$edge.length[1] <- -1 # nolint
  expect_error(
    cpr_rand_test(
      biod_example$comm, phy_neg,
      null_model = "swap",
      quiet = TRUE
    ),
    "'phy' may not have negative branchlengths"
  )
  phy_inf <- biod_example$phy
  phy_inf$edge.length[1] <- Inf # nolint
  expect_error(
    cpr_rand_test(
      biod_example$comm, phy_inf,
      null_model = "swap",
      quiet = TRUE
    ),
    "'phy' may not have infinite branchlengths"
  )
  # check column with all zeros
  comm_one_zero_col <- biod_example$comm
  comm_one_zero_col[, 1] <- 0
  expect_error(
    cpr_rand_test(
      comm_one_zero_col, biod_example$phy,
      null_model = "swap", quiet = TRUE
    ),
    "Every species in 'comm' must occur in at least one site"
  )
  # check row with all zeros
  comm_one_zero_row <- biod_example$comm
  comm_one_zero_row[1, ] <- 0
  expect_error(
    cpr_rand_test(
      comm_one_zero_row, biod_example$phy,
      null_model = "swap", quiet = TRUE
    ),
    "Every site in 'comm' must have at least once species"
  )
  # warning for abundance data
  expect_warning(
    cpr_rand_test(
      biod_example$comm,
      biod_example$phy,
      null_model = "swap", n_reps = 10, n_iterations = 10
    ),
    "Abundance data detected"
  )
  expect_warning(
    cpr_rand_test(
      biod_example$comm,
      quiet = TRUE,
      biod_example$phy, null_model = "swap", n_reps = 10, n_iterations = 10
    ),
    NA
  )
  # input data with too many zeros or 1s
  # - make comm with >99.5% zeros
  set.seed(12345)
  comm_zero_heavy <- matrix(
    data = sample.int(
      2,
      replace = TRUE,
      prob = c(0.005, 0.995), size = length(unlist(biod_example$comm))
    ),
    nrow = nrow(biod_example$comm),
    ncol = ncol(biod_example$comm)
  )
  comm_zero_heavy[comm_zero_heavy == 2] <- 0
  # - make sure it doesn't break other rules (at least one species / site)
  comm_zero_heavy[1, ] <- 1
  comm_zero_heavy[, 1] <- 1
  rownames(comm_zero_heavy) <- rownames(biod_example$comm)
  colnames(comm_zero_heavy) <- colnames(biod_example$comm)
  expect_warning(
    cpr_rand_test(
      comm_zero_heavy, biod_example$phy,
      null_model = "r00", metrics = "pd"
    ),
    "'comm' is > 95% absences (zeros). Be sure that 'n_reps' and 'n_iterations' are sufficiently large to ensure adequate mixing of random communities", # nolint
    fixed = TRUE
  )
  # - quiet = TRUE should suppress the warning
  expect_warning(
    cpr_rand_test(
      comm_zero_heavy, biod_example$phy,
      null_model = "r00", metrics = "pd", quiet = TRUE
    ),
    NA
  )
  # - convert comm_zero_heavy to comm_one_heavy,
  # making sure it doesn't break other rules (at least one species / site)
  comm_one_heavy <- comm_zero_heavy
  comm_one_heavy[comm_one_heavy == 1] <- 2
  comm_one_heavy[comm_one_heavy == 0] <- 1
  comm_one_heavy[comm_one_heavy == 2] <- 0
  comm_one_heavy[1, ] <- 1
  comm_one_heavy[, 1] <- 1
  expect_warning(
    cpr_rand_test(
      comm_one_heavy, biod_example$phy,
      null_model = "r00", metrics = "pd"
    ),
    "'comm' is > 95% presences (values > 1). Be sure that 'n_reps' and 'n_iterations' are sufficiently large to ensure adequate mixing of random communities", # nolint
    fixed = TRUE
  )
  # - quiet = TRUE should suppress the warning
  expect_warning(
    cpr_rand_test(
      comm_one_heavy, biod_example$phy,
      null_model = "r00", metrics = "pd", quiet = TRUE
    ),
    NA
  )
})

test_that("Results are same regardless of input type", {
  expect_equal(dat$res_from_mat, dat$res_from_df)
  expect_equal(dat$res_from_mat, dat$res_from_tbl)
  expect_equal(dat$res_from_df, dat$res_from_tbl)
})

test_that("Results are same regardless of pres/abs or abundance input", {
  # make presence-absence community
  comm_pa <- apply(biod_example$comm, 2, function(x) ifelse(x > 0, 1, 0))
  # pd
  set.seed(12345)
  res_abun_pd <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "pd", quiet = TRUE
  )
  set.seed(12345)
  res_pa_pd <- cpr_rand_test(
    comm_pa, biod_example$phy,
    null_model = "swap",
    n_reps = 10, metrics = "pd"
  )
  expect_equal(res_abun_pd, res_pa_pd)
  # pe
  set.seed(12345)
  res_abun_pe <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "pe", quiet = TRUE
  )
  set.seed(12345)
  res_pa_pe <- cpr_rand_test(
    comm_pa, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "pe"
  )
  expect_equal(res_abun_pe, res_pa_pe)
  # rpd
  set.seed(12345)
  res_abun_rpd <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "rpd", quiet = TRUE
  )
  set.seed(12345)
  res_pa_rpd <- cpr_rand_test(
    comm_pa, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "rpd"
  )
  expect_equal(res_abun_rpd, res_pa_rpd)
  # rpe
  set.seed(12345)
  res_abun_rpe <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "rpe", quiet = TRUE
  )
  set.seed(12345)
  res_pa_rpe <- cpr_rand_test(
    comm_pa, biod_example$phy,
    null_model = "swap", n_reps = 10, metrics = "rpe"
  )
  expect_equal(res_abun_rpe, res_pa_rpe)
  # all
  set.seed(12345)
  res_abun <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "swap", n_reps = 10, quiet = TRUE
  )
  set.seed(12345)
  res_pa <- cpr_rand_test(
    comm_pa, biod_example$phy,
    null_model = "swap", n_reps = 10
  )
  expect_equal(res_abun, res_pa)
})

#' @srrstats {UL1.2} Check for default-looking column and rownames
test_that("Default column and rownames are detected", {
  # Make a dummy community dataframe and phylogeny
  set.seed(12345)
  abuns <- runif(10 * 8, 0, 10) |> as.integer()
  df_default_rows <- data.frame(
    sp1 = abuns[1:10],
    sp2 = abuns[11:20],
    sp3 = abuns[21:30],
    sp4 = abuns[31:40],
    sp5 = abuns[41:50],
    sp6 = abuns[51:60],
    sp7 = abuns[61:70],
    sp8 = abuns[71:80]
  )
  phy_default <- ape::keep.tip(biod_example$phy, paste0("sp", 1:8))
  expect_error(
    cpr_rand_test(df_default_rows, phy_default, null_model = "swap"),
    "'comm' cannot have default row names \\(consecutive integers from 1 to the number of rows\\)" # nolint
  )
})

#' @srrstats {UL7.5, UL7.5a} Test batch processing, show that results don't
#'   differ between sequential and parallel
test_that("Parallelization decreases calculation time", {
  # Change back to sequential when done (including on failure)
  on.exit(future::plan(future::sequential), add = TRUE)

  # This test is skipped by default because of its long run time.
  # To include the test locally, first run
  # withr::local_envvar(CANAPER_EXTENDED_TESTS = "true") # nolint
  # To include the test in CI (github actions), include the phrase
  # 'run-extended' in the commit
  skip_extended()

  # Set future resolution to sequential (no parallelization)
  future::plan(future::sequential)
  tictoc::tic.clearlog()
  tictoc::tic()
  set.seed(12345)
  seq_res <- cpr_rand_test(
    acacia$comm, acacia$phy,
    null_model = "curveball",
    n_iterations = 100, n_reps = 200, quiet = TRUE
  )
  tictoc::toc(log = TRUE, quiet = TRUE)

  # Set future resolution to parallelized, with 2 workers
  future::plan(future::multisession, workers = 2)
  tictoc::tic()
  set.seed(12345)
  parallel_res <- cpr_rand_test(
    acacia$comm, acacia$phy,
    null_model = "curveball",
    n_iterations = 100, n_reps = 200, quiet = TRUE
  )
  tictoc::toc(log = TRUE, quiet = TRUE)
  log_list <- tictoc::tic.log(format = FALSE)
  tictoc::tic.clearlog()
  # Check times
  elapsed_time_seq <- log_list[[1]]$toc - log_list[[1]]$tic
  elapsed_time_parallel <- log_list[[2]]$toc - log_list[[2]]$tic

  # Change back to sequential
  future::plan(future::sequential)

  # Expect sequential to take longer
  expect_gt(elapsed_time_seq, elapsed_time_parallel)
  expect_lt(elapsed_time_parallel, elapsed_time_seq)
})

#' @srrstats {UL7.5, UL7.5a} Test batch processing, show that results don't
#'   differ between sequential and parallel
test_that("Seeds work across sequential and parallel", {
  # Change back to sequential when done (including on failure)
  on.exit(future::plan(future::sequential), add = TRUE)

  # Set future resolution to sequential (no parallelization)
  future::plan(future::sequential)
  set.seed(12345)
  seq_res_1 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "curveball", n_iterations = 10, n_reps = 10, quiet = TRUE
  )
  set.seed(67890)
  seq_res_2 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "curveball",
    n_iterations = 10, n_reps = 10, quiet = TRUE
  )
  set.seed(12345)
  seq_res_3 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "curveball",
    n_iterations = 10, n_reps = 10, quiet = TRUE
  )

  expect_true(isTRUE(all.equal(seq_res_1, seq_res_3)))
  expect_false(isTRUE(all.equal(seq_res_1, seq_res_2)))

  # Set future resolution to parallelized, with 2 workers
  future::plan(future::multisession, workers = 2)
  set.seed(12345)
  par_res_1 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "curveball",
    n_iterations = 10, n_reps = 10, quiet = TRUE
  )
  set.seed(67890)
  par_res_2 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "curveball",
    n_iterations = 10, n_reps = 10, quiet = TRUE
  )
  set.seed(12345)
  par_res_3 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    null_model = "curveball",
    n_iterations = 10, n_reps = 10, quiet = TRUE
  )

  expect_true(isTRUE(all.equal(par_res_1, par_res_3)))
  expect_false(isTRUE(all.equal(par_res_1, par_res_2)))
  expect_true(isTRUE(all.equal(seq_res_1, par_res_1)))

  # Change back to sequential
  future::plan(future::sequential)
})

#' @srrstats {G5.4, G5.4b, G5.5} Correctness tests
# Make sure results from canaper match those of Biodiverse
# (for non-random results only)
test_that("Output is same as when calculated with Biodiverse", {
  # Calculate observed PD, PE, etc using biodiverse test data
  set.seed(12345)
  res_compare <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    n_reps = 1, null_model = "swap", quiet = TRUE
  ) |>
    tibble::rownames_to_column("site") |>
    tibble::as_tibble() |>
    dplyr::select(site, matches("_obs$")) |>
    # Join on independently calculated results from Biodiverse
    dplyr::left_join(biod_results, by = "site") |>
    # Make sure no values are NA
    assertr::assert(assertr::not_na, dplyr::everything())
  expect_equal(res_compare$pd_obs, res_compare$pd_biodiv)
  expect_equal(res_compare$pd_alt_obs, res_compare$pd_alt_biodiv)
  expect_equal(res_compare$rpd_obs, res_compare$rpd_biodiv)
  expect_equal(res_compare$pe_obs, res_compare$pe_biodiv)
  expect_equal(res_compare$pe_alt_obs, res_compare$pe_alt_biodiv)
  expect_equal(res_compare$rpe_obs, res_compare$rpe_biodiv)
})

test_that("Output is different with diff. seeds, and same w/same seed", {
  set.seed(12345)
  res_1 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    n_reps = 25, null_model = "curveball", quiet = TRUE
  )
  set.seed(67890)
  res_2 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    n_reps = 25, null_model = "curveball", quiet = TRUE
  )
  set.seed(12345)
  res_3 <- cpr_rand_test(
    biod_example$comm, biod_example$phy,
    n_reps = 25, null_model = "curveball", quiet = TRUE
  )
  expect_false(isTRUE(all.equal(res_1, res_2)))
  expect_true(isTRUE(all.equal(res_1, res_3)))
})

test_that("Output is formatted as expected", {
  # Use expect equal() with class() when *only* expecting a data.frame
  # (tibble inherits data.frame, tbl, and tbl_df)
  expect_equal(
    class(dat$res_from_df),
    "data.frame"
  )
  expect_s3_class(
    dat$res_tbl_from_df,
    "tbl_df"
  )
  expect_s3_class(
    dat$res_tbl_from_df,
    "tbl"
  )
  expect_s3_class(
    dat$res_tbl_from_tbl,
    "tbl_df"
  )
  expect_s3_class(
    dat$res_tbl_from_tbl,
    "tbl"
  )
  expect_equal(
    class(dat$res_df_from_tbl),
    "data.frame"
  )
  #' @srrstats {UL1.3, UL7.3} Make sure data from 'site_col' is carried through
  #' for tbl input
  res_tbl_from_tbl_2 <- cpr_rand_test(
    dat$comm_tbl_2, biod_example$phy,
    null_model = "swap",
    n_reps = 1, metrics = "pd", site_col = "sample", quiet = TRUE
  )
  expect_equal(
    dat$comm_tbl_2$sample,
    res_tbl_from_tbl_2$sample
  )
  expect_equal(
    rownames(dat$res_from_df),
    rownames(biod_example$comm)
  )
  # Run snapshot test to verify correct output
  set.seed(12345)
  expect_snapshot(
    cpr_rand_test(
      phylocom$comm, phylocom$phy,
      null_model = "swap",
      n_reps = 10, quiet = TRUE
    )
  )
})

test_that("Various randomization algorithms work", {
  set.seed(12345)
  algos <- vegan::make.commsim()
  res_list <- purrr::map(
    algos,
    ~ cpr_rand_test(
      biod_example$comm, biod_example$phy,
      null_model = ., n_iterations = 10, n_reps = 10, quiet = TRUE
    )
  )
  # All output should be data frame
  for (i in seq_along(res_list)) {
    expect_s3_class(res_list[[i]], "data.frame")
  } # All values should be numeric
  for (i in seq_along(res_list)) {
    expect_true(
      assertr::assert(
        res_list[[i]], is.numeric, dplyr::everything(),
        success_fun = assertr::success_logical
      )
    )
  }
})

test_that("Custom randomization algorithms work", {
  randomizer <- function(x, n, ...) {
    array(replicate(n, sample(x)), c(dim(x), n))
  }
  cs_object <- vegan::commsim(
    "r00_model",
    fun = randomizer, binary = TRUE,
    isSeq = FALSE, mode = "integer"
  )
  set.seed(12345)
  expect_snapshot(
    cpr_rand_test(
      phylocom$comm, phylocom$phy, cs_object,
      n_reps = 10, quiet = TRUE
    )
  )
})

# Cleanup ----
remove(dat)

Try the canaper package in your browser

Any scripts or data that you put into this service are public.

canaper documentation built on May 31, 2023, 8:39 p.m.