# tests/testthat/test.gene_clustering.R

# Shared setup for every test in this file.

# Verbosity level 0 (presumably silences the package's messages -- confirm
# against set_verbosity() documentation).
set_verbosity(0)

# Fetch the example dataset; the pbmc3k_medium object used below is expected
# to be made available by this call.
load_example_dataset("7871581/files/pbmc3k_medium")

# Gene selection step, run once and reused as input of gene_clustering().
pbmc3k_medium_clust <- select_genes(
  data = pbmc3k_medium,
  distance_method = "pearson"
)

test_that("Checking output slots generated by gene_clustering() using keep_nn as FALSE", {
  # Regression test for gene_clustering() called with keep_nn = FALSE.
  # Every slot of the returned object is compared with reference values
  # recorded for the pbmc3k_medium example dataset.

  # Write the output files into a dedicated sub-directory of the session
  # temporary directory. tempdir() itself must never be handed to unlink():
  # deleting it breaks every later tempfile()/tempdir() user of the session.
  tmp_dir <- tempfile(pattern = "gene_clustering_")
  dir.create(tmp_dir, recursive = TRUE, showWarnings = FALSE)
  # Remove the files on exit, including when gene_clustering() errors.
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
  
  # Cluster the selected genes (inflation = 1.8, s = 5, neighbours not kept)
  res_no_keep_nn <- gene_clustering(object = pbmc3k_medium_clust,
                                    inflation = 1.8,
                                    keep_nn = FALSE,
                                    s = 5,
                                    threads = 6,
                                    output_path = tmp_dir)
  
  # =======================================================
  # Test that the <name>.input_mcl.txt and <name>.mcl_out.txt files exist
  # in the output directory stored in res_no_keep_nn@parameters
  expect_true(file.exists(file.path(res_no_keep_nn@parameters$output_path,
                                    paste0(res_no_keep_nn@parameters$name,
                                           ".input_mcl.txt"))))
  expect_true(file.exists(file.path(res_no_keep_nn@parameters$output_path,
                                    paste0(res_no_keep_nn@parameters$name,
                                           ".mcl_out.txt"))))
  
  expect_equal(length(res_no_keep_nn@parameters$name), 1)
  expect_true(is.character(res_no_keep_nn@parameters$name))
  
  
  # =======================================================
  # Test matrix in res_no_keep_nn@data
  expect_equal(round(sum(res_no_keep_nn@data), 0), 70172)
  expect_equal(round(mean(res_no_keep_nn@data), 6), 0.639414)
  expect_equal(round(median(res_no_keep_nn@data), 6), 0)
  expect_equal(round(sd(res_no_keep_nn@data), 6), 1.329959)

  expect_equal(round(sum(colMeans(res_no_keep_nn@data)), 1), 230.8)
  expect_equal(round(sum(rowMeans(res_no_keep_nn@data)), 4), 194.382)
  
  expect_equal(ncol(res_no_keep_nn@data), 361)
  expect_equal(head(colnames(res_no_keep_nn@data)), c("GATCTACTGGTGAG-1", "ACAGTGACTCACCC-1", "AGACGTACAGAGGC-1", 
                                                      "GACGTAACCTGTGA-1", "TATACAGATCCAGA-1", "CGGATAACAGCTCA-1"))
  
  expect_equal(tail(colnames(res_no_keep_nn@data)), c("GGCATATGGGGAGT-1", "TTACGTACGTTCAG-1", "GGAACACTTCAGAC-1", 
                                                      "ATCATCTGACACCA-1", "ACGAACTGGCTATG-1", "TAACACCTTGTTTC-1"))
  
  expect_equal(nrow(res_no_keep_nn@data), 304)
  
  # Rows of the matrix, get_genes() and the gene_clusters slot must agree
  expect_equal(nrow(res_no_keep_nn@data), length(get_genes(res_no_keep_nn)))
  expect_equal(rownames(res_no_keep_nn@data), get_genes(res_no_keep_nn))
  expect_equal(rownames(res_no_keep_nn@data), unlist(res_no_keep_nn@gene_clusters, use.names = FALSE))
  expect_equal(nrow(res_no_keep_nn@data), length(unlist(res_no_keep_nn@gene_clusters)))
  
  expect_equal(round(sum(res_no_keep_nn@data^2), 1), 238981.3)
  
  expect_true(is.matrix(res_no_keep_nn@data))
  
  expect_equal(length(res_no_keep_nn@data), 109744)
  
  
  # =======================================================
  # Test observed distances in res_no_keep_nn@dbf_output$dknn
  expect_equal(round(sum(res_no_keep_nn@dbf_output$dknn), 3), 1324.35)
  expect_equal(round(mean(res_no_keep_nn@dbf_output$dknn), 7), 0.8650231)
  expect_equal(round(median(res_no_keep_nn@dbf_output$dknn), 7), 0.886363)
  expect_equal(round(sd(res_no_keep_nn@dbf_output$dknn), 6), 0.069147)
  
  expect_equal(length(res_no_keep_nn@dbf_output$dknn), 1531)
  
  expect_equal(round(sum(res_no_keep_nn@dbf_output$dknn^2), 2), 1152.91)
  
  # =======================================================
  # Test simulated distances in res_no_keep_nn@dbf_output$simulated_dknn
  expect_equal(round(sum(res_no_keep_nn@dbf_output$simulated_dknn), 3), 1335.374)
  expect_equal(round(mean(res_no_keep_nn@dbf_output$simulated_dknn), 7), 0.8722235)
  expect_equal(round(median(res_no_keep_nn@dbf_output$simulated_dknn), 7), 0.8722685)
  expect_equal(round(sd(res_no_keep_nn@dbf_output$simulated_dknn), 6), 0.008386)
  
  expect_equal(length(res_no_keep_nn@dbf_output$simulated_dknn), 1531)
  
  expect_equal(round(sum(res_no_keep_nn@dbf_output$simulated_dknn^2), 2), 1164.85)

  expect_true(is.numeric(res_no_keep_nn@dbf_output$simulated_dknn))
  
  # =======================================================
  # Test critical distance in res_no_keep_nn@dbf_output$critical_distance
  expect_equal(round(res_no_keep_nn@dbf_output$critical_distance, 7), 0.8369334)
  expect_equal(length(res_no_keep_nn@dbf_output$critical_distance), 1)
  
  
  # =======================================================
  # Test gene clusters in res_no_keep_nn@gene_clusters
  expect_equal(sum(as.numeric(names(res_no_keep_nn@gene_clusters))), 136)
  expect_equal(names(res_no_keep_nn@gene_clusters), as.character(1:16))
  expect_equal(length(unlist(res_no_keep_nn@gene_clusters)), 304)
  expect_equal(length(unlist(res_no_keep_nn@gene_clusters$`1`)), 63)
  expect_equal(length(unlist(res_no_keep_nn@gene_clusters$`2`)), 37)
  expect_equal(length(unlist(res_no_keep_nn@gene_clusters$`3`)), 34)
  expect_equal(length(unlist(res_no_keep_nn@gene_clusters$`4`)), 29)
  expect_equal(length(unlist(res_no_keep_nn@gene_clusters$`10`)), 12)
  
  expect_equal(unlist(res_no_keep_nn@gene_clusters, use.names = FALSE), get_genes(res_no_keep_nn))
  expect_equal(
    unlist(res_no_keep_nn@gene_clusters$`1`, use.names = FALSE),
    get_genes(res_no_keep_nn, cluster = 1)
  )
  expect_equal(
    unlist(res_no_keep_nn@gene_clusters$`2`, use.names = FALSE),
    get_genes(res_no_keep_nn, cluster = 2)
  )
  expect_equal(
    unlist(res_no_keep_nn@gene_clusters$`3`, use.names = FALSE),
    get_genes(res_no_keep_nn, cluster = 3)
  )
  expect_equal(
    unlist(res_no_keep_nn@gene_clusters$`4`, use.names = FALSE),
    get_genes(res_no_keep_nn, cluster = 4)
  )
  expect_equal(unlist(res_no_keep_nn@gene_clusters, use.names = FALSE), rownames(res_no_keep_nn@data))
  
  expect_true(is.list(res_no_keep_nn@gene_clusters))
  expect_true(is.character(res_no_keep_nn@gene_clusters$`1`))
  
  
  # =======================================================
  # Test size of gene clusters in res_no_keep_nn@gene_clusters_metadata$size
  expect_equal(res_no_keep_nn@gene_clusters_metadata$size, c(`1` = 63L, `2` = 37L, `3` = 34L, `4` = 29L, `5` = 26L, `6` = 24L, 
                                                             `7` = 21L, `8` = 14L, `9` = 14L, `10` = 12L, `11` = 7L, `12` = 6L, 
                                                             `13` = 5L, `14` = 5L, `15` = 4L, `16` = 3L))
  expect_true(is.integer(res_no_keep_nn@gene_clusters_metadata$size))

  # Recorded sizes must match the length of each cluster in @gene_clusters
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[1],
    c("1" = length(res_no_keep_nn@gene_clusters$`1`))
  )
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[2],
    c("2" = length(res_no_keep_nn@gene_clusters$`2`))
  )
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[3],
    c("3" = length(res_no_keep_nn@gene_clusters$`3`))
  )
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[4],
    c("4" = length(res_no_keep_nn@gene_clusters$`4`))
  )
  # ... and the number of genes returned by get_genes() for that cluster
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[1],
    c("1" = length(get_genes(res_no_keep_nn, cluster = 1)))
  )
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[2],
    c("2" = length(get_genes(res_no_keep_nn, cluster = 2)))
  )
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[3],
    c("3" = length(get_genes(res_no_keep_nn, cluster = 3)))
  )
  expect_equal(
    res_no_keep_nn@gene_clusters_metadata$size[4],
    c("4" = length(get_genes(res_no_keep_nn, cluster = 4)))
  )
  
  
  # =======================================================
  # Test parameters in res_no_keep_nn@parameters
  expect_true(is.list(res_no_keep_nn@parameters))
  expect_equal(length(res_no_keep_nn@parameters), 12)
  expect_equal(names(res_no_keep_nn@parameters), c(
    "distance_method",
    "k",
    "noise_level",
    "fdr",
    "row_sum",
    "no_dknn_filter",
    "seed",
    "keep_nn",
    "k_mcl_graph",
    "output_path",
    "name",
    "inflation"
  ))
  
  expect_equal(res_no_keep_nn@parameters$output_path, tmp_dir)
  expect_true(is.character(res_no_keep_nn@parameters$output_path))
  expect_equal(length(res_no_keep_nn@parameters$output_path), 1)
  
  # No name was supplied: a 10-character name is expected to be generated
  expect_true(is.character(res_no_keep_nn@parameters$name))
  expect_equal(length(res_no_keep_nn@parameters$name), 1)
  expect_equal(nchar(res_no_keep_nn@parameters$name), 10)
  expect_true(grepl("[[:alnum:]]+", res_no_keep_nn@parameters$name))
  
  expect_equal(res_no_keep_nn@parameters$distance_method, "pearson")
  expect_equal(length(res_no_keep_nn@parameters$distance_method), 1)
  
  expect_equal(res_no_keep_nn@parameters$k, 80)
  expect_true(is.numeric(res_no_keep_nn@parameters$k))
  expect_equal(length(res_no_keep_nn@parameters$k), 1)
  
  expect_equal(res_no_keep_nn@parameters$noise_level, 5e-05)
  expect_true(is.numeric(res_no_keep_nn@parameters$noise_level))
  expect_equal(length(res_no_keep_nn@parameters$noise_level), 1)
  
  expect_equal(res_no_keep_nn@parameters$fdr, 0.005)
  expect_equal(length(res_no_keep_nn@parameters$fdr), 1)
  
  expect_equal(res_no_keep_nn@parameters$row_sum, 1)
  expect_true(is.numeric(res_no_keep_nn@parameters$row_sum))
  expect_equal(length(res_no_keep_nn@parameters$row_sum), 1)
  
  expect_false(res_no_keep_nn@parameters$no_dknn_filter)
  
  expect_equal(res_no_keep_nn@parameters$seed, 123)
  expect_true(is.numeric(res_no_keep_nn@parameters$seed))
  expect_equal(length(res_no_keep_nn@parameters$seed), 1)
  
  expect_false(res_no_keep_nn@parameters$keep_nn)
  
  # k_mcl_graph is expected to carry the value passed as `s`
  expect_equal(res_no_keep_nn@parameters$k_mcl_graph, 5)
  expect_true(is.numeric(res_no_keep_nn@parameters$k_mcl_graph))
  expect_equal(length(res_no_keep_nn@parameters$k_mcl_graph), 1)
  
  expect_equal(res_no_keep_nn@parameters$inflation, 1.8)
  expect_true(is.numeric(res_no_keep_nn@parameters$inflation))
  expect_equal(length(res_no_keep_nn@parameters$inflation), 1)
  
  
  # =======================================================
  # Test number of clusters in res_no_keep_nn@gene_clusters_metadata$number
  expect_equal(res_no_keep_nn@gene_clusters_metadata$number, 16)
  expect_true(is.numeric(res_no_keep_nn@gene_clusters_metadata$number))
  
  
  # =======================================================
  # Test cluster ids in res_no_keep_nn@gene_clusters_metadata$cluster_id
  expect_equal(length(res_no_keep_nn@gene_clusters_metadata$cluster_id), 16)
  expect_equal(res_no_keep_nn@gene_clusters_metadata$cluster_id, 1:16)
  expect_true(is.numeric(res_no_keep_nn@gene_clusters_metadata$cluster_id))
})




test_that("Checking output slots generated by gene_clustering() using keep_nn as TRUE", {
  # Regression test for gene_clustering() called with keep_nn = TRUE and an
  # explicit name. No output_path is given, so the files are looked up both
  # in @parameters$output_path and in tempdir().
  res_keep_nn <- gene_clustering(object = pbmc3k_medium_clust,
                                 inflation = 2.2,
                                 keep_nn = TRUE,
                                 threads = 4,
                                 name = "test")
  
  # =======================================================
  # Test that the <name>.input_mcl.txt and <name>.mcl_out.txt files exist
  expect_equal(res_keep_nn@parameters$name, "test")
  expect_true(file.exists(file.path(res_keep_nn@parameters$output_path,
                                    paste0(res_keep_nn@parameters$name,
                                           ".input_mcl.txt"))))
  expect_true(file.exists(file.path(res_keep_nn@parameters$output_path,
                                    paste0(res_keep_nn@parameters$name,
                                           ".mcl_out.txt"))))
  # Same two files, looked up directly in the session temporary directory
  expect_true(file.exists(file.path(tempdir(), paste0(res_keep_nn@parameters$name,
                                                       ".input_mcl.txt"))))
  expect_true(file.exists(file.path(tempdir(), paste0(res_keep_nn@parameters$name,
                                                       ".mcl_out.txt"))))
  expect_equal(length(res_keep_nn@parameters$name), 1)
  expect_true(is.character(res_keep_nn@parameters$name))
  
  
  # =======================================================
  # Test matrix in res_keep_nn@data
  expect_equal(round(sum(res_keep_nn@data), 0), 70172)
  expect_equal(round(mean(res_keep_nn@data), 2), 0.64)
  expect_equal(round(median(res_keep_nn@data), 2), 0)
  expect_equal(round(sd(res_keep_nn@data), 1), 1.3)
  
  expect_equal(round(sum(colMeans(res_keep_nn@data)), 1), 230.8)
  expect_equal(round(sum(rowMeans(res_keep_nn@data)), 1), 194.4)
  
  expect_equal(ncol(res_keep_nn@data), 361)
  expect_equal(head(colnames(res_keep_nn@data)), c("GATCTACTGGTGAG-1", "ACAGTGACTCACCC-1", "AGACGTACAGAGGC-1", 
                                                   "GACGTAACCTGTGA-1", "TATACAGATCCAGA-1", "CGGATAACAGCTCA-1"))
  
  expect_equal(nrow(res_keep_nn@data), 304)
  
  # Rows of the matrix, get_genes() and the gene_clusters slot must agree
  expect_equal(nrow(res_keep_nn@data), length(get_genes(res_keep_nn)))
  expect_equal(rownames(res_keep_nn@data), get_genes(res_keep_nn))
  expect_equal(rownames(res_keep_nn@data), unlist(res_keep_nn@gene_clusters, use.names = FALSE))
  expect_equal(nrow(res_keep_nn@data), length(unlist(res_keep_nn@gene_clusters)))
  
  expect_equal(round(sum(res_keep_nn@data^2), 1), 238981.3)
  
  expect_true(is.matrix(res_keep_nn@data))
  
  expect_equal(length(res_keep_nn@data), 109744)
  
  
  # =======================================================
  # Test observed distances in res_keep_nn@dbf_output$dknn
  expect_equal(round(sum(res_keep_nn@dbf_output$dknn), 1), 1324.4)
  expect_equal(round(mean(res_keep_nn@dbf_output$dknn), 1), 0.9)
  expect_equal(round(median(res_keep_nn@dbf_output$dknn), 2), 0.89)
  expect_equal(round(sd(res_keep_nn@dbf_output$dknn), 2), 0.07)
  
  expect_equal(length(res_keep_nn@dbf_output$dknn), 1531)
  
  expect_equal(round(sum(res_keep_nn@dbf_output$dknn^2), 1), 1152.9)
  
  expect_true(is.numeric(res_keep_nn@dbf_output$dknn))
  
  
  # =======================================================
  # Test simulated distances in res_keep_nn@dbf_output$simulated_dknn
  expect_equal(round(sum(res_keep_nn@dbf_output$simulated_dknn), 2), 1335.37)
  expect_equal(round(mean(res_keep_nn@dbf_output$simulated_dknn), 2), 0.87)
  expect_equal(round(median(res_keep_nn@dbf_output$simulated_dknn), 2), 0.87)
  expect_equal(round(sd(res_keep_nn@dbf_output$simulated_dknn), 2), 0.01)

  expect_equal(length(res_keep_nn@dbf_output$simulated_dknn), 1531)
  
  expect_equal(round(sum(res_keep_nn@dbf_output$simulated_dknn^2), 2), 1164.85)
  
  expect_true(is.numeric(res_keep_nn@dbf_output$simulated_dknn))
  
  
  # =======================================================
  # Test critical distance in res_keep_nn@dbf_output$critical_distance
  expect_equal(round(res_keep_nn@dbf_output$critical_distance, 2), 0.84)
  expect_true(is.numeric(res_keep_nn@dbf_output$critical_distance))
  expect_equal(length(res_keep_nn@dbf_output$critical_distance), 1)
  
  
  # =======================================================
  # Test gene clusters in res_keep_nn@gene_clusters
  expect_equal(sum(as.numeric(names(res_keep_nn@gene_clusters))), 36)
  expect_equal(names(res_keep_nn@gene_clusters), as.character(1:8))
  expect_equal(length(unlist(res_keep_nn@gene_clusters)), 304)
  expect_equal(length(unlist(res_keep_nn@gene_clusters$`1`)), 78)
  expect_equal(length(unlist(res_keep_nn@gene_clusters$`2`)), 47)
  expect_equal(length(unlist(res_keep_nn@gene_clusters$`3`)), 42)
  expect_equal(length(unlist(res_keep_nn@gene_clusters$`4`)), 37)
  
  expect_equal(unlist(res_keep_nn@gene_clusters, use.names = FALSE), get_genes(res_keep_nn))
  expect_equal(
    unlist(res_keep_nn@gene_clusters$`1`, use.names = FALSE),
    get_genes(res_keep_nn, cluster = 1)
  )
  expect_equal(
    unlist(res_keep_nn@gene_clusters$`2`, use.names = FALSE),
    get_genes(res_keep_nn, cluster = 2)
  )
  expect_equal(
    unlist(res_keep_nn@gene_clusters$`3`, use.names = FALSE),
    get_genes(res_keep_nn, cluster = 3)
  )
  expect_equal(
    unlist(res_keep_nn@gene_clusters$`4`, use.names = FALSE),
    get_genes(res_keep_nn, cluster = 4)
  )
  expect_equal(unlist(res_keep_nn@gene_clusters, use.names = FALSE), rownames(res_keep_nn@data))
  
  expect_true(is.list(res_keep_nn@gene_clusters))
  expect_true(is.character(res_keep_nn@gene_clusters$`1`))
  
  
  # =======================================================
  # Test size of gene clusters in res_keep_nn@gene_clusters_metadata$size
  expect_equal(res_keep_nn@gene_clusters_metadata$size, c(`1` = 78L, `2` = 47L, `3` = 42L, `4` = 37L, `5` = 36L, `6` = 24L, 
                                                          `7` = 23L, `8` = 17L))
  expect_true(is.integer(res_keep_nn@gene_clusters_metadata$size))

  # Recorded sizes must match the length of each cluster in @gene_clusters
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[1],
    c("1" = length(res_keep_nn@gene_clusters$`1`))
  )
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[2],
    c("2" = length(res_keep_nn@gene_clusters$`2`))
  )
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[3],
    c("3" = length(res_keep_nn@gene_clusters$`3`))
  )
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[4],
    c("4" = length(res_keep_nn@gene_clusters$`4`))
  )
  # ... and the number of genes returned by get_genes() for that cluster
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[1],
    c("1" = length(get_genes(res_keep_nn, cluster = 1)))
  )
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[2],
    c("2" = length(get_genes(res_keep_nn, cluster = 2)))
  )
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[3],
    c("3" = length(get_genes(res_keep_nn, cluster = 3)))
  )
  expect_equal(
    res_keep_nn@gene_clusters_metadata$size[4],
    c("4" = length(get_genes(res_keep_nn, cluster = 4)))
  )
  
  
  # =======================================================
  # Test parameters in res_keep_nn@parameters
  # (no "k_mcl_graph" entry is expected here, unlike the keep_nn = FALSE run)
  expect_true(is.list(res_keep_nn@parameters))
  expect_equal(length(res_keep_nn@parameters), 11)
  expect_equal(names(res_keep_nn@parameters), c(
    "distance_method",
    "k",
    "noise_level",
    "fdr",
    "row_sum",
    "no_dknn_filter",
    "seed",
    "keep_nn",
    "output_path",
    "name",
    "inflation"
  ))
  
  expect_true(is.character(res_keep_nn@parameters$output_path))
  expect_equal(length(res_keep_nn@parameters$output_path), 1)
  
  expect_equal(res_keep_nn@parameters$name, "test")
  expect_equal(length(res_keep_nn@parameters$name), 1)
  
  expect_equal(res_keep_nn@parameters$distance_method, "pearson")
  expect_equal(length(res_keep_nn@parameters$distance_method), 1)
  
  expect_equal(res_keep_nn@parameters$k, 80)
  expect_equal(length(res_keep_nn@parameters$k), 1)
  
  expect_equal(res_keep_nn@parameters$noise_level, 5e-05)
  expect_equal(length(res_keep_nn@parameters$noise_level), 1)
  
  expect_equal(res_keep_nn@parameters$fdr,  0.005)
  expect_equal(length(res_keep_nn@parameters$fdr), 1)
  
  expect_equal(res_keep_nn@parameters$row_sum, 1)
  expect_equal(length(res_keep_nn@parameters$row_sum), 1)
  
  expect_false(res_keep_nn@parameters$no_dknn_filter)
  
  expect_equal(res_keep_nn@parameters$seed, 123)
  expect_equal(length(res_keep_nn@parameters$seed), 1)
  
  expect_true(res_keep_nn@parameters$keep_nn)
  
  expect_equal(res_keep_nn@parameters$inflation, 2.2)
  expect_equal(length(res_keep_nn@parameters$inflation), 1)
  
  
  # =======================================================
  # Test number of clusters in res_keep_nn@gene_clusters_metadata$number
  expect_equal(res_keep_nn@gene_clusters_metadata$number, 8)
  
  
  # =======================================================
  # Test cluster ids in res_keep_nn@gene_clusters_metadata$cluster_id
  expect_equal(length(res_keep_nn@gene_clusters_metadata$cluster_id), 8)
  expect_equal(res_keep_nn@gene_clusters_metadata$cluster_id, 1:8)
})





# ==============================================================================
# Test output_path argument
# ==============================================================================

test_that("Checking stop if output directory provided does not exist", {
  # Pass a valid `object` (same interface as the other gene_clustering()
  # calls in this file) so that output_path is the only invalid input.
  # With an undefined or misnamed input, expect_error() would be satisfied by
  # an unrelated error and the directory check would never be exercised.
  expect_error(gene_clustering(object = pbmc3k_medium_clust,
                               name = "test",
                               inflation = 2,
                               output_path = "not_a_directory"))
})
# dputhier/scigenex documentation built on May 31, 2024, 8:59 a.m.