# generate testing data
set.seed(1665)
merged_sce <- sim_sce(n_cells = 303, n_genes = 100, n_empty = 0)
batches <- rep(c("a", "b", "c"), each = 101)
barcodes <- rownames(colData(merged_sce))
# set up colData rownames as batch-barcode
new_rownames <- glue::glue("{batches}-{barcodes}")
rownames(colData(merged_sce)) <- new_rownames
# Add "sample" to colData
colData(merged_sce)$sample <- batches
# add barcode to colData
colData(merged_sce)$cell_id <- barcodes
# add PCs for testing (exact numbers don't matter)
# make a 300x100 matrix
reducedDim(merged_sce, "PCA") <- matrix(runif(303 * 100, min = 0, max = 100), nrow = 303)
rownames(reducedDim(merged_sce, "PCA")) <- colnames(merged_sce)
# grab pcs and batches
pcs <- reducedDim(merged_sce, "PCA")
batches <- merged_sce$sample
# create list of sce objects
batch_ids <- unique(batches)
sce_list <- purrr::map(
batch_ids,
\(batch){
individual_sce <- merged_sce[, which(colData(merged_sce)$sample == batch)]
# individual sce object columns should be named without batches
colnames(individual_sce) <- stringr::word(colnames(individual_sce), -1, sep = "-")
return(individual_sce)
}
) |>
purrr::set_names(batch_ids)
test_that("`filter_pcs` works as expected", {
# expect rownames of returned pcs to be the same as the batch column
named_pcs <- filter_pcs(pcs, batches)
expect_true(all.equal(rownames(named_pcs), batches))
})
test_that("`filter_pcs` works as expected when rename_pcs is FALSE", {
# set input PCA names to be something else
named_pcs <- filter_pcs(pcs, batches, rename_pcs = FALSE)
expect_true(all.equal(rownames(named_pcs), rownames(pcs)))
})
test_that("`filter_pcs` removes NAs as expected", {
batches[1:5] <- NA # set a subset of batch labels to NAs
# expect rownames to be equal to only the batches without NA
named_pcs <- filter_pcs(pcs, batches)
expect_true(all.equal(rownames(named_pcs), batches[!is.na(batches)]))
expect_false(length(rownames(named_pcs)) == length(batches))
})
test_that("`filter_pcs` fails without batch labels", {
all_na_batches <- NA
expect_error(filter_pcs(pcs, all_na_batches))
})
test_that("`downsample_pcs` works as expected", {
test_frac <- 0.8
downsampled <- downsample_pcs(pcs, frac_cells = test_frac)
downsampled_pcs <- downsampled
downsampled_batch_labels <- rownames(downsampled)
# check that the correct number of cells were downsampled
expect_true(length(rownames(downsampled_pcs)) == round(test_frac * length(rownames(pcs))))
expect_true(length(downsampled_batch_labels) == round(test_frac * length(batches)))
# check that the columns in downsampled were in the original pcs
expect_true(length(colnames(downsampled_pcs)) == length(colnames(pcs)))
})
test_that("`downsample_pcs` fails as expected", {
# check that downsampling fails when providing a fraction outside of 0-1 range
expect_error(downsample_pcs(pcs, frac_cells = 2))
expect_error(downsample_pcs(pcs, frac_cells = 0))
})
test_that("`calculate_silhouette_width` works as expected", {
nreps <- 5
asw <- calculate_silhouette_width(
merged_sce = merged_sce,
pc_names = "PCA",
nreps = 5,
batch_column = "sample"
)
expected_cols <- c(
"rep", "silhouette_width", "silhouette_cluster",
"other_cluster", "pc_name"
)
# check column names
expect_true(all(expected_cols %in% colnames(asw)))
# check that pc name is PCA
expect_true(all(asw$pc_name == "PCA"))
# check that number of reps is correct
expect_true(all(sort(unique(asw$rep)) == 1:nreps))
# check that width is in expected range
expect_true(all(dplyr::between(asw$silhouette_width, -1, 1)))
})
test_that("`calculate_silhouette_width`fails as expected", {
# missing pc name
expect_error(calculate_silhouette_width(merged_sce,
pc_names = "test_PC",
batch_column = "sample"
))
# error in nreps
expect_error(calculate_silhouette_width(merged_sce,
pc_names = "PCA",
nreps = "twenty",
batch_column = "sample"
))
# incorrect batch labels
expect_error(calculate_silhouette_width(merged_sce,
pc_names = "PCA",
batch_column = "batch"
))
})
test_that("`within_batch_ari_from_pcs` works as expected", {
ari_from_pcs <- within_batch_ari_from_pcs(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_name = "PCA",
batch_column = "sample",
cell_id_column = "cell_id"
)
expected_cols <- c(
"ari", "batch_id", "pc_name"
)
# check column names
expect_true(all(expected_cols %in% colnames(ari_from_pcs)))
# check that pc name is PCA
expect_true(all(ari_from_pcs$pc_name == "PCA"))
# check that ari is in expected range
expect_true(all(
dplyr::between(ari_from_pcs$ari, -1, 1)
))
})
test_that("`within_batch_ari_from_pcs`fails as expected", {
# missing pc name
expect_error(within_batch_ari_from_pcs(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_name = "test_PC",
batch_column = "sample",
cell_id_column = "cell_id"
))
# incorrect batch labels
expect_error(within_batch_ari_from_pcs(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_name = "PCA",
batch_column = "batch",
cell_id_column = "cell_id"
))
# incorrect barcode label
expect_error(within_batch_ari_from_pcs(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_name = "PCA",
batch_column = "sample",
cell_id_column = "not a barcode"
))
## missing names for sce list
# save to a new variable so we can use the sce list again later
unnamed_sce_list <- sce_list
names(unnamed_sce_list) <- NULL
expect_error(within_batch_ari_from_pcs(
individual_sce_list = unnamed_sce_list,
merged_sce = merged_sce,
pc_name = "PCA",
batch_column = "sample",
cell_id_column = "cell_id"
))
})
test_that("`calculate_within_batch_ari` works as expected", {
# add fastmnn pca to test multiple pcs
reducedDim(merged_sce, "fastMNN_PCA") <- matrix(runif(303 * 100, min = 0, max = 100), nrow = 303)
ari <- scpcaTools::calculate_within_batch_ari(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_names = c("PCA", "fastMNN_PCA"),
batch_column = "sample",
cell_id_column = "cell_id"
)
expected_cols <- c(
"ari", "batch_id", "pc_name"
)
# check column names
expect_true(all(expected_cols %in% colnames(ari)))
# check that pc name is PCA
expect_true(all(ari$pc_name %in% c("PCA", "fastMNN_PCA")))
# check that ari is in expected range
expect_true(all(
dplyr::between(ari$ari, -1, 1)
))
})
test_that("`calculate_within_batch_ari`fails as expected", {
# missing pc name
expect_error(calculate_within_batch_ari(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_names = "test_PC",
batch_column = "sample",
cell_id_column = "cell_id"
))
# incorrect batch labels
expect_error(within_batch_ari_from_pcs(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_names = c("PCA", "fastMNN_PCA"),
batch_column = "batch",
cell_id_column = "cell_id"
))
# incorrect barcode labels
expect_error(within_batch_ari_from_pcs(
individual_sce_list = sce_list,
merged_sce = merged_sce,
pc_names = c("PCA", "fastMNN_PCA"),
batch_column = "sample",
cell_id_column = "not a barcode"
))
})
test_that("`calculate_ilisi` works as expected", {
ilisi <- calculate_ilisi(
merged_sce = merged_sce,
pc_name = "PCA",
batch_column = "sample"
)
expected_cols <- c(
"ilisi_score", "cell_barcode", "batch_id", "ilisi_score_norm"
)
# check column names
expect_true(all(expected_cols %in% colnames(ilisi)))
# check that normalized ilisi scores are in in expected range
expect_true(
all(
dplyr::between(ilisi$ilisi_score_norm, 0, 1)
)
)
})
test_that("`calculate_ilisi` fails as expected", {
# fails with missing PC name
expect_error(calculate_ilisi(
merged_sce = merged_sce,
pc_name = "test_PC",
batch_column = "sample"
))
# fails with incorrect batch column
expect_error(calculate_ilisi(
merged_sce = merged_sce,
pc_name = "PCA",
batch_column = "library_id"
))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.