tests/testthat/test_assay5.R

#' Returns a random counts matrix.
get_test_assay <- function(ncells, nfeatures, assay_version) {
  # Use the `assay_version` param to choose the correct assay builder.
  create_assay <- switch(assay_version,
    v3 = CreateAssayObject,
    v5 = CreateAssay5Object,
    stop("`assay_version` should be one of 'v3', 'v5'")
  )

  # Populate a `nfeatures` x `ncells` matrix with zeros.
  counts <- matrix(0, ncol = ncells, nrow = nfeatures)

  # Assign column and row names to the matrix to label cells and genes.
  colnames(counts) <- paste0("cell", seq(ncol(counts)))
  row.names(counts) <- paste0("gene", seq(nrow(counts)))

  # Convert `counts` to a `dgCMatrix`.
  counts_sparse <- as.sparse(counts)
  # Build an assay of the specified type.
  assay <- create_assay(counts_sparse)

  return(assay)
}

#' Mocks "highly variable feature" annotations and adds them to the
#' feature-level metadata of `assay`.
add_hvf_info <- function(
  assay, 
  nfeatures = NULL,
  features = NULL,
  method_name, 
  layer_name
) {
  if (is.null(nfeatures) & is.null(features)) {
    # Ensure that one of `nfeatures` or `features` is set.
    stop("One of `nfeatures` or `features` must be provided.")
  } else if (!is.null(nfeatures) & !is.null(features)) {
    # Ensure that only one of `nfeatures` or `features` is set.
    stop("Only one of `nfeatures` or `feature` may be provided.")
  } else if (!is.null(nfeatures)) {
    # If `nfeatures` is provided, randomly sample from the `assay`'s features.
    variable_features <- sample(
      rownames(assay),
      size = nfeatures,
      replace = FALSE
    )
  } else {
    # If `features` was provided, use it.
    variable_features <- features
  }

  all_features <- rownames(assay)
  constant_features <- setdiff(all_features, variable_features)

  # Add a column to the assay's feature-level metadata indicating which
  # features are variable.
  is_variable <- all_features %in% variable_features
  names(is_variable) <- all_features
  variable_column <- paste("vf", method_name, layer_name, "variable", sep = "_")
  assay[[variable_column]] <- is_variable

  # Add a column to the assay's feature-level metadata indicating the order
  # (i.e. "rank") that each variable feature was selected in.
  variable_rank <- seq_along(variable_features)
  names(variable_rank) <- variable_features
  constant_rank <- seq_along(constant_features) + length(variable_features)
  names(constant_rank) <- constant_features
  feature_rank <- c(variable_rank, constant_rank)
  rank_column <- paste("vf", method_name, layer_name, "rank", sep = "_")
  assay[[rank_column]] <- feature_rank

  # Add a column to the assay's feature-level metadata containing a random
  # values in reverse order from `variable_rank` (i.e. the lowest rank
  # has the highest value).
  feature_values <- runif(length(all_features))
  feature_values <- sort(feature_values, decreasing = TRUE)
  names(feature_values) <- names(feature_rank)
  value_column <- paste("vf", method_name, layer_name, "value", sep = "_")
  assay[[value_column]] <- feature_values

  return(assay)
}

context("HVFInfo")

test_that("`HVFInfo.Assay5` works with a single set of metadata", {
  # Populate an assay with random values for testing.
  assay <- get_test_assay(
    ncells = 10,
    nfeatures = 10,
    assay_version = "v5"
  )
  # Add similarly random HVF metadata to `assay`.
  assay <- add_hvf_info(
    assay,
    nfeatures = 10,
    method_name = "vst",
    layer_name = "counts"
  )

  # Extract the expected HVFInfo and rename the columns.
  info_columns <- c(
    "vf_vst_counts_variable",
    "vf_vst_counts_rank",
    "vf_vst_counts_value"
  )
  expected_info <- assay[[]][, info_columns]
  colnames(expected_info) <- c("variable", "rank", "value")

  # Check the base case where `method` and `layer` are both set and valid.
  result <- HVFInfo(assay, method = "vst", layer = "counts")
  expect_identical(result, expected_info["value"])

  # Check the same case with all relevant HVF columns returned.
  result <- HVFInfo(assay, method = "vst", layer = "counts", status = TRUE)
  expect_identical(result, expected_info)

  # Check that `layer` can be omitted.
  result <- HVFInfo(assay, method = "vst")
  expect_identical(result, expected_info["value"])

  # Check that `layer` can be `NULL`.
  result <- HVFInfo(assay, method = "vst", layer = NULL)
  expect_identical(result, expected_info["value"])

  # Check that `layer` can be `NA`.
  result <- HVFInfo(assay, method = "vst", layer = NA)
  expect_identical(result, expected_info["value"])

  # Check that the `method` parameter can be omitted.
  result <- HVFInfo(assay)
  expect_identical(result, expected_info["value"])

  # Check that `NULL` is returned if `method` does not point HVF metadata.
  result <- expect_warning(
    HVFInfo(assay, method = "not-a-method"),
    paste(
      "Unable to find highly variable feature information for",
      "method='not-a-method' and layer='NA'."
    )
  )
  expect_null(result)
  result <- expect_warning(
    HVFInfo(assay, method = "not-a-method", layer = "counts"),
    paste(
      "Unable to find highly variable feature information for",
      "method='not-a-method' and layer='counts'."
    )
  )
  expect_null(result)
})

test_that("`HVFInfo.Assay5` works with multiple methods run on different layers", {
  # Populate an assay with random values for testing.
  assay <- get_test_assay(
    ncells = 10,
    nfeatures = 10,
    assay_version = "v5"
  )
  # Add similarly random HVF metadata to `assay`.
  assay <- add_hvf_info(
    assay,
    nfeatures = 5,
    method_name = "vst",
    layer_name = "counts"
  )
  # Add a second "data" layer to the assay by duplicating "count".
  LayerData(assay, layer = "data") <- LayerData(assay, layer = "counts")
  # Add a second set of HVF columns to `assay`.
  assay <- add_hvf_info(
    assay,
    nfeatures = 5,
    method_name = "mvp",
    layer_name = "data"
  )

  # Extract the first set of HVF info and rename the columns.
  vst_columns <- c(
    "vf_vst_counts_variable",
    "vf_vst_counts_rank",
    "vf_vst_counts_value"
  )
  vst_info <- assay[[]][, vst_columns]
  colnames(vst_info) <- c("variable", "rank", "value")
  # Extract the first set of HVF info and rename the columns.
  mvp_columns <- c(
    "vf_mvp_data_variable",
    "vf_mvp_data_rank",
    "vf_mvp_data_value"
  )
  mvp_info <- assay[[]][, mvp_columns]
  colnames(mvp_info) <- c("variable", "rank", "value")

  # Check the base case where `method` and `layer` are both set and valid.
  result <- HVFInfo(assay, method = "vst", layer = "counts")
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp", layer = "data")
  expect_identical(result, mvp_info["value"])

  # Check that `layer` can be omitted. In this case, `layer` will default to
  # `NA` which will be in turn be interpreted as `Layers(assay)` (i.e. all layers).
  result <- HVFInfo(assay, method = "vst")
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp")
  expect_identical(result, mvp_info["value"])

  # Check that `layer` can be `NULL`. In this case, `layer` will be interpreted
  # as `DefaultLayer(assay)`. Thus, we are expected `layer` to resolve to "counts".
  result <- HVFInfo(assay, method = "vst", layer = NULL)
  expect_identical(result, vst_info["value"])
  result <- expect_warning(
    HVFInfo(assay, method = "mvp", layer = NULL),
    paste(
      "Unable to find highly variable feature information for",
      "method='mvp' and layer='NULL'."
    )
  )
  expect_null(result)

  # Check that `layer` can be `NA`. In this case, `layer` will be interpreted
  # as `Layers(assay)` (i.e. all layers).
  result <- HVFInfo(assay, method = "vst", layer = NA)
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp", layer = NA)
  expect_identical(result, mvp_info["value"])
})

test_that("`HVFInfo.Assay5` works with a single method run on multiple layers", {
  # Populate an assay with random values for testing.
  assay <- get_test_assay(
    ncells = 10,
    nfeatures = 10,
    assay_version = "v5"
  )
  # Split the "counts" layer in half across it's columns and drop the original layer.
  LayerData(assay, layer = "counts.1") <- LayerData(assay, layer = "counts")[, 1:5]
  LayerData(assay, layer = "counts.2") <- LayerData(assay, layer = "counts")[, 5:10]
  # Since it's first, "counts.1" would be chosen as the default layer when
  # "counts" is dropped but we'll do it explicitly (also avoids a warning).
  DefaultLayer(assay) <- "counts.1"
  LayerData(assay, layer = "counts") <- NULL

  # Add random HVF metadata for each layer. By adding "counts.2" before "counts.1"
  # we introduce discrepancy between the behavior of `layer = NULL` and `layer = NA`.
  assay <- add_hvf_info(
    assay,
    nfeatures = 5,
    method_name = "vst",
    layer_name = "counts.2"
  )
  assay <- add_hvf_info(
    assay,
    nfeatures = 5,
    method_name = "vst",
    layer_name = "counts.1"
  )

  # Extract the first set of HVF info and rename the columns.
  vst.1_columns <- c(
    "vf_vst_counts.1_variable",
    "vf_vst_counts.1_rank",
    "vf_vst_counts.1_value"
  )
  vst.1_info <- assay[[]][, vst.1_columns]
  colnames(vst.1_info) <- c("variable", "rank", "value")
  # Extract the second set of HVF info and rename the columns.
  vst.2_columns <- c(
    "vf_vst_counts.2_variable",
    "vf_vst_counts.2_rank",
    "vf_vst_counts.2_value"
  )
  vst.2_info <- assay[[]][, vst.2_columns]
  colnames(vst.2_info) <- c("variable", "rank", "value")

  # Check the base case where `method` and `layer` are both set and valid.
  result <- HVFInfo(assay, method = "vst", layer = "counts.1")
  expect_identical(result, vst.1_info["value"])
  result <- HVFInfo(assay, method = "vst", layer = "counts.2")
  expect_identical(result, vst.2_info["value"])

  # Check that `layer` can be omitted. In this case, `layer` will default to
  # `NULL` which will be in turn be interpreted as `Layers(assay)`
  # (i.e. all layers). Thus, we are expected `layer` to resolve to "counts.2".
  result <- HVFInfo(assay, method = "vst")
  expect_identical(result, vst.2_info["value"])

  # Check that `layer` can be `NULL`. In this case, `layer` will be interpreted
  # as `DefaultLayer(assay)`. Thus, we are expected `layer` to resolve to "counts".
  result <- HVFInfo(assay, method = "vst", layer = NULL)
  expect_identical(result, vst.1_info["value"])

  # Check that `layer` can be `NA`. In this case, `layer` will be interpreted
  # as `Layers(assay)` (i.e. all layers) and the first one associated with
  # `method` will be returned.
  result <- HVFInfo(assay, method = "vst", layer = NA)
  expect_identical(result, vst.2_info["value"])
})

test_that("`HVFInfo.Assay5` works with multiple methods run on the same layer", {
  # Populate an assay with random values for testing.
  assay <- get_test_assay(
    ncells = 10,
    nfeatures = 10,
    assay_version = "v5"
  )
  # Add similarly random HVF metadata to `assay`.
  assay <- add_hvf_info(
    assay,
    nfeatures = 5,
    method_name = "vst",
    layer_name = "counts"
  )
  # Add a second set of HVF columns to `assay`.
  assay <- add_hvf_info(
    assay,
    nfeatures = 5,
    method_name = "mvp",
    layer_name = "counts"
  )

  # Extract the first set of HVF info and rename the columns.
  vst_columns <- c(
    "vf_vst_counts_variable",
    "vf_vst_counts_rank",
    "vf_vst_counts_value"
  )
  vst_info <- assay[[]][, vst_columns]
  colnames(vst_info) <- c("variable", "rank", "value")
  # Extract the first set of HVF info and rename the columns.
  mvp_columns <- c(
    "vf_mvp_counts_variable",
    "vf_mvp_counts_rank",
    "vf_mvp_counts_value"
  )
  mvp_info <- assay[[]][, mvp_columns]
  colnames(mvp_info) <- c("variable", "rank", "value")

  # Check the base case where `method` and `layer` are both set and valid.
  result <- HVFInfo(assay, method = "vst", layer = "counts")
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp", layer = "counts")
  expect_identical(result, mvp_info["value"])

  # Check the same case with all relevant HVF columns returned.
  result <- HVFInfo(assay, method = "vst", layer = "counts", status = TRUE)
  expect_identical(result, vst_info)
  result <- HVFInfo(assay, method = "mvp", layer = "counts", status = TRUE)
  expect_identical(result, mvp_info)

  # Check that `layer` can be omitted. In this case, `layer` will default to
  # `NULL` which will be in turn be interpreted as `DefaultLayer(assay)`.
  # Thus, we are expected `layer` to resolve to "counts".
  result <- HVFInfo(assay, method = "vst")
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp")
  expect_identical(result, mvp_info["value"])

  # Check that `layer` can be `NULL`. In this case, `layer` will be interpreted
  # as `DefaultLayer(assay)`. Thus, we are expected `layer` to resolve to "counts".
  result <- HVFInfo(assay, method = "vst", layer = NULL)
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp", layer = NULL)
  expect_identical(result, mvp_info["value"])

  # Check that `layer` can be `NA`. In this case, `layer` will be interpreted
  # as `Layers(assay)` (i.e. all layers).
  result <- HVFInfo(assay, method = "vst", layer = NA)
  expect_identical(result, vst_info["value"])
  result <- HVFInfo(assay, method = "mvp", layer = NA)
  expect_identical(result, mvp_info["value"])
})

context("VariableFeatures")

test_that("`VariableFeatures.Assay5` getter/setter works as expected", {
  # Populate an assay with random values for testing.
  assay <- get_test_assay(
    ncells = 10,
    nfeatures = 10,
    assay_version = "v5"
  )

  # Set initial variable features (including some missing values).
  test_features <- c(gene1 = "gene1", gene3 = "gene3", gene5 = "gene5")
  assay[["var.features"]] <- test_features
  expected_features <- c("gene1", "gene3", "gene5")
  expect_identical(VariableFeatures(assay), expected_features)
  expect_identical(VariableFeatures(assay, nfeatures = 2), expected_features[1:2])

  # Set the ranking for the variable features.
  test_rank <- c(gene1 = 3, gene3 = 1, gene5 = 2)
  assay[["var.features.rank"]] <- test_rank
  expected_features <- c("gene3", "gene5", "gene1")
  expect_identical(VariableFeatures(assay), expected_features)
  expect_identical(VariableFeatures(assay, nfeatures = 2), expected_features[1:2])

  # Use the VariableFeatures setter to overwrite the current variable features.
  expected_features <- c("gene10", "gene2", "gene8")
  VariableFeatures(assay) <- expected_features
  expect_identical(VariableFeatures(assay), expected_features)
})

test_that("`VariableFeatures` works with multi-layer assays", {
  # Populate an assay with random values for testing.
  assay <- get_test_assay(
    ncells = 9,
    nfeatures = 10,
    assay_version = "v5"
  )  
  # Split the "counts" layer in thirds across it's columns and drop the original layer.
  LayerData(assay, layer = "counts.1") <- LayerData(assay, layer = "counts")[, 1:3]
  # Leave "gene10" out of "counts.2" so that it cannot be called variable.
  LayerData(assay, layer = "counts.2") <- LayerData(assay, layer = "counts")[1:9, 4:6]
  LayerData(assay, layer = "counts.3") <- LayerData(assay, layer = "counts")[, 7:9]
  # Since it's first, "counts.1" would be chosen as the default layer when
  # "counts" is dropped but we'll do it explicitly (also avoids a warning).
  DefaultLayer(assay) <- "counts.1"
  LayerData(assay, layer = "counts") <- NULL

  # Add HVF metadata to each layer.
  assay <- add_hvf_info(
    assay,
    features = c("gene1", "gene2", "gene3"),
    method_name = "vst",
    layer_name = "counts.1"
  )
  assay <- add_hvf_info(
    assay,
    features = c("gene3", "gene2", "gene4"),
    method_name = "vst",
    layer_name = "counts.2"
  )
  assay <- add_hvf_info(
    assay,
    features = c("gene1", "gene2", "gene10"),
    method_name = "vst",
    layer_name = "counts.3"
  )

  # Expect the consensus (aggregated) variable features from the 
  # multi-layer assay.
  expected_features <- c("gene2", "gene1", "gene3", "gene4")
  expect_identical(VariableFeatures(assay), expected_features)
  expect_identical(VariableFeatures(assay, nfeatures = 2), expected_features[1:2])
})
mojaveazure/seurat-object documentation built on April 5, 2025, 3:13 a.m.