tests/testthat/test-taxa_contribution.R

# =============================================================================
# Tests for taxa contribution analysis
# =============================================================================

# Shared test data generator
create_contrib_test_data <- function() {
  set.seed(123)
  samples <- c("S1", "S2")
  functions <- c("K00001", "K00002", "K00003")
  taxa <- c("ASV1", "ASV2", "ASV3", "ASV4")

  contrib <- expand.grid(
    sample = samples,
    function_id = functions,
    taxon = taxa,
    stringsAsFactors = FALSE
  )
  # Rename function_id -> function for raw input format
  contrib_raw <- contrib
  colnames(contrib_raw)[colnames(contrib_raw) == "function_id"] <- "function"
  contrib_raw$taxon_function_abun <- runif(nrow(contrib_raw))
  contrib_raw$norm_taxon_function_contrib <- runif(nrow(contrib_raw))

  # Stratified format (wide)
  strat_long <- expand.grid(
    function_id = functions,
    taxon = taxa,
    stringsAsFactors = FALSE
  )
  strat_wide <- data.frame(
    `function` = strat_long$function_id,
    sequence = strat_long$taxon,
    S1 = runif(nrow(strat_long)),
    S2 = runif(nrow(strat_long)),
    check.names = FALSE
  )

  # Taxonomy (QIIME2 format)
  taxonomy_qiime2 <- data.frame(
    `Feature.ID` = taxa,
    Taxon = c(
      "k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__acidophilus",
      "k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__coli",
      "k__Bacteria;p__Firmicutes;c__Clostridia;o__Eubacteriales;f__Lachnospiraceae;g__Roseburia;s__intestinalis",
      "k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__fragilis"
    ),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

  # Taxonomy (DADA2 format)
  taxonomy_dada2 <- data.frame(
    ASV = taxa,
    Kingdom = rep("Bacteria", 4),
    Phylum = c("Firmicutes", "Proteobacteria", "Firmicutes", "Bacteroidetes"),
    Class = c("Bacilli", "Gammaproteobacteria", "Clostridia", "Bacteroidia"),
    Order = c("Lactobacillales", "Enterobacterales", "Eubacteriales", "Bacteroidales"),
    Family = c("Lactobacillaceae", "Enterobacteriaceae", "Lachnospiraceae", "Bacteroidaceae"),
    Genus = c("Lactobacillus", "Escherichia", "Roseburia", "Bacteroides"),
    Species = c("acidophilus", "coli", "intestinalis", "fragilis"),
    stringsAsFactors = FALSE
  )

  # Metadata
  metadata <- data.frame(
    sample = c("S1", "S2"),
    group = c("Control", "Treatment"),
    stringsAsFactors = FALSE
  )

  list(
    contrib_raw = contrib_raw,
    strat_wide = strat_wide,
    taxonomy_qiime2 = taxonomy_qiime2,
    taxonomy_dada2 = taxonomy_dada2,
    metadata = metadata
  )
}


# ---- read_contrib_file ----

test_that("read_contrib_file parses contrib data correctly", {
  td <- create_contrib_test_data()
  result <- read_contrib_file(data = td$contrib_raw)

  expect_s3_class(result, "data.frame")
  expect_true("function_id" %in% colnames(result))
  expect_false("function" %in% colnames(result))
  expect_true(all(c("sample", "taxon", "taxon_function_abun",
                     "norm_taxon_function_contrib") %in% colnames(result)))
  expect_equal(nrow(result), nrow(td$contrib_raw))
})

test_that("read_contrib_file strips ko: prefix", {
  td <- create_contrib_test_data()
  td$contrib_raw$`function` <- paste0("ko:", td$contrib_raw$`function`)
  result <- read_contrib_file(data = td$contrib_raw)
  expect_false(any(grepl("^ko:", result$function_id)))
})

test_that("read_contrib_file accepts already standardized function_id", {
  td <- create_contrib_test_data()
  contrib_standardized <- td$contrib_raw
  colnames(contrib_standardized)[colnames(contrib_standardized) == "function"] <- "function_id"

  result <- read_contrib_file(data = contrib_standardized)

  expect_true("function_id" %in% colnames(result))
  expect_false("function" %in% colnames(result))
  expect_equal(nrow(result), nrow(contrib_standardized))
})

test_that("read_contrib_file errors on missing columns", {
  bad_df <- data.frame(sample = "S1", x = 1)
  expect_error(read_contrib_file(data = bad_df), "Missing required columns")
})

test_that("read_contrib_file requires file or data", {
  expect_error(read_contrib_file(), "Please provide either")
})


# ---- read_strat_file ----

test_that("read_strat_file converts wide to long format", {
  td <- create_contrib_test_data()
  result <- read_strat_file(data = td$strat_wide)

  expect_s3_class(result, "data.frame")
  expect_true(all(c("function_id", "taxon", "sample", "abundance")
                  %in% colnames(result)))
  # 3 functions x 4 taxa x 2 samples = 24 rows
  expect_equal(nrow(result), 24)
})

test_that("read_strat_file strips ko: prefix", {
  td <- create_contrib_test_data()
  td$strat_wide$`function` <- paste0("ko:", td$strat_wide$`function`)
  result <- read_strat_file(data = td$strat_wide)
  expect_false(any(grepl("^ko:", result$function_id)))
})

test_that("read_strat_file accepts already standardized column names", {
  td <- create_contrib_test_data()
  strat_standardized <- td$strat_wide
  colnames(strat_standardized)[colnames(strat_standardized) == "function"] <- "function_id"
  colnames(strat_standardized)[colnames(strat_standardized) == "sequence"] <- "taxon"

  result <- read_strat_file(data = strat_standardized)

  expect_true(all(c("function_id", "taxon", "sample", "abundance")
                  %in% colnames(result)))
  expect_equal(nrow(result), 24)
})

test_that("read_strat_file errors on too few columns", {
  bad_df <- data.frame(`function` = "K00001", check.names = FALSE)
  expect_error(read_strat_file(data = bad_df), "at least 3 columns")
})


# ---- aggregate_taxa_contributions ----

test_that("aggregate_taxa_contributions returns expected shape", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 4)

  expect_s3_class(agg, "data.frame")
  expect_true(all(c("sample", "function_id", "taxon_label", "contribution")
                  %in% colnames(agg)))
  expect_gt(nrow(agg), 0)
})

test_that("aggregate_taxa_contributions with QIIME2 taxonomy uses genus labels", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(
    contrib,
    taxonomy = td$taxonomy_qiime2,
    tax_level = "Genus",
    top_n = 10
  )

  expect_true(any(agg$taxon_label %in%
                    c("Lactobacillus", "Escherichia", "Roseburia", "Bacteroides")))
})

test_that("aggregate_taxa_contributions with DADA2 taxonomy uses genus labels", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(
    contrib,
    taxonomy = td$taxonomy_dada2,
    tax_level = "Genus",
    top_n = 10
  )

  expect_true(any(agg$taxon_label %in%
                    c("Lactobacillus", "Escherichia", "Roseburia", "Bacteroides")))
})

test_that("aggregate_taxa_contributions does not duplicate rows for repeated taxonomy IDs", {
  contrib <- data.frame(
    sample = "S1",
    function_id = "K00001",
    taxon = "ASV1",
    taxon_function_abun = 1,
    norm_taxon_function_contrib = 1,
    stringsAsFactors = FALSE
  )
  taxonomy <- data.frame(
    Feature.ID = c("ASV1", "ASV1"),
    Taxon = c("k__Bacteria;g__Alpha", "k__Bacteria;g__Beta"),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

  expect_warning(
    agg <- aggregate_taxa_contributions(contrib, taxonomy = taxonomy, top_n = 10),
    "Duplicate taxon IDs found"
  )
  expect_equal(nrow(agg), 1)
  expect_equal(sum(agg$contribution), 1)
})

test_that("aggregate_taxa_contributions creates 'Other' category with top_n", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 2)

  expect_true("Other" %in% agg$taxon_label)
  # Should have at most 3 unique labels (top 2 + Other)
  expect_lte(length(unique(agg$taxon_label)), 3)
})

test_that("aggregate_taxa_contributions works with strat file input", {
  td <- create_contrib_test_data()
  strat <- read_strat_file(data = td$strat_wide)
  agg <- aggregate_taxa_contributions(strat, top_n = 4)

  expect_s3_class(agg, "data.frame")
  expect_true(all(c("sample", "function_id", "taxon_label", "contribution")
                  %in% colnames(agg)))
})

test_that("aggregate_taxa_contributions with daa_results_df filters pathways", {
  skip_if_not_installed("ggpicrust2")

  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)

  # Create mock DAA results with a pathway that maps to our KOs
  daa_results <- data.frame(
    feature = "ko00010",
    p_adjust = 0.01,
    stringsAsFactors = FALSE
  )

  # This may error if ko00010 KOs don't match our test KOs, which is expected
  # The important thing is the pathway→KO mapping logic works
  result <- tryCatch(
    aggregate_taxa_contributions(contrib, daa_results_df = daa_results),
    error = function(e) e
  )

  # Should either succeed or fail with informative message about no matching data
  if (inherits(result, "error")) {
    expect_match(result$message, "No data remaining|No significant")
  } else {
    expect_s3_class(result, "data.frame")
  }
})


# ---- taxa_contribution_bar ----

test_that("taxa_contribution_bar returns ggplot", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 4)

  p <- taxa_contribution_bar(agg, td$metadata, group = "group")
  expect_s3_class(p, "ggplot")
})

test_that("taxa_contribution_bar accepts facet_by = 'group'", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 4)

  p <- taxa_contribution_bar(agg, td$metadata, group = "group",
                             facet_by = "group")
  expect_s3_class(p, "ggplot")
})

test_that("taxa_contribution_bar supports metadata with a single group", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 4)
  td$metadata$group <- "Control"

  p <- taxa_contribution_bar(agg, td$metadata, group = "group")
  expect_s3_class(p, "ggplot")
})


# ---- taxa_contribution_heatmap ----

test_that("taxa_contribution_heatmap returns ggplot", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 4)

  p <- taxa_contribution_heatmap(agg, cluster_rows = FALSE, cluster_cols = FALSE)
  expect_s3_class(p, "ggplot")
})

test_that("taxa_contribution_heatmap with clustering returns patchwork", {
  skip_if_not_installed("ggdendro")

  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)
  agg <- aggregate_taxa_contributions(contrib, top_n = 4)

  p <- taxa_contribution_heatmap(agg, cluster_rows = TRUE, cluster_cols = TRUE)
  # With dendrograms, patchwork wraps the result
  expect_true(inherits(p, "ggplot") || inherits(p, "patchwork"))
})


# ---- Edge cases ----

test_that("aggregate_taxa_contributions errors on empty data after filtering", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)

  expect_error(
    aggregate_taxa_contributions(contrib, pathway_ids = "nonexistent_pathway"),
    "No data remaining"
  )
})

test_that("aggregate_taxa_contributions errors with invalid tax_level", {
  td <- create_contrib_test_data()
  contrib <- read_contrib_file(data = td$contrib_raw)

  expect_error(
    aggregate_taxa_contributions(contrib, taxonomy = td$taxonomy_qiime2,
                                 tax_level = "Superclass"),
    "Invalid tax_level"
  )
})

Try the ggpicrust2 package in your browser

Any scripts or data that you put into this service are public.

ggpicrust2 documentation built on April 10, 2026, 5:06 p.m.