tests/testthat/test-standardization.R

library(dplyr)
library(testthat)

test_that("get_centers returns expected result", {
  fake_input <- simulate_standardization_input(n_markers = 10, n_samples = 5, ploidy = 2, seed = 2025)

  # Example that marker has all 3 possible dosages
  m <- fake_input$standardization_input %>% filter(MarkerName == "m2")
  result <- get_centers(m, ploidy = 2)

  expect_type(result, "list")
  expect_named(result, c("rm", "centers_theta", "MarkerName", "n.clusters"))
  expect_true(length(result$centers_theta) == 3)
  expect_equal(result$centers_theta, c(0.03685, 0.47120, 0.97875), tolerance = 1e-4)

  # Example that marker has only 2 of the 3 possible dosages - 1 imputed
  m1 <- fake_input$standardization_input %>% filter(MarkerName == "m1")
  result <- get_centers(m1, ploidy = 2) # marker get flag 2 - will be removed
  expect_true(result$rm == 2)
  result <- get_centers(m1, ploidy = 2, n.clusters.thr = 2)


  expect_type(result, "list")
  expect_named(result, c("rm", "centers_theta", "MarkerName", "n.clusters"))
  expect_true(length(result$centers_theta) == 3)
  expect_equal(result$centers_theta, c(0.086750, 0.520875, 0.955000), tolerance = 1e-4)
})

test_that("get_baf works as expected", {
  centers_theta <- c(0.1, 0.5, 0.9)
  theta <- c(0.05, 0.3, 0.6, 0.95)

  bafs <- get_baf(theta, centers_theta, ploidy = 2)

  expect_equal(length(bafs), length(theta))
  expect_true(all(bafs >= 0 & bafs <= 1))
  expect_equal(sum(bafs), 1.875, tolerance = 1e-4)
})

test_that("get_baf_par returns correct BAFs for 5 markers and 4 samples", {
  set.seed(123)

  n_markers <- 5
  n_samples <- 4
  marker_ids <- paste0("m", 1:n_markers)
  sample_ids <- paste0("S", 1:n_samples)

  # Simulate theta values between 0 and 1 (e.g., sample allelic ratios)
  theta_matrix <- matrix(runif(n_markers * n_samples, min = 0.05, max = 0.95), nrow = n_markers)
  theta_df <- data.frame(ID = marker_ids, theta_matrix)
  colnames(theta_df)[-1] <- sample_ids

  # Simulate cluster centers (3 centers for diploid: 0, 0.5, 1)
  # We'll make them a bit variable to simulate real centroids
  centers_matrix <- t(replicate(n_markers, sort(c(
    runif(1, 0.05, 0.15),
    runif(1, 0.45, 0.55),
    runif(1, 0.85, 0.95)
  ))))
  centers_df <- data.frame(ID = marker_ids, centers_matrix)
  colnames(centers_df)[-1] <- 1:3

  # Prepare input list
  par_input <- list(theta_df, centers_df)

  # Call the function
  result <- get_baf_par(par_input, ploidy = 2)

  # Assertions
  expect_type(result, "list")
  expect_length(result, n_markers)
  expect_equal(sum(result[[1]]), 2.1509, tolerance = 0.001)
})

test_that("get_zscore returns z-score table", {
  fake_input <- simulate_standardization_input(n_markers = 10, n_samples = 5, ploidy = 2, seed = 2025)

  result <- get_zscore(data = fake_input$sample_data, geno.pos = fake_input$geno_pos)

  expect_s3_class(result, "data.frame")
  expect_true("z" %in% colnames(result))
  expect_equal(nrow(result), nrow(fake_input$sample_data))
  expect_equal(var(result$z), 0.8163, tolerance = 0.001)
})

test_that("standardize runs and returns expected object", {
  fake_input <- simulate_standardization_input(n_markers = 10, n_samples = 5, ploidy = 2, seed = 2025)

  result <- standardize(
    data = fake_input$sample_data,
    genos = fake_input$geno_data,
    geno.pos = fake_input$geno_pos,
    threshold.missing.geno = 1,
    threshold.geno.prob = 0.5,
    ploidy.standardization = 2,
    threshold.n.clusters = 2,
    n.cores = 1,
    type = "intensities",
    verbose = FALSE
  )

  expect_s3_class(result, "qploidy_standardization")
  expect_true("data" %in% names(result))
  expect_equal(sum(result$data$baf, na.rm = TRUE), 27.3644 ,tolerance = 0.001)


  temp <- tempfile()
  result <- standardize(
    data = fake_input$sample_data,
    genos = fake_input$geno_data,
    geno.pos = fake_input$geno_pos,
    threshold.missing.geno = 1,
    threshold.geno.prob = 0.5,
    ploidy.standardization = 2,
    threshold.n.clusters = 2,
    n.cores = 1,
    type = "counts",
    verbose = TRUE,
    out_filename = temp
  )

  expect_s3_class(result, "qploidy_standardization")
  expect_true("data" %in% names(result))
  expect_equal(sum(result$data$baf, na.rm = TRUE), 27.3644 ,tolerance = 0.001)

  # Check print.qploidy_standardization
  txt <- print(result)
  expect_equal(dim(txt), c(6,3))
  expect_equal(as.character(txt[4,3]), "(0 %)  ")

  # Check read.qploidy_standardization
  result <- read_qploidy_standardization(temp)

  expect_s3_class(result, "qploidy_standardization")
  expect_true("data" %in% names(result))
  expect_equal(sum(result$data$baf, na.rm = TRUE), 27.3644 ,tolerance = 0.001)

  # Check print.qploidy_standardization
  txt <- print(result)
  expect_equal(dim(txt), c(6,3))
  expect_equal(as.character(txt[4,3]), "(0 %)  ")
})

test_that("rm_outlier removes extreme values", {
  # Create a dataset with 19 'normal' values and 1 extreme outlier
  normal_theta <- rnorm(19, mean = 0.2, sd = 0.01)
  outlier_theta <- 3.0
  test_theta <- c(normal_theta, outlier_theta)

  test_df <- data.frame(theta = test_theta)
  cleaned <- rm_outlier(test_df)

  expect_true(length(cleaned) < nrow(test_df))
})


test_that("updog_centers returns correct output structure", {
  skip_if_not_installed("updog")

  library(dplyr)
  library(updog)

  set.seed(123)
  n_markers <- 5
  n_individuals <- 10
  ploidy <- 2

  # Simulate varying total read depth (size) for each marker × individual
  sizemat <- matrix(rpois(n_markers * n_individuals, lambda = 100), nrow = n_markers, ncol = n_individuals)
  sizemat[sizemat == 0] <- 1  # avoid 0 depth

  # Simulate reference reads with moderate allele bias
  refmat <- matrix(rbinom(n_markers * n_individuals, size = c(sizemat), prob = 0.7),
                   nrow = n_markers, ncol = n_individuals)
  altmat <- sizemat - refmat

  # Name dimensions
  marker_ids <- paste0("m", 1:n_markers)
  sample_ids <- paste0("ind", 1:n_individuals)

  rownames(refmat) <- rownames(altmat) <- rownames(sizemat) <- marker_ids
  colnames(refmat) <- colnames(altmat) <- colnames(sizemat) <- sample_ids

  # Create multidog object
  md <- multidog(
    refmat = refmat,
    sizemat = sizemat,
    ploidy = ploidy,
    model = "norm"
  )

  # No markers removed
  rm.mks <- character(0)

  result <- updog_centers(md, threshold.n.clusters = 2, rm.mks = rm.mks)

  # Basic checks
  expect_type(result, "list")
  expect_length(result, 5)
  expect_equal(sum(result$m1$centers_theta), 1.502, tolerance = 0.001)
})

Try the Qploidy package in your browser

Any scripts or data that you put into this service are public.

Qploidy documentation built on June 8, 2025, 10 a.m.