test-pedanalysis.R
In visPedigree: Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees

library(testthat)
library(visPedigree)
library(data.table)

# Create a minimal testing dataset
test_ped_dt <- data.table::data.table(
  Ind = c("A", "B", "C", "D", "E", "F"),
  Sire = c(NA, NA, "A", "A", "C", "C"),
  Dam = c(NA, NA, "B", "B", "D", "D"),
  Sex = c("male", "female", "male", "female", "male", "female"),
  Gen = c(1, 1, 2, 2, 3, 3),
  Group = c("G1", "G1", "G2", "G2", "G3", "G3")
)
# C and D are full siblings (F _ offspring = 0.25). 
# E and F are offspring of full siblings.

test_ped <- suppressMessages(tidyped(test_ped_dt, reference = c("E", "F")))

test_that("pedrel calculates relation correctly and handles boundaries", {
  # N < 2 case for G1 (after subsetting by Gen, if one was dropped, or just test N=1)
  dt_n1 <- data.table::data.table(Ind = c("A", "B", "C"), Sire = c(NA, NA, "A"), Dam = c(NA, NA, "B"), Sex = c("male", "female", "male"), Gen = c(1, 1, 2))
  ped_n1 <- suppressMessages(tidyped(dt_n1))
  
  res_n1 <- suppressWarnings(pedrel(ped_n1, by = "Gen"))
  expect_equal(res_n1[Gen == 2, NUsed], 1)
  expect_true(is.na(res_n1[Gen == 2, MeanRel]))
  
  # For test_ped, group by Gen
  rel <- pedrel(test_ped, by = "Gen")
  
  # Gen 1: A, B (Unrelated) -> 0
  expect_equal(rel[Gen == 1, MeanRel], 0)
  
  # Gen 2: C, D (full sibs). a_ij = 0.5. Mean of off-diagonals = 0.5
  expect_equal(rel[Gen == 2, MeanRel], 0.5)
  
  # Test sample size behavior (less than 2 individuals after filtering)
  rel_sample <- suppressWarnings(pedrel(test_ped, by = "Gen", reference = c("A", "C")))
  expect_true(all(is.na(rel_sample$MeanRel)))
  
  # Test with a different by parameter (e.g. BirthYear)
  test_ped$YearGroup <- c(1, 1, 2, 2, 3, 3) 
  rel_year <- pedrel(test_ped, by = "YearGroup")
  expect_true("YearGroup" %in% names(rel_year))
  expect_false("Group" %in% names(rel_year))
  expect_equal(rel_year[YearGroup == 2, MeanRel], 0.5) # C, D in YearGroup 2
  
  # Test reference parameter filtering with another by variable
  # If reference is only C, D, E, then YearGroup 3 should only have E left -> NUsed=1, NA MeanRel
  rel_year_ref <- suppressWarnings(pedrel(test_ped, by = "YearGroup", reference = c("C", "D", "E")))
  expect_equal(rel_year_ref[YearGroup == 2, NUsed], 2)
  expect_equal(rel_year_ref[YearGroup == 2, MeanRel], 0.5)
  expect_equal(rel_year_ref[YearGroup == 3, NUsed], 1)
  expect_true(is.na(rel_year_ref[YearGroup == 3, MeanRel]))

  coan <- pedrel(test_ped, by = "Gen", scale = "coancestry")
  expect_true("MeanCoan" %in% names(coan))
  expect_false("MeanRel" %in% names(coan))
  expect_equal(coan[Gen == 1, MeanCoan], 0.25)
  expect_equal(coan[Gen == 2, MeanCoan], 0.375)
  expect_equal(coan[Gen == 3, MeanCoan], 0.5)
})

test_that("pedrel captures deep inbreeding and correct ancestor tracing", {
  # Deep inbreeding: Full-sib mating (Gen 3), then their offspring full-sib mating (Gen 4)
  dt_deep <- data.table::data.table(
    Ind  = c("A", "B", "C", "D", "E", "F", "G", "H"),
    Sire = c(NA, NA, "A", "A", "C", "C", "E", "E"),
    Dam  = c(NA, NA, "B", "B", "D", "D", "F", "F"),
    Gen  = c(1, 1, 2, 2, 3, 3, 4, 4)
  )
  tp_deep <- tidyped(dt_deep)
  rel_deep <- pedrel(tp_deep, by = "Gen")
  
  # Gen 1: Unrelated founders
  expect_equal(rel_deep[Gen == 1, MeanRel], 0.0)
  
  # Gen 2: C, D are full sibs (0.5)
  expect_equal(rel_deep[Gen == 2, MeanRel], 0.5)
  
  # Gen 3: E, F's parents are C, D (full sibs). a_EF = 0.5 + 0.5 * f_CD = 0.5 + 0.5 * 0.5 = 0.75
  # Error check: if ancestor tracing failed, this would be 0.5
  expect_equal(rel_deep[Gen == 3, MeanRel], 0.75)
  
  # Gen 4: G, H's parents are E, F. a_GH = 0.5 + 0.5 * f_EF = 0.5 + 0.5 * 1.0 (since F_E=0.25+0.25*0.5=0.375? No, a_EF=0.75)
  # Actually a_GH = 0.5 + 0.5 * a_EF = 0.5 + 0.5 * 0.75 = 0.875? 
  # Wait, manual calibration: 
  # A_mat[G, H] = 0.5 * (A[E, G] + A[F, G]) = 0.5 * (0.5*(A[E,E]+A[E,F]) + 0.5*(A[F,E]+A[F,F]))
  # A[E,E] = 1 + f_CD = 1 + 0.25 = 1.25. A[E,F] = 0.75.
  # A_mat[G,H] = 0.5 * (0.5*(1.25+0.75) + 0.5*(0.75+1.25)) = 0.5 * (1.0 + 1.0) = 1.0
  expect_equal(rel_deep[Gen == 4, MeanRel], 1.0)

  coan_deep <- pedrel(tp_deep, by = "Gen", scale = "coancestry")
  expect_equal(coan_deep[Gen == 1, MeanCoan], 0.25)
  expect_equal(coan_deep[Gen == 2, MeanCoan], 0.375)
  expect_equal(coan_deep[Gen == 3, MeanCoan], 0.5)
  expect_equal(coan_deep[Gen == 4, MeanCoan], 0.59375)
})

test_that("pedgenint computes Average from all parent-offspring pairs", {
  test_ped$BirthYear <- c(2000, 2001, 2005, 2006, 2010, 2012)
  # SS: C-A (5), E-C (5) -> mean=5

  # SD: D-A (6), F-C (7) -> mean=6.5
  # DS: C-B (4), E-D (4) -> mean=4
  # DD: D-B (5), F-D (6) -> mean=5.5
  # Average from ALL 8 pairs: (5+6+5+7+4+5+4+6)/8 = 5.25
  # Note: numeric years auto-converted to Date (YYYY-07-01); leap years cause
  # tiny deviations from exact integers, so we use tolerance = 0.01.
  
  suppressMessages(
    genint_res <- pedgenint(test_ped, timevar = "BirthYear", unit = "year")
  )
  
  avg_res <- genint_res[Pathway == "Average"]
  expect_equal(avg_res$Mean, 5.25, tolerance = 0.01)
  expect_equal(avg_res$N, 8L)
  expect_true(!is.na(avg_res$SD))
  
  # Sex-specific pathways should still work
  expect_equal(genint_res[Pathway == "SS", Mean], 5, tolerance = 0.01)
  expect_equal(genint_res[Pathway == "SD", Mean], 6.5, tolerance = 0.01)
  expect_equal(genint_res[Pathway == "DS", Mean], 4, tolerance = 0.01)
  expect_equal(genint_res[Pathway == "DD", Mean], 5.5, tolerance = 0.01)
  
  # SO/DO: sex-independent pathways
  # SO (Sire→Offspring): A→C(5), A→D(6), C→E(5), C→F(7) -> N=4, Mean=5.75
  so_res <- genint_res[Pathway == "SO"]
  expect_equal(so_res$N, 4L)
  expect_equal(so_res$Mean, 5.75, tolerance = 0.01)
  
  # DO (Dam→Offspring): B→C(4), B→D(5), D→E(4), D→F(6) -> N=4, Mean=4.75
  do_res <- genint_res[Pathway == "DO"]
  expect_equal(do_res$N, 4L)
  expect_equal(do_res$Mean, 4.75, tolerance = 0.01)
  
  # All 7 pathways present
  expect_equal(sort(genint_res$Pathway),
               c("Average", "DD", "DO", "DS", "SD", "SO", "SS"))
})

test_that("pedgenint works with Date and character date inputs", {
  # Date input — exact control, no auto-conversion message
  test_ped$BirthDate <- as.Date(c(
    "2000-03-15", "2001-06-20",  # A, B
    "2005-03-15", "2006-06-20",  # C, D
    "2010-03-15", "2012-06-20"   # E, F
  ))
  genint_date <- suppressMessages(
    pedgenint(test_ped, timevar = "BirthDate", unit = "day")
  )
  expect_true(all(genint_date$Mean > 0))
  expect_true("Average" %in% genint_date$Pathway)

  # Character date input
  test_ped$BirthStr <- as.character(test_ped$BirthDate)
  genint_str <- suppressMessages(
    pedgenint(test_ped, timevar = "BirthStr", unit = "month")
  )
  expect_true(all(genint_str$Mean > 0))

  # unit = "hour"
  genint_hr <- suppressMessages(
    pedgenint(test_ped, timevar = "BirthDate", unit = "hour")
  )
  expect_true(all(genint_hr$Mean > 0))
})

test_that("pedcontrib f_e and f_a compute on full sets despite top cutoff", {
  cont_all <- suppressMessages(pedcontrib(test_ped, mode = "both", top = 100))
  cont_top1 <- suppressMessages(pedcontrib(test_ped, mode = "both", top = 1))
  
  # f_e and f_a should be identical because they're based on full arrays before truncation
  expect_equal(cont_all$summary$f_e, cont_top1$summary$f_e)
  expect_equal(cont_all$summary$f_a, cont_top1$summary$f_a)
  
  # Ensure reported reflects top
  expect_equal(cont_top1$summary$n_founder_show, 1)
  expect_equal(cont_all$summary$n_founder_show, length(cont_all$founders$Ind))
  expect_true(cont_top1$summary$n_founder > 1) 
})

test_that("pedpartial and pedancestry run without addnum=TRUE initially", {
  # Create tidyped without IndNum (addnum = FALSE)
  ped_nonum <- suppressMessages(tidyped(test_ped_dt, addnum = FALSE))
  
  expect_false("IndNum" %in% names(ped_nonum))
  
  # Should not crash and automatically handle missing nums
  part_res <- pedpartial(ped_nonum, ancestors = c("A", "B"), top = 2)
  expect_true(all(c("A", "B") %in% names(part_res)))
  expect_equal(nrow(part_res), nrow(ped_nonum))
  
  # pedancestry with labels
  ped_nonum$Label <- ifelse(ped_nonum$Ind %in% c("A", "B"), "Base", NA)
  anc_res <- pedancestry(ped_nonum, foundervar = "Label")
  expect_true("Base" %in% names(anc_res))
  # All non-founders descend completely from A and B, so Base should be 1.0 for all
  expect_true(all(anc_res$Base == 1.0))
})

# --- Additional tests for pedne, pedecg, pedsubpop, pedfclass ---

test_that("pedecg computes equivalent complete generations", {
  ecg_res <- pedecg(test_ped)
  expect_true(all(c("ECG", "FullGen", "MaxGen") %in% names(ecg_res)))
  expect_equal(nrow(ecg_res), nrow(test_ped))
  # Founders A, B have ECG = 0
  expect_equal(ecg_res[Ind == "A", ECG], 0)
  expect_equal(ecg_res[Ind == "B", ECG], 0)
  # C has 2 known parents -> ECG = 1
  expect_equal(ecg_res[Ind == "C", ECG], 1)
  # E has parents C, D who each have ECG=1 -> ECG = 1 + (1+1)/2 = 2
  expect_equal(ecg_res[Ind == "E", ECG], 2)
})

test_that("pedne computes effective population size by cohort", {
  test_ped$BirthYear <- c(2000, 2000, 2005, 2005, 2010, 2010)
  res <- suppressMessages(pedne(test_ped, by = "BirthYear"))
  expect_true(all(c("Cohort", "N", "DeltaC", "Ne") %in% names(res)))
  # Only cohort 2010 has ECG > 1 (founders/gen1 filtered out)
  expect_equal(nrow(res), 3)
  expect_equal(res$Cohort, c(2000, 2005, 2010))
  expect_true(any(res$DeltaC >= 0, na.rm=TRUE))
  expect_true(any(res$Ne > 0 & is.finite(res$Ne)))
})

test_that("pedsubpop splits pedigree by grouping variable", {
  res_gen <- pedsubpop(test_ped, by = "Gen")
  expect_true(is.data.table(res_gen))
  expect_equal(nrow(res_gen), length(unique(test_ped$Gen)))
  expect_true(all(c("Group", "N") %in% names(res_gen)))
  # Gen 1 (Group=1) has 2 individuals
  expect_equal(res_gen[Group == 1, N], 2)
  expect_equal(res_gen[Group == 2, N], 2)
})

test_that("pedsubpop summarizes disconnected groups and isolated individuals", {
  ped_multi_dt <- data.table::data.table(
    Ind = c("A", "B", "C", "D", "E", "F", "ISO"),
    Sire = c(NA, NA, "A", NA, NA, "D", NA),
    Dam = c(NA, NA, "B", NA, NA, "E", NA),
    Sex = c("male", "female", "male", "male", "female", "male", NA_character_)
  )
  ped_multi <- suppressMessages(tidyped(ped_multi_dt))

  res_null <- pedsubpop(ped_multi)

  expect_true(is.data.table(res_null))
  expect_equal(nrow(res_null), 3)
  expect_true(all(c("GP1", "GP2", "Isolated") %in% res_null$Group))

  expect_equal(res_null[Group == "GP1", N], 3)
  expect_equal(res_null[Group == "GP1", N_Founder], 2)

  expect_equal(res_null[Group == "GP2", N], 3)
  expect_equal(res_null[Group == "GP2", N_Founder], 2)

  expect_equal(res_null[Group == "Isolated", N], 1)
  expect_equal(res_null[Group == "Isolated", N_Sire], 0)
  expect_equal(res_null[Group == "Isolated", N_Dam], 0)
  expect_equal(res_null[Group == "Isolated", N_Founder], 1)
})

test_that("pedfclass classifies inbreeding levels", {
  res <- pedfclass(test_ped)
  expect_true(is.data.table(res))
  expect_true("FClass" %in% names(res))
  expect_s3_class(res$FClass, "ordered")
  # A, B, C, D have f=0 (4 individuals); E, F have f=0.25 (2 individuals)
  expect_equal(sum(res$Count), nrow(test_ped))
  expect_equal(res[FClass == "F = 0", Count], 4)
  expect_equal(res[FClass == "0.125 < F <= 0.25", Count], 2)
})

test_that("pedfclass handles threshold boundaries correctly", {
  threshold_ped <- data.table(
    Ind = c("A", "B", "C", "D", "E"),
    Sire = NA_character_,
    Dam = NA_character_,
    Sex = c("male", "female", "male", "female", "male"),
    f = c(0, 0.0625, 0.125, 0.25, 0.250001)
  )
  threshold_ped <- new_tidyped(threshold_ped)

  res <- pedfclass(threshold_ped)

  expect_equal(res[FClass == "F = 0", Count], 1L)
  expect_equal(res[FClass == "0 < F <= 0.0625", Count], 1L)
  expect_equal(res[FClass == "0.0625 < F <= 0.125", Count], 1L)
  expect_equal(res[FClass == "0.125 < F <= 0.25", Count], 1L)
  expect_equal(res[FClass == "F > 0.25", Count], 1L)
})

test_that("pedfclass supports custom breaks", {
  threshold_ped <- data.table(
    Ind = c("A", "B", "C", "D", "E", "F", "G"),
    Sire = NA_character_,
    Dam = NA_character_,
    Sex = c("male", "female", "male", "female", "male", "female", "male"),
    f = c(0, 0.03125, 0.0625, 0.125, 0.25, 0.5, 0.6)
  )
  threshold_ped <- new_tidyped(threshold_ped)

  res <- pedfclass(threshold_ped, breaks = c(0.03125, 0.0625, 0.125, 0.25, 0.5))

  # labels auto-generated: 5 bounded + 1 tail = 6 positives, plus "F = 0" = 7 levels
  expect_equal(
    as.character(res$FClass),
    c("F = 0", "0 < F <= 0.03125", "0.03125 < F <= 0.0625",
      "0.0625 < F <= 0.125", "0.125 < F <= 0.25", "0.25 < F <= 0.5", "F > 0.5")
  )
  expect_equal(res[FClass == "0 < F <= 0.03125",      Count], 1L)
  expect_equal(res[FClass == "0.03125 < F <= 0.0625", Count], 1L)
  expect_equal(res[FClass == "0.0625 < F <= 0.125",   Count], 1L)
  expect_equal(res[FClass == "0.125 < F <= 0.25",     Count], 1L)
  expect_equal(res[FClass == "0.25 < F <= 0.5",       Count], 1L)
  expect_equal(res[FClass == "F > 0.5",               Count], 1L)
})

test_that("pedfclass respects custom labels aligned to breaks", {
  threshold_ped <- data.table(
    Ind = c("A", "B", "C", "D", "E"),
    Sire = NA_character_,
    Dam = NA_character_,
    Sex = c("male", "female", "male", "female", "male"),
    f = c(0, 0.0625, 0.125, 0.25, 0.4)
  )
  threshold_ped <- new_tidyped(threshold_ped)

  res <- pedfclass(
    threshold_ped,
    breaks = c(0.0625, 0.125, 0.25),
    labels = c("Low", "Moderate", "High")
  )
  # tail auto-generated as "F > 0.25"
  expect_equal(as.character(res$FClass), c("F = 0", "Low", "Moderate", "High", "F > 0.25"))
  expect_equal(res[FClass == "Low",       Count], 1L)
  expect_equal(res[FClass == "High",      Count], 1L)
  expect_equal(res[FClass == "F > 0.25",  Count], 1L)

  # Wrong length should error
  expect_error(
    pedfclass(threshold_ped, breaks = c(0.0625, 0.125, 0.25), labels = c("A", "B")),
    "length equal to length\\(breaks\\)"
  )
})

test_that("pedstats returns correct structure without timevar", {
  # Use a pedigree without any time column to test no-timevar path
  ped_no_time <- data.table::data.table(
    ID = c("A", "B", "C", "D"),
    Sire = c(NA, NA, "A", "A"),
    Dam = c(NA, NA, "B", "B"),
    Sex = c("male", "female", "male", "female")
  )
  tp <- suppressMessages(tidyped(ped_no_time))
  ps <- suppressMessages(pedstats(tp))

  # class
  expect_s3_class(ps, "pedstats")

  # $summary columns and types
  expect_true(is.data.table(ps$summary))
  expect_named(ps$summary, c("N", "NSire", "NDam", "NFounder", "MaxGen"))
  expect_equal(ps$summary$N, nrow(tp))
  expect_true(ps$summary$NFounder > 0)

  # $ecg columns
  expect_true(is.data.table(ps$ecg))
  expect_named(ps$ecg, c("Ind", "ECG", "FullGen", "MaxGen"))
  expect_equal(nrow(ps$ecg), nrow(tp))

  # no timevar -> gen_intervals is NULL
  expect_null(ps$gen_intervals)
})

test_that("pedstats returns gen_intervals with timevar", {
  tp2 <- suppressMessages(tidyped(big_family_size_ped))
  ps2 <- pedstats(tp2, timevar = "Year")

  # $gen_intervals columns
  expect_true(is.data.table(ps2$gen_intervals))
  expect_true(all(c("Pathway", "N", "Mean", "SD") %in% names(ps2$gen_intervals)))
  expect_true("Average" %in% ps2$gen_intervals$Pathway)

  # ecg = FALSE suppresses ecg
  ps_no_ecg <- pedstats(tp2, timevar = "Year", ecg = FALSE)
  expect_null(ps_no_ecg$ecg)

  # genint = FALSE suppresses gen_intervals
  ps_no_gi <- pedstats(tp2, timevar = "Year", genint = FALSE)
  expect_null(ps_no_gi$gen_intervals)
})

test_that("pedancestry proportions sum to 1 for multi-line admixture", {
  ped_df <- data.table::data.table(
    Ind = c("A","B","C","D", "E","F","G"),
    Sire = c(NA, NA, NA, NA, "A","C","E"),
    Dam =  c(NA, NA, NA, NA, "B","D","F"),
    Sex = c("male","female","male","female", "male","female","male")
  )
  ped_df$Line <- c("Line1", "Line1", "Line2", "Line2", NA, NA, NA)

  tp <- suppressMessages(tidyped(ped_df))
  res <- pedancestry(tp, foundervar = "Line")

  # Calculate sum of all label columns for each individual
  res[, Sum := rowSums(.SD, na.rm = TRUE), .SDcols = c("Line1", "Line2")]
  
  # Check specific admixture for the final hybrid (G)
  expect_equal(res[Ind == "G", Line1], 0.5)
  expect_equal(res[Ind == "G", Line2], 0.5)
  
  # All proportions must sum strictly to 1.0 (with floating point tolerance)
  expect_true(all(abs(res$Sum - 1.0) < 1e-12))
})

test_that("pedrel results match between compact and non-compact modes", {
  # Use a medium subset to explicitly test folding performance equivalence
  # 500 rows is large enough to contain multi-generational sib-groups for compression
  ped_sub <- head(visPedigree::big_family_size_ped, 500)
  tp <- suppressMessages(tidyped(ped_sub))

  rel_comp <- pedrel(tp, compact = TRUE)
  rel_full <- pedrel(tp, compact = FALSE)

  expect_equal(rel_comp, rel_full)

  coan_comp <- pedrel(tp, compact = TRUE, scale = "coancestry")
  coan_full <- pedrel(tp, compact = FALSE, scale = "coancestry")

  expect_equal(coan_comp, coan_full)
})
Any scripts or data that you put into this service are public.
visPedigree documentation built on March 30, 2026, 9:07 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
visPedigree
Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees

tests/testthat/test-pedanalysis.R
In visPedigree: Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees

Try the visPedigree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

visPedigree Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees

tests/testthat/test-pedanalysis.R In visPedigree: Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees

Try the visPedigree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

visPedigree
Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees

tests/testthat/test-pedanalysis.R
In visPedigree: Tidying, Analysis, and Fast Visualization of Animal and Plant Pedigrees