# tests/testthat/test-cleanPedigree.R

test_that("standardizeColnames works", {
  library(BGmisc)
  library(dplyr)
  data(potter)

  # With verbose = TRUE the function must announce what it is doing.
  expect_message(
    standardizeColnames(potter, verbose = TRUE),
    "Standardizing column names..."
  )

  # Unmodified data: `spouseID` is expected to come back as `spID`.
  std_names <- names(standardizeColnames(potter))
  expect_false("spouseID" %in% std_names)
  expect_true("spID" %in% std_names)

  # Variant spellings that are expected to be mapped to the standard names.
  data_fixable_names <- dplyr::rename(
    potter,
    familyID = famID,
    dame = momID,
    dadid = dadID
  )
  fixed_names <- names(standardizeColnames(data_fixable_names))

  expect_false("spouseID" %in% fixed_names)
  expect_true("spID" %in% fixed_names)
  expect_false("familyID" %in% fixed_names)
  expect_true("famID" %in% fixed_names)
  expect_false("dame" %in% fixed_names)
  expect_true("momID" %in% fixed_names)
  expect_false("dadid" %in% fixed_names)
  expect_true("dadID" %in% fixed_names)

  # Spellings that are expected to be left untouched: none of these should
  # be converted, so the standard names must be absent from the result.
  data_unfixable_names <- dplyr::rename(
    potter,
    zfamID = famID,
    dadIDz = dadID,
    zmomID = momID,
    spID2 = spouseID
  )
  unfixed_names <- names(standardizeColnames(data_unfixable_names))

  expect_true("zfamID" %in% unfixed_names)
  expect_false("famID" %in% unfixed_names)
  expect_true("dadIDz" %in% unfixed_names)
  expect_false("dadID" %in% unfixed_names)
  expect_true("zmomID" %in% unfixed_names)
  expect_false("momID" %in% unfixed_names)
  expect_true("spID2" %in% unfixed_names)
  expect_false("spID" %in% unfixed_names)
})


# standardizeColnames <- function(df, verbose = FALSE) {
#   # Internal mapping of standardized names to possible variants
#   mapping <- list(
#     "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
#     "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
#     "gen" = "^(?:gen(?:s|eration)?)",
#     "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
#     "patID" = "^(?:dat[\\.\\-_]?id|pat[\\.\\-_]?id|paternal[\\.\\-_]?(?:id)?)",
#     "momID" = "^(?:m(?:om|a|other)?[\\.\\-_]?id|pid[\\.\\-_]?moth[er]*|dame)",
#     "matID" = "^(?:mat[\\.\\-_]?id|maternal[\\.\\-_]?(?:id)?)",
#     "spID" = "^(?:s(?:pt)?id|spouse[\\.\\-_]?(?:id)?|partner[\\.\\-_]?(?:id)?|husb(?:and)?[\\.\\-_]?id|wife[\\.\\-_]?(?:id)?|pid[\\.\\-_]?spouse1?)",
#     "twinID" = "^(?:twin[\\.\\-_]?(?:id)?)",
#     "sex" = "^(?:sex|gender|female|m(?:a(?:le|n)|en)|wom[ae]n)"
#   )

# Try the BGmisc package in your browser
#
# Any scripts or data that you put into this service are public.
#
# BGmisc documentation built on June 11, 2025, 1:07 a.m.