Nothing
#' @title Check Data for Duplicates.
#'
#' @description Check the genotype and life history data for duplicate IDs (not
#' permitted) and duplicated genotypes (not advised), and count how many
#' individuals in the genotype data are not included in the life history data
#' (permitted). The order of IDs in the genotype and life history data is not
#' required to be identical.
#'
#' @param GenoM matrix with genotype data, size nInd x nSnp.
#' @param FortPARAM.dup list with Fortran-ready parameter values, as generated by
#' \code{\link{MkFortParams}}.
#' @param quiet suppress messages.
#'
#' @return A list with one or more of the following elements:
#' \item{DupGenoID}{Dataframe, row numbers of duplicated IDs in genotype data.
#' Please do remove or relabel these to avoid downstream confusion.}
#' \item{DupGenotype}{Dataframe, duplicated genotypes (with or without identical
#' IDs). The specified number of maximum mismatches is allowed, and this
#' dataframe may include pairs of closely related individuals. Mismatch =
#' number of SNPs at which genotypes differ, LLR = likelihood ratio between
#' 'self' and most likely non-self.}
#'
#' @seealso \code{\link{CheckLH}}, which performs the check for duplicated IDs
#' in the life history data, as well as for IDs (in genotype data) for which
#' no life history data is provided.
#'
#' @useDynLib sequoia, .registration = TRUE
# @useDynLib sequoia duplicates
#'
#' @keywords internal
DuplicateCheck <- function(GenoM = NULL,
FortPARAM.dup,
quiet)
{
on.exit(.Fortran(deallocall), add=TRUE)
# AgePriors not used, LifeHist not passed to Fortran
AP.dup <- MakeAgePrior(MaxAgeParent=10, Plot=FALSE, quiet=TRUE)
FortPARAM.dup[["SpecsInt"]]["nAgeCl"] <- nrow(AP.dup)
Ng <- nrow(GenoM)
gID <- rownames(GenoM)
DUP <- .Fortran(duplicates,
ng = as.integer(FortPARAM.dup$Ng),
specsint = as.integer(FortPARAM.dup$SpecsInt),
specsdbl = as.double(FortPARAM.dup$SpecsDbl),
errv = as.double(FortPARAM.dup$ErrM),
genofr = as.integer(GenoM),
sexrf = as.integer(rep(3, Ng)),
byrf = as.integer(rep(-999, 3*Ng)),
aprf = as.double(AP.dup),
ndupgenos = as.integer(0),
dupgenos = integer(2*Ng),
nmismatch = integer(Ng),
snpdboth = integer(Ng),
duplr = double(Ng))
Duplicates <- list()
if (any(duplicated(gID))) {
r1 <- which(duplicated(gID))
r2 <- which(duplicated(gID, fromLast=TRUE))
Duplicates$DupGenoID <- data.frame(row1 = r1,
row2 = r2,
ID = gID[r1])
}
if(DUP$ndupgenos>0) {
tmp <- VtoM(DUP$dupgenos, DUP$ndupgenos)
Duplicates$DupGenotype <- data.frame(row1 = tmp[, 1],
row2 = tmp[, 2],
ID1 = gID[tmp[, 1]],
ID2 = gID[tmp[, 2]],
Mismatch = DUP$nmismatch[seq_len(DUP$ndupgenos)],
SnpdBoth = DUP$snpdboth[seq_len(DUP$ndupgenos)],
LLR = DUP$duplr[seq_len(DUP$ndupgenos)])
Duplicates$DupGenotype <- Duplicates$DupGenotype[order(Duplicates$DupGenotype$LLR,
decreasing=TRUE), ]
}
# print warnings
if (!quiet) {
if (any(duplicated(gID))) cli::cli_alert_danger(c("duplicate IDs found in `GenoM`, ",
"please remove to avoid confusion"))
if (DUP$ndupgenos>0 && DUP$ndupgenos > sum(duplicated(gID))) {
cli::cli_alert_info(c('Found {ifelse(DUP$ndupgenos == Ng, ">=", "")}',
"{DUP$ndupgenos} likely duplicate genotypes,",
"please double check and consider removing"), wrap=TRUE)
}
}
return( Duplicates )
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.