Nothing
#' @title Find Putative Relatives
#'
#' @description Identify pairs of individuals likely to be related, but not
#' assigned as such in the provided pedigree.
#'
#' @param SeqList list with output from \code{\link{sequoia}}.
#' \code{SeqList$Pedigree} is used if present, and \code{SeqList$PedigreePar}
#' otherwise, and overrides the input parameter \code{Pedigree}. If 'Specs' is
#' present, its elements override all input parameters with the same name. The
#' list elements `LifeHist', `AgePriors', and `ErrM' are also used if
#' present, and similarly override the corresponding input parameters.
#' @param Pedigree dataframe with id - dam - sire in columns 1-3. May include
#' non-genotyped individuals, which will be treated as dummy individuals. When
#' provided, all likelihoods (and thus all maybe-relatives) are conditional on
#' this pedigree. Note: \code{SeqList$Pedigree} or \code{SeqList$PedigreePar} take
#' precedent (for this function only).
#' @param AgePrior Agepriors matrix, as generated by \code{\link{MakeAgePrior}}
#' and included in the \code{\link{sequoia}} output. Affects which
#' relationships are considered possible (only those where \eqn{P(A|R) / P(A)
#' > 0}).
#' @param Module type of relatives to check for. One of
#' \describe{
#' \item{par}{parent - offspring pairs}
#' \item{ped}{all first and second degree relatives}
#' }
#' When 'par', all pairs are returned that are more likely parent-offspring
#' than unrelated, potentially including pairs that are even more likely to
#' be otherwise related.
#' @param MaxPairs the maximum number of putative pairs to return.
#' @param quiet logical, suppress messages.
#' @param ParSib \strong{DEPRECATED, use \code{Module}} either 'par' to check
#' for putative parent-offspring pairs only, or 'sib' to check for all types
#' of first and second degree relatives.
#' @inheritParams sequoia
#'
#' @return A list with
#' \item{MaybePar}{A dataframe with non-assigned likely parent-offspring pairs,
#' with columns:
#' \itemize{
#' \item ID1
#' \item ID2
#' \item TopRel: the most likely relationship, using abbreviations listed
#' below
#' \item LLR: Log10-Likelihood Ratio between most likely and next most
#' likely relationship
#' \item OH: Number of loci at which the two individuals are opposite
#' homozygotes
#' \item BirthYear1: Birth year of ID1 (copied from LifeHistData)
#' \item BirthYear2
#' \item AgeDif: Age difference; BirthYear1 - BirthYear2
#' \item Sex1: Sex of ID1 (copied from LifeHistData)
#' \item Sex2
#' \item SnpdBoth: Number of loci at which the two individuals are both
#' successfully genotyped
#' }}
#' \item{MaybeRel}{A dataframe with non-assigned likely pairs of relatives,
#' with columns identical to \code{MaybePar}}
#' \item{MaybeTrio}{A dataframe with non-assigned parent-parent-offspring
#' trios, with columns:
#' \itemize{
#' \item ID
#' \item parent1
#' \item parent2
#' \item TopRel: the most likely relationship, using abbreviations listed
#' below
#' \item LLRparent1: Log10-Likelihood Ratio between parent1 being a
#' parent of ID vs the next most likely relationship between the pair,
#' ignoring parent2
#' \item LLRparent2: as LLRparent1
#' \item LLRpair: LLR for the parental pair, versus the next most likely
#' configuration between the three individuals (with one or neither
#' parent assigned)
#' \item OHparent1: Number of loci at which ID and parent1 are opposite
#' homozygotes
#' \item OHparent2: as OHparent1
#' \item MEpair: Number of Mendelian errors between the offspring and the
#' parent pair, includes OH as well as e.g. parents being opposing
#' homozygotes, but the offspring not being a heterozygote. The offspring
#' being OH with both parents is counted as 2 errors.
#' \item SNPd.id.parent1: Number of loci at which ID and parent1 are both
#' successfully genotyped
#' \item SNPd.id.parent2: as SNPd.id.parent1
#' }}
#' The following categories are used in column 'TopRel', indicating the most
#' likely relationship category:
#' \item{PO}{Parent-Offspring}
#' \item{FS}{Full Siblings}
#' \item{HS}{Half Siblings}
#' \item{GP}{GrandParent - grand-offspring}
#' \item{FA}{Full Avuncular (aunt/uncle)}
#' \item{2nd}{2nd degree relatives, not enough information to distinguish
#' between HS,GP and FA}
#' \item{Q}{Unclear, but probably 1st, 2nd or 3rd degree relatives}
#'
#' @details When \code{Module="par"}, the age difference of the putative pair is
#' temporarily set to NA so that genetic parent-offspring pairs declared to be
#' born in the same year may be discovered. When \code{Module="ped"}, only
#' relationships possible given the age difference, if known from the
#' LifeHistData, are considered.
#'
#' @seealso \code{\link{sequoia}} to identify likely pairs of duplicate
#' genotypes and for pedigree reconstruction; \code{\link{GetRelM}} to
#' identify all pairs of relatives in a pedigree; \code{\link{CalcPairLL}} for
#' the likelihoods underlying the LLR.
#'
#' @examples
#' \dontrun{
#' # without conditioning on pedigree
#' MaybeRel_griffin <- GetMaybeRel(GenoM=Geno_griffin, Err=0.001, Module='par')
#' }
#' names(MaybeRel_griffin)
#'
#' # conditioning on pedigree
#' MaybePO <- GetMaybeRel(GenoM = Geno_griffin, SeqList = SeqOUT_griffin,
#' Module = 'par')
#' head(MaybePO$MaybePar)
#'
#' # instead of providing the entire SeqList, one may specify the relevant
#' # elements separately
#' Maybe <- GetMaybeRel(GenoM = Geno_griffin,
#' Pedigree = SeqOUT_griffin$PedigreePar,
#' LifeHistData = LH_griffin,
#' Err=0.0001, Complex = "full",
#' Module = "ped")
#' head(Maybe$MaybeRel)
#'
#' # visualise results, turn dataframe into matrix first:
#' MaybeM <- GetRelM(Pairs = Maybe$MaybeRel)
#' PlotRelPairs(MaybeM)
#' # or combine with pedigree (note suffix '?')
#' RelM <- GetRelM(Pedigree =SeqOUT_griffin$PedigreePar, Pairs = Maybe$MaybeRel)
#' PlotRelPairs(RelM)
#'
#' @useDynLib sequoia, .registration = TRUE
#'
#' @export
GetMaybeRel <- function(GenoM = NULL,
SeqList = NULL,
Pedigree = NULL,
LifeHistData = NULL,
AgePrior = NULL,
Module = "par",
Complex = "full",
Herm = "no",
Err = 0.0001,
ErrFlavour = "version2.9",
Tassign = 0.5,
Tfilter = -2.0,
MaxPairs = 7*nrow(GenoM),
quiet = FALSE,
ParSib = NULL, # DEPRECATED
MaxMismatch = NA) # DEPRECATED
{
on.exit(.Fortran(deallocall), add=TRUE)
# backwards compatibility, until ParSib deprecated:
if (!is.null(ParSib)) {
if (ParSib %in% c("par", "sib", "ped")) {
Module <- switch(ParSib, par = "par", sib = "ped", ped="ped")
} else {
stop("'ParSib' must be 'par' or 'sib'; or use only 'Module' instead")
}
}
if (!Module %in% c("par", "ped")) stop("'Module' must be 'par' or 'ped'")
if(!quiet) cli::cli_alert_info(paste0("Searching for non-assigned ",
c(par="parent-offspring", ped="relative")[Module], " pairs ...",
" (Module = ", Module, ")"))
# unpack SeqList ----
if (!is.null(SeqList)) {
if (!inherits(SeqList, 'list')) stop("'SeqList' must be a list")
NewName = c(Pedigree = "Pedigree",
PedigreePar = "Pedigree",
LifeHist = "LifeHistData",
AgePriors = "AgePrior")
for (x in names(NewName)) {
# if (x %in% c("Pedigree", "PedigreePar") & !is.null(Pedigree)) next
## SeqList$Pedigree takes precedent for this function only.
if (x %in% names(SeqList)) {
if (x == "PedigreePar" & "Pedigree" %in% names(SeqList)) next
if (!quiet) cli::cli_alert_info("using {x} in SeqList")
assign(NewName[x], SeqList[[x]])
}
}
}
# input check ----
if (!quiet %in% c(TRUE, FALSE)) stop("'quiet' must be TRUE or FALSE")
GenoM <- CheckGeno(GenoM, quiet=quiet, Plot=FALSE)
gID <- rownames(GenoM)
Pedigree <- PedPolish(Pedigree, gID, DropNonSNPd = FALSE,
NullOK = TRUE) # OK if no pedigree
if (!quiet) {
if (is.null(Pedigree)) {
cli::cli_alert_info("Not conditioning on any pedigree")
} else {
N <- c(i=nrow(Pedigree), d=sum(!is.na(Pedigree$dam)), s=sum(!is.na(Pedigree$sire)))
cli::cli_alert_info("Conditioning on pedigree with {N['i']} individuals, {N['d']} dams and {N['s']} sires")
}
}
LH <- CheckLH(LifeHistData, gID, sorted=TRUE)
if (!is.null(AgePrior)) {
AgePrior <- CheckAP(AgePrior)
} else {
AgePrior <- MakeAgePrior(Pedigree, LifeHistData, Plot=FALSE, quiet=quiet)
}
# Specs / param ----
if ("Specs" %in% names(SeqList)) {
if(!quiet) cli::cli_alert_info("settings in SeqList$Specs will overrule input parameters")
SeqList$Specs$Module <- Module
PARAM <- SpecsToParam(SeqList$Specs, SeqList$ErrM, ErrFlavour,
dimGeno = dim(GenoM), Module, MaxPairs, quiet) # overrule values in SeqList
} else {
PARAM <- namedlist(dimGeno = dim(GenoM),
MaxPairs,
Err,
ErrFlavour,
Tfilter,
Tassign,
nAgeClasses = nrow(AgePrior),
MaxSibshipSize = max(table(Pedigree$dam), table(Pedigree$sire), 90,
na.rm=TRUE) +10,
Module = as.character(Module),
Complex,
Herm,
quiet)
PARAM$ErrM <- ErrToM(Err, flavour = ErrFlavour, Return = "matrix")
}
# MaxMismatch ----
# vector with max. mismatches for duplicates, PO pairs, PPO trios
if (!"MaxMismatchV" %in% names(PARAM)) { # DUP/OH/ME from version 2.0 onwards
sts <- SnpStats(GenoM, Plot=FALSE)
PARAM$MaxMismatchV <- setNames(CalcMaxMismatch(Err=PARAM$ErrM,
MAF=sts[,"AF"],
ErrFlavour=PARAM$ErrFlavour,
qntl=0.9999^(1/nrow(GenoM))),
c("DUP", "OH", "ME"))
}
# check parameter values ----
CheckParams(PARAM)
utils::flush.console()
if (any(LifeHistData$Sex==4) && PARAM$Herm == "no") { #!grepl("herm", PARAM$Complex)) {
if (!quiet) cli::cli_alert_warning("detected hermaphrodites (sex=4), changing Herm to 'A'")
# PARAM$Complex <- "herm"
PARAM$Herm <- "A"
}
#=========================
# call Fortran ----
Ng <- nrow(GenoM)
gID <- rownames(GenoM)
FortPARAM <- MkFortParams(PARAM, fun="mayberel")
# turn IDs into numbers, & turn non-genotyped parents into temporary dummies
PedN <- PedToNum(Pedigree, gID, DoDummies = "new") # list: PedPar - DumPar - Renamed - Nd
LHF <- orderLH(LH, gID)
TMP <- .Fortran(findambig,
ng = as.integer(Ng),
specsint = as.integer(FortPARAM$SpecsInt),
specsintamb = as.integer(FortPARAM$SpecsIntAmb),
specsdbl = as.double(FortPARAM$SpecsDbl),
errv = as.double(FortPARAM$ErrM),
genofr = as.integer(GenoM),
sexrf = as.integer(LHF$Sex),
byrf = as.integer(c(LHF$BirthYear, LHF$BY.min, LHF$BY.max)),
aprf = as.double(AgePrior),
parentsrf = as.integer(PedN$PedPar),
dumparrf = as.integer(PedN$DumPar),
namb = as.integer(0),
ambigid = integer(2*MaxPairs),
ambigrel = integer(2*MaxPairs),
ambiglr = double(2*MaxPairs),
ambigoh = integer(MaxPairs),
ntrio = as.integer(0),
trioids = integer(3*Ng),
triolr = double(3*Ng),
triooh = integer(3*Ng))
TMP$ambiglr[abs(TMP$ambiglr - 999) < 0.1] <- NA
TMP$ambiglr <- round(TMP$ambiglr, 2)
TMP$ambigoh[TMP$ambigoh < 0] <- NA
TMP$triolr[abs(TMP$triolr - 999) < 0.1] <- NA
TMP$triolr <- round(TMP$triolr, 2)
TMP$triooh[TMP$triooh < 0] <- NA
#=========================
# non-assigned probable relative pairs
if (TMP$namb > 0) {
RelName <- c("PO", "FS", "HS", "GP", "FA", "HA", "U ", "Q", "2nd")
Na <- TMP$namb
TMP$ambigid <- NumToID(TMP$ambigid, 0, gID, NULL) # maybe-rel only between genotyped indivs
AmbigRel <- factor(TMP$ambigrel, levels=1:9, labels=RelName)
MaybeRel <- data.frame(VtoM(TMP$ambigid, Na),
VtoM(AmbigRel, Na),
VtoM(TMP$ambiglr, Na),
stringsAsFactors=FALSE)
names(MaybeRel) <- c("ID1", "ID2", "Relx", "TopRel", "LLR_Rx_U", "LLR")
MaybeRel <- MaybeRel[,-which(names(MaybeRel) %in% c("Relx", "LLR_Rx_U"))] # drop; confusing.
MaybeRel$OH <- TMP$ambigoh[seq_len(Na)]
if (nrow(MaybeRel)>0) {
LH$BirthYear[LH$BirthYear<0] <- NA
MaybeRel <- merge(MaybeRel, setNames(LH[,1:3], c("ID1","Sex1","BirthYear1")), all.x=TRUE)
MaybeRel <- merge(MaybeRel, setNames(LH[,1:3], c("ID2","Sex2","BirthYear2")), all.x=TRUE)
MaybeRel$AgeDif <- with(MaybeRel, BirthYear1 - BirthYear2)
MaybeRel <- MaybeRel[, c("ID1", "ID2", "TopRel", "LLR", "OH",
"BirthYear1", "BirthYear2", "AgeDif", "Sex1", "Sex2")]
for (i in 1:Na) {
if (is.na(MaybeRel$AgeDif[i])) next
if (MaybeRel$AgeDif[i] < 0) {
tmpRel <- MaybeRel[i,]
tmpRel$AgeDif <- abs(tmpRel$AgeDif)
MaybeRel[i,] <- tmpRel[,c("ID2","ID1","TopRel", "LLR", "OH",
"BirthYear2", "BirthYear1","AgeDif", "Sex2", "Sex1")]
}
}
}
if (Module == "ped") {
MaybeRel <- with(MaybeRel, MaybeRel[TopRel %in% c("PO", "FS", "HS","GP",
"FA", "2nd", "Q"), ]) # drop HA,XX,U
}
MaybeRel <- MaybeRel[order(ordered(MaybeRel$TopRel, levels=RelName),
-MaybeRel$LLR),]
#~~~~~~~~
CalcSnpdBoth <- function(Pairs, GenoM) {
sapply(seq_along(Pairs[,1]), function(i, G=GenoM) {
sum(G[Pairs[i,1], ] >=0 & G[Pairs[i,2], ] >=0)
})
}
#~~~~~~~~
if (nrow(MaybeRel)==0) {
MaybeRel <- NULL
} else {
rownames(MaybeRel) <- 1:nrow(MaybeRel)
MaybeRel$SNPdBoth <- CalcSnpdBoth(MaybeRel[, c("ID1", "ID2")], GenoM)
}
} else MaybeRel <- NULL
if (quiet<1) { # && nrow(MaybeRel)>0
if (!is.null(MaybeRel)) {
nRel <- nrow(MaybeRel)
nPO <- sum(MaybeRel$TopRel=="PO")
} else {
nRel <- 0
nPO <- 0
}
cli::cli_alert_success(c("Found {nPO} likely parent-offspring pairs, ",
"and {nRel-nPO}, other non-assigned pairs of possible relatives"))
}
#=========================
# non-assigned parent-parent-offspring trios
if (TMP$ntrio>0) {
trios <- data.frame(VtoM(TMP$trioids, nr=TMP$ntrio, nc=3),
VtoM(TMP$triolr, nr=TMP$ntrio, nc=3),
VtoM(TMP$triooh, nr=TMP$ntrio, nc=3),
stringsAsFactors=FALSE)
names(trios) <- c("id", "parent1", "parent2", "LLRparent1", "LLRparent2", "LLRpair",
"OHparent1", "OHparent2", "MEpair")
for (k in 1:3) trios[, k] <- NumToID(trios[, k], k-1, gID, NULL)
trios$SNPd.id.parent1 <- CalcSnpdBoth(trios[, c("id", "parent1")], GenoM)
trios$SNPd.id.parent2 <- CalcSnpdBoth(trios[, c("id", "parent2")], GenoM)
if (quiet<1) cli::cli_alert_success("Found {nrow(trios)} parent-parent-offspring trios")
} else trios <- NULL
if (Module == "par") {
return( list(MaybePar = MaybeRel,
MaybeTrio = trios) )
} else {
return( list(MaybeRel = MaybeRel,
MaybeTrio = trios) )
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.