Nothing
#' Multi-person IBD coefficients
#'
#' Computes the probabilities (coefficients) of all possible patterns of
#' identity by descent (IBD) sharing at a single locus, among N>1 non-inbred
#' members of a pedigree. The reported coefficients are "condensed" in the sense
#' that allele ordering within each individual is ignored. For N = 2, the result
#' should agree with the traditional "kappa" coefficients, as computed by
#' [kappaIBD()]. This function is under development, and should be regarded as
#' experimental. For now, the only cases handled are those with: N = 2 or 3,
#' autosomal locus.
#'
#' Consider N members of a pedigree, i1, i2, ... iN. A pattern of IBD sharing
#' between these individuals is a sequence of N ordered pairs of labels, (a1_1,
#' a1_2), (a2_1, a2_2), ... (aN_1, aN_2), where ai_1 and ai_2 represent the
#' paternal and maternal allele of individual i, respectively. Equality of
#' labels means that the corresponding alleles are IBD, and vice versa.
#'
#' We say that two IBD patterns are equivalent if one can be transformed into
#' the other by some combination of
#'
#' * renaming the labels (without changing the structure)
#'
#' * swapping the paternal/maternal labels of some individuals
#'
#' Each equivalence class has a "minimal" element, using integer labels, and
#' being minimal with respect to standard sorting. For example, the minimal
#' element equivalent to `(a,c),(d,c),(b,b)` is `(1,2),(2,3),(4,4)`.
#'
#' @param x A `ped` object.
#' @param ids A vector of ID labels.
#' @param complete A logical. If FALSE, only IBD patterns with nonzero
#' probability are included in the output.
#' @param verbose A logical. If TRUE, some computational details are printed.
#'
#' @return A data frame in which each row corresponds to an equivalence class of
#' multi-person IBD patterns. The first column gives the calculated
#' probability, followed by one column for each `ids` individual, describing
#' the minimal element of the equivalence class. (See Details.) If `complete =
#' FALSE` (the default) rows with probability 0 are removed.
#'
#' @examples
#' ### Trivial example: Trio ###
#' x = nuclearPed(1)
#' ids = 1:3
#' multiPersonIBD(x, ids, complete = TRUE)
#'
#' ### Example due to Peter Green ###
#' # Three (pairwise) cousins arranged in two different ways,
#' # with different 3-way IBD coefficients.
#'
#' threeCousins1 = ped(
#' id = c('gf','gm','gf1','gf2','gf3','gm1','gm2','gm3',
#' 'f1','f2','f3','m1','m2','m3','c1','c2','c3'),
#' fid = c(0,0,0,0,0,0,0,0,'gf1','gf2','gf3','gf','gf','gf',
#' 'f1','f2','f3'),
#' mid = c(0,0,0,0,0,0,0,0,'gm1','gm2','gm3','gm','gm','gm',
#' 'm1','m2','m3'),
#' sex = c(1,2,1,1,1,2,2,2,1,1,1,2,2,2,1,1,1))
#'
#' threeCousins2 = ped(
#' id = c('gf1','gf2','gf3','gm1','gm2','gm3','f1','f2','f3',
#' 'm1','m2','m3','c1','c2','c3'),
#' fid = c(0,0,0,0,0,0,'gf2','gf3','gf1','gf3','gf1','gf2',
#' 'f1','f2','f3'),
#' mid = c(0,0,0,0,0,0,'gm2','gm3','gm1','gm3','gm1','gm2',
#' 'm1','m2','m3'),
#' sex = c(1,1,1,2,2,2,1,1,1,2,2,2,1,1,1))
#'
#' ids = c('c1','c2','c3')
#' multiPersonIBD(threeCousins1, ids)
#' multiPersonIBD(threeCousins2, ids)
#'
#' @export
multiPersonIBD = function(x, ids, complete = FALSE, verbose = FALSE) {
N = length(ids)
if(N < 2)
stop2("`length(ids)` must be at least 2")
if(N > 6)
stop2("length(ids) > 6: Not implemented yet")
# Stop if any of `ids` are inbred
inb = inbreeding(x, ids)
if(any(isInbred <- inb > .Machine$double.eps)) {
message(paste0(sprintf(" Individual '%s' is inbred (f = %g)",
ids[isInbred], inb[isInbred]), collapse = "\n"))
stop2("This function requires all `ids` individuals to be non-inbred")
}
# Pedigree order: Enforce parents to precede their children
if(!hasParentsBeforeChildren(x))
x = parentsBeforeChildren(x)
x = foundersFirst(x)
# Setup memoisation
mem = memoIdentity(x, method = "WL")
allPatterns = MULTIPATTERNS_NONINBRED[[N]]
usePatterns = removeImpossiblePatterns(allPatterns, x, ids, verbose = verbose)
# Vector of `ids` repeated twice (needed in the generalised below)
ids2 = rep(ids, each = 2)
coefs = apply(usePatterns, 1, function(r) {
kp = r[1:(2*N)]
weight = r[2*N + 1]
g = gKinship(x, split(ids2, kp), mem = mem, method = "WL")
2^N * weight * g
})
# Print computational summary
if(verbose)
printMemInfo(mem)
# Collect into data frame
glist = lapply(seq(1, 2*N, by = 2), function(i)
paste(usePatterns[, i], usePatterns[, i+1], sep = " "))
res = as.data.frame(glist, col.names = as.character(ids), optional = TRUE,
stringsAsFactors = FALSE)
res = cbind(Prob = coefs, res)
# Keep only nonzero rows
if(!complete)
res = res[res$Prob > 0, , drop = FALSE]
# Return
res
}
#### Utility functions used in multiPersonIBD ####
# Reduce a sequence of alleles to "standard" form
# a,c,a,d -> 1,2,1,3
# 2,1,4,3,1,1 -> 1,2,3,4,2,2
# NB: simpler but slower: as.integer(factor(x, levels = unique.default(x)))
standardAlleleSeq = function(x) { # x a vector of even length
res = integer(length(x))
a = x[1]
i = 1L
while(TRUE) {
res[x == a] = i
a = x[match(0, res)]
if(is.na(a))
break
i = i + 1
}
res
}
#' Minimal IBD pattern
#'
#' Compute the minimal form of given multiperson IBD pattern.
#'
#' @param x An integer vector of even length.
#'
#' @return An integer vector of the same length as `x`.
#'
#' @examples
#' v = c(1,2,2,3)
#' stopifnot(identical(minimalPattern(v), c(1,2,1,3)))
#'
#' @export
minimalPattern = function(x) {
if(is.matrix(x)) {
res = apply(x, 1, minimalPattern)
return(res)
}
if(!is.numeric(x) || any(as.integer(x) != x))
stop("The input must be a vector of integers:", toString(x))
n = length(x)
if(n %% 2 != 0)
stop("Input vector must have even length, not ", toString(x))
# Utility for inserting a certain value at given positions.
# If "value" already exist in the vector, these are replaced with a larger dummy
insertAt = function(y, pos, value) {
y[y == value] = max(y) + 1 # dummy
y[pos] = value
y
}
nPairs = n/2
even = 2 * seq_len(nPairs)
odd = even - 1
i = 0
# Loop through pairs of alleles (i.e. genotypes)
for(k in seq_len(nPairs)) {
a = x[2*k - 1]
b = x[2*k]
if(a == b) {
if(a > i) {
x = insertAt(x, x == a, i + 1)
i = i + 1
}
next
}
# How many alleles are not seen before?
n.new = sum(a > i, b > i)
if(n.new == 2) { # both new!
idx.a = x == a
idx.b = x == b
grps.a = idx.a[odd] | idx.a[even]
grps.b = idx.b[odd] | idx.b[even]
# Swap labels if the first genotype with exactly one of a,b has b.
diffs = grps.a != grps.b
swap = any(diffs) && grps.b[match(TRUE, diffs)]
x = insertAt(x, idx.a, value = if(swap) i+2 else i+1)
x = insertAt(x, idx.b, value = if(swap) i+1 else i+2)
i = i + 2
}
else if (n.new == 1) { # only one new
x = insertAt(x, x == max(a,b), i + 1)
i = i + 1
}
# Always ensure the resulting pair is sorted
if((g1 <- x[2*k - 1]) > (g2 <- x[2*k])) {
x[2*k - 1] = g2
x[2*k] = g1
}
}
# Return modified vector
x
}
# Produce all permutations made by swapping the two alleles within (not between) individuals
expandSwaps = function(x, makeUnique = TRUE) { # x a vector of even length
n = length(x)/2
res = matrix(0L, nrow = 2*n, ncol = 2^n)
for(i in 1:n) {
rows = c(2*i-1, 2*i)
g = x[rows]
swap = rep(c(FALSE, TRUE), each = 2^(n-i), length.out = 2^n)
res[rows, !swap] = g
res[rows, swap] = g[2:1]
}
res = apply(res, 2, standardAlleleSeq)
if(makeUnique)
res = unique.matrix(res, MARGIN = 2)
t.default(res)
}
# Remove impossible patterns based on pairwise kappa
removeImpossiblePatterns = function(patterns, x, ids, verbose = TRUE) {
N = length(ids)
if(length(ids) < 3) return(patterns[, 1:(2*N + 1)])
# Original number of patterns
nr = nrow(patterns)
kappas = kappaIBD(x, ids)
nonz = lapply(1:nrow(kappas), function(i) (0:2)[kappas[i, 3:5] > 0])
names(nonz) = sapply(combn(N, 2, simplify = FALSE), paste, collapse = "-")
for(p in names(nonz)) {
goodrows = patterns[, p] %in% nonz[[p]]
patterns = patterns[goodrows, , drop = FALSE]
}
# Report change
if(verbose)
message(sprintf("State space reduction: %d --> %d", nr, nrow(patterns)))
patterns
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.