Nothing
#' Genotype distribution for a single marker
#'
#' Computes the genotype probability distribution of one or several pedigree
#' members, possibly conditional on known genotypes for the marker.
#'
#' @param x A `ped` object or a list of such.
#' @param ids A vector of ID labels of one or more members of `x`.
#' @param marker Either a `marker` object or the name (or index) of a marker
#' attached to `x`. If `x` has multiple components, only the latter is
#' allowed.
#' @param partialmarker (Deprecated) An alias for `marker`.
#' @param loopBreakers (Only relevant if the pedigree has loops). A vector with
#' ID labels of individuals to be used as loop breakers. If NULL (default)
#' loop breakers are selected automatically. See [pedtools::breakLoops()].
#' @param grid.subset (Optional; not relevant for most users.) A numeric matrix
#' describing a subset of all marker genotype combinations for the `ids`
#' individuals. The matrix should have one column for each of the `ids`
#' individuals, and one row for each combination: The genotypes are described
#' in terms of the matrix `M = allGenotypes(n)`, where `n` is the number of
#' alleles for the marker. If the entry in column `j` is the integer `k`, this
#' means that the genotype of individual `ids[j]` is row `k` of `M`.
#' @param output A character string, either `"array"` (default), "table" or
#' "sparse". See Value.
#' @param verbose A logical.
#'
#' @return The output format depends on the `output` argument:
#'
#' * "array": A named `k`-dimensional array, where `k = length(ids)`, with the
#' joint genotype distribution for the `ids` individuals, conditional on the
#' known genotypes if present.
#' * "table": A data frame with `k+1` columns, where each row corresponds
#' to a genotype combination, and the last column `prob` gives the
#' probability.
#' * "sparse": A data frame with the same structure as the "table" output,
#' but only combinations with non-zero probability are included.
#'
#' @seealso [twoMarkerDistribution()]
#'
#' @examples
#'
#' # Trivial example: Hardy-Weinberg probabilities for an equifrequent SNP
#' s = singleton(id = 1) |> addMarker(alleles = 1:2, afreq = c(0.5, 0.5))
#' oneMarkerDistribution(s, ids = 1)
#'
#' # Conditioning on a partial genotype
#' s = setGenotype(s, ids = 1, geno = "1/-")
#' oneMarkerDistribution(s, ids = 1)
#'
#' # Genotype distribution for a child of heterozygous parents
#' trio = nuclearPed(father = "fa", mother = "mo", child = "ch") |>
#' addMarker(fa = "1/2", mo = "1/2")
#' oneMarkerDistribution(trio, ids = "ch")
#'
#' # Joint distribution of the parents, given that the child is heterozygous
#' trio = addMarker(trio, ch = "1/2")
#' ids = c("fa", "mo")
#' oneMarkerDistribution(trio, ids = ids, marker = 2)
#'
#' # Table output of the previous example
#' oneMarkerDistribution(trio, ids = ids, marker = 2, output = "table")
#' oneMarkerDistribution(trio, ids = ids, marker = 2, output = "sparse")
#'
#' # A different example: The genotype distribution of an individual (id = 8)
#' # whose half cousin (id = 9) is homozygous for a rare allele.
#' y = halfCousinPed(degree = 1) |>
#' addMarker("9" = "a/a", afreq = c(a = 0.01, b = 0.99))
#'
#' oneMarkerDistribution(y, ids = 8)
#'
#' # Multi-component (trivial) example
#' z = singletons(1:2) |> addMarker(`1` = "1/2", `2` = "1/2", alleles = 1:2)
#' oneMarkerDistribution(z, 1:2)
#' oneMarkerDistribution(z, 1:2, output = "sparse")
#'
#' @export
oneMarkerDistribution = function(x, ids, marker = 1, loopBreakers = NULL,
grid.subset = NULL, partialmarker = NULL,
output = c("array", "table", "sparse"),
verbose = TRUE) {
if(!is.null(partialmarker)) {
cat("The argument `partialmarker` has been renamed to `marker` and will be removed in a future version.\n")
marker = partialmarker
}
ids = as.character(ids)
output = match.arg(output)
formatResult = function(res) {
switch(output, array = res,
table = omd2df(res, ids, sparse = FALSE),
sparse = omd2df(res, ids, sparse = TRUE))
}
if(is.pedList(x)) {
if(is.marker(marker))
stop2("When `x` has multiple components, `marker` cannot be an unattached marker object")
pednr = getComponent(x, ids, checkUnique = TRUE, errorIfUnknown = TRUE)
if(all(pednr == pednr[1]))
x = x[[pednr[1]]]
else {
compRes = lapply(unique.default(pednr), function(i) {
idsC = ids[pednr == i]
lb = if(is.null(loopBreakers)) NULL else .myintersect(loopBreakers, x[[i]]$ID)
gs = if(is.null(grid.subset)) NULL else unique.matrix(grid.subset[, pednr == i, drop = FALSE])
oneMarkerDistribution(x[[i]], idsC, marker = marker, loopBreakers = lb, grid.subset = gs, verbose = FALSE)
})
res = Reduce(`%o%`, compRes)
return(formatResult(res))
}
}
if(!is.ped(x))
stop2("Input is not a pedigree")
m = marker
if (!is.marker(m)) {
if(length(m) != 1)
stop2("`marker` must have length 1")
m = getMarkers(x, markers = m)[[1]]
}
# Special case: Unconditional & all founders
# TODO: Generalisation to unrelated clusters - requires ribd!
if(length(ids) > 1 && all(m == 0) & all(ids %in% founders(x))) {
compRes = lapply(seq_along(ids), function(i) {
gs = if(is.null(grid.subset)) NULL else unique.matrix(grid.subset[, i, drop = FALSE])
oneMarkerDistribution(x, ids[i], marker = m, loopBreakers = loopBreakers,
grid.subset = gs, output = "array", verbose = FALSE)
})
res = Reduce(`%o%`, compRes)
return(formatResult(res))
}
# Reorder if needed
if(!hasParentsBeforeChildren(x)) {
x = parentsBeforeChildren(setMarkers(x, m))
m = x$MARKERS[[1]]
}
if (!is.null(x$LOOP_BREAKERS))
stop2("`ped` objects with pre-broken loops are not allowed as input to `oneMarkerDistribution()`")
alleles = alleles(m)
Xchrom = isXmarker(m)
if (verbose) {
cat("Known genotypes:\n")
print(m)
cat("\nChromosome type :", ifelse(Xchrom, "X-linked", "autosomal"))
cat("\nTarget individuals :", toString(ids), "\n")
}
st = Sys.time()
# Compute grid before loop breaking (works better with eliminate2)
if (is.null(grid.subset))
grid.subset = genoCombinations(x, m, ids, make.grid = TRUE)
else
grid.subset = as.matrix(grid.subset)
if (x$UNBROKEN_LOOPS) {
x = breakLoops(setMarkers(x, m), loopBreakers = loopBreakers, verbose = verbose)
m = x$MARKERS[[1]]
}
int.ids = internalID(x, ids)
allgenos = allGenotypes(nAlleles(m))
# Character with genotype labels
gt.strings = paste(alleles[allgenos[, 1]], alleles[allgenos[, 2]], sep = "/")
if(Xchrom) {
sx = getSex(x, ids)
geno.names = list(alleles, gt.strings)[sx]
}
else {
geno.names = rep(list(gt.strings), length(ids))
}
# Create output array. Will hold likelihood of each genotype combo
probs = array(0, dim = lengths(geno.names, use.names = FALSE), dimnames = geno.names)
# Subset of `probs` that is affected by grid.subset
probs.subset = grid.subset
# Needs adjustment for X (in male columns)
if(Xchrom) {
homoz = which(allgenos[,1] == allgenos[,2])
probs.subset[, sx == 1] = match(probs.subset[, sx == 1], homoz)
}
nr = nrow(grid.subset)
# Compute marginal
marginal = likelihood(x, markers = m)
if (marginal == 0)
stop2("Partial marker is impossible")
if(verbose) {
cat("Marginal likelihood:", marginal, "\n")
cat("Calculations needed:", nr, "\n")
}
# Create list of all markers
mlist = lapply(seq_len(nr), function(i) {
r = grid.subset[i,]
m[int.ids, ] = allgenos[r, ]; m})
# Calculate likelihoods and insert in result array
y = setMarkers(x, mlist, checkCons = FALSE)
probs[probs.subset] = likelihood(y, allX = Xchrom)
# Timing
if(verbose)
cat("\nAnalysis finished in", format(Sys.time() - st, digits = 3), "\n")
res = probs/marginal
formatResult(res)
}
# Convert output array to data frame
omd2df = function(arr, ids, sparse = FALSE) {
dn = dimnames(arr)
args = c(dn, stringsAsFactors = FALSE, KEEP.OUT.ATTRS = FALSE)
df = do.call(expand.grid, args)
names(df) = ids
df$prob = as.vector(arr)
if(sparse) {
df = df[df$prob > 0, , drop = FALSE]
rownames(df) = NULL
}
df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.