Nothing
#' Random match probability of profile(s)
#'
#' Computes the random/conditional match probability.
#' @param x Integer matrix with the profile(s) for which random match probability is computed.
#' @param freqs A list specifying the allelic frequencies. Should contain a vector of allelic frequencies for each locus, named after that locus.
#' @param markers Character vector stating the markers to use in the rmp computation. Defaults to all markers of the profile.
#' @param theta Numeric value specifying the amount of background relatedness.
#' @param cmp Logical conditional match probability. If TRUE, the Balding-Nichols formula is used to compute the conditional match probability in the same subpopulation.
#' @param ret.per.marker Logical. If TRUE, return a matrix of random match probabilities, where the columns correspond to markers.
#' @details When \eqn{\theta=0}, the simple product rule is used. Assuming Hardy-Weinberg and Linkage Equilibrium, the random match probability (rmp) for unordered is computed as the product of \deqn{2^H f_a f_b} over the loci, where \eqn{f_a} and \eqn{f_b} are respectively the population frequencies of allele \eqn{a} and \eqn{b} and \eqn{H} is the indicator function for heterozygosity (alleles \eqn{a} and \eqn{b} are not the same).
#'
#' When \eqn{\theta>0} and \code{cmp=FALSE}, the product rule is used that incorporates a correction for inbreeding as measured by \eqn{theta}.
#'
#' When \eqn{\theta>0} and \code{cmp=TRUE}, a product rule involving a subpopulation correction is used, as given by Balding & Nichols. The match probability for homozygotes is given by: \deqn{\frac{(2 \theta+(1-\theta)f_a)(3 \theta+(1-\theta)f_a)}{(1+\theta)(1+2 \theta)},}and for heterozygotes by: \deqn{\frac{2(\theta+(1-\theta)f_a)(\theta+(1-\theta)f_b)}{(1+\theta)(1+2\theta)}.}
#'
#' If \code{x} contains missing values (NAs) at a marker, then the returned match probability equals one for persons with both alleles missing. If a single allele is missing, then the match probability is equal to the frequency of the single allele that is seen, unless the conditional match probability is computed.
#' @return numeric matrix of random match probabilities. When \code{ret.per.matrix} is \code{TRUE}, the columns contain rmps per marker.
#' @examples
#' ## compute the conditional match probability for two markers
#' data(freqsNLngm)
#' y <- sample.profiles(N=1,freqsNLngm)
#' rmp(y,markers = c("FGA","TH01"),theta=0.03,cmp=TRUE,ret.per.marker = TRUE)
#' rmp(y,markers = c("FGA","TH01"),ret.per.marker = TRUE) # compare to product rule estimate
#'
#' ## make a plot of density estimates of RMPs of profiles on the 10 SGMplus
#'
#' data(freqsNLsgmplus)
#'
#' #sample profiles
#' x <- sample.profiles(N=1e3,freqsNLsgmplus)
#'
#' #compute RMPs
#' x.rmp <- rmp(x)
#'
#' plot(density(log10(x.rmp)),
#' xlab=expression(log[10](RMP)),
#' main="Random match probabilities for SGMplus profiles")
#'
#'
#'@export
#'
#'
rmp <- function(x,freqs=get.freqs(x),markers=get.markers(x),theta=0,cmp=FALSE,ret.per.marker=FALSE){
x <- Zassure.matrix(x)
x.markers <- get.markers(x) # does a check on the column names of x
freqs.markers <- names(freqs)
# check if profiles and frequencies are available for these markers
if (!all(markers %in% freqs.markers)){
stop("Allele frequencies unavailable for marker(s) ",paste(markers[!markers %in% freqs.markers],collapse=", "))
}
if (!all(markers %in% x.markers)){
stop("x does not contain marker(s) ",paste(markers[!markers %in% x.markers],collapse=", "))
}
if (!cmp){
# no cmp so we can use Zrmpcpp function, which requires matrices as input
x <- x[,paste(rep(markers,each=2),1:2,sep="."),drop=FALSE]
# check if db contains off ladder alleles which could crash the Zrmpcpp function
min.x <- min(x,na.rm = TRUE)
max.x <- max(x,na.rm = TRUE)
if (min.x<1L) stop("alleles should be positive integers")
freqs.L <- sapply(freqs[markers],length)
if (max.x>max(freqs.L)) stop("db contains allele that is not in freqs")
freqs.mat <- matrix(0.,nrow = max(freqs.L),ncol = length(markers))
for(m in seq_along(markers)) freqs.mat[seq_len(freqs.L[m]),m] <- as.vector(freqs[[markers[m]]])
ret <- Zrmp(x,freqs.mat,theta,as.integer(ret.per.marker))
}else{ # Balding-Nichols formula
ret <- matrix(1,nrow=nrow(x),ncol=ifelse(ret.per.marker,length(markers),1))
for (m in seq_along(markers)){
ret.marker <- rep(1,nrow(x))
a <- as.integer(x[,paste(markers[m],1,sep=".")]) # first allele @ marker
b <- as.integer(x[,paste(markers[m],2,sep=".")]) # second allele @ marker
fr0 <- as.vector(freqs[[markers[m]]]) # look up allele freqs for marker
# if a single allele is missing, then we can't use the standard formula
a.na <- is.na(a); b.na <- is.na(b)
single.na <- xor(a.na,b.na)
if (any(single.na)) {
ret.marker[(single.na&a.na)] <- fr0[b[(single.na&a.na)]]*(1-theta)+theta
ret.marker[(single.na&b.na)] <- fr0[a[(single.na&b.na)]]*(1-theta)+theta
}
# now the case that no allele is missing or both are missing
hom <- (a==b) #homozygous or NA
hom.ind <- which(hom); het.ind <- which(!hom) # gets rid of the NAs in the logical, so actual hom/hets
ret.marker[hom.ind] <- (2*theta+(1-theta)*fr0[a[hom.ind]])*(3*theta+(1-theta)*fr0[a[hom.ind]])/ ((1+theta)*(1+2*theta))
ret.marker[het.ind] <- 2*(theta+(1-theta)*fr0[a[het.ind]])*(theta+(1-theta)*fr0[b[het.ind]])/((1+theta)*(1+2*theta))
if (ret.per.marker) ret[,m] <- ret.marker else ret <- ret * ret.marker
}
}
if (ret.per.marker) colnames(ret) <- markers
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.