Nothing
#' @title Generate Genotyping Error Matrix
#'
#' @description Make a vector or matrix specifying the genotyping error
#' pattern, or a function to generate such a vector/matrix from a single
#' value Err.
#'
#'
#' with the probabilities of observed genotypes
#' (columns) conditional on actual genotypes (rows), or return a function to
#' generate such matrices (using a single value Err as input to that
#' function).
#'
#' @details By default (\code{flavour} = "version2.9"), \code{Err} is
#' interpreted as a locus-level error rate (rather than allele-level), and
#' equals the probability that an actual heterozygote is observed as either
#' homozygote (i.e., the probability that it is observed as AA = probability
#' that observed as aa = \code{Err}/2). The probability that one homozygote is
#' observed as the other is (\code{Err}/2\eqn{)^2}.
#'
#' The inbuilt 'flavours' correspond to the presumed and simulated error
#' structures, which have changed with sequoia versions. The most appropriate
#' error structure will depend on the genotyping platform; 'version0.9' and
#' 'version1.1' were inspired by SNP array genotyping while 'version1.3' and
#' 'version2.0' are intended to be more general.
#'
#' This function, and throughout the package, it is assumed that the two alleles
#' \eqn{A} and \eqn{a} are equivalent. Thus, using notation \eqn{P}(observed
#' genotype |actual genotype), that \eqn{P(AA|aa) = P(aa|AA)},
#' \eqn{P(aa|Aa)=P(AA|Aa)}, and \eqn{P(aA|aa)=P(aA|AA)}.
#'
#' \tabular{lccc}{
#' \strong{version} \tab \strong{hom|hom} \tab \strong{het|hom} \tab
#' \strong{hom|het} \cr
#' \strong{2.9} \tab \eqn{(E/2)^2} \tab \eqn{E-(E/2)^2} \tab \eqn{E/2} \cr
#' \strong{2.0} \tab \eqn{(E/2)^2} \tab \eqn{E(1-E/2)} \tab \eqn{E/2} \cr
#' \strong{1.3} \tab \eqn{(E/2)^2} \tab \eqn{E} \tab \eqn{E/2} \cr
#' \strong{1.1} \tab \eqn{E/2} \tab \eqn{E/2} \tab \eqn{E/2} \cr
#' \strong{0.9} \tab \eqn{0} \tab \eqn{E} \tab \eqn{E/2} \cr
#' }
#'
#' or in matrix form, Pr(observed genotype (columns) | actual genotype (rows)):
#'
#' \emph{version2.9:}
#' \tabular{lccc}{
#' \tab \strong{0} \tab \strong{1} \tab \strong{2} \cr
#' \strong{0} \tab \eqn{1-E} \tab \eqn{E -(E/2)^2} \tab \eqn{(E/2)^2} \cr
#' \strong{1} \tab \eqn{E/2} \tab \eqn{1-E} \tab \eqn{E/2} \cr
#' \strong{2} \tab \eqn{(E/2)^2} \tab \eqn{E -(E/2)^2} \tab \eqn{1-E} \cr
#' }
#'
#' \emph{version2.0:}
#' \tabular{lccc}{
#' \tab \strong{0} \tab \strong{1} \tab \strong{2} \cr
#' \strong{0} \tab \eqn{(1-E/2)^2} \tab \eqn{E(1-E/2)} \tab \eqn{(E/2)^2} \cr
#' \strong{1} \tab \eqn{E/2} \tab \eqn{1-E} \tab \eqn{E/2} \cr
#' \strong{2} \tab \eqn{(E/2)^2} \tab \eqn{E(1-E/2)} \tab \eqn{(1-E/2)^2} \cr
#' }
#'
#' \emph{version1.3}
#' \tabular{lccc}{
#' \tab \strong{0} \tab \strong{1} \tab \strong{2} \cr
#' \strong{0} \tab \eqn{1-E-(E/2)^2} \tab \eqn{E} \tab \eqn{(E/2)^2} \cr
#' \strong{1} \tab \eqn{E/2} \tab \eqn{1-E} \tab \eqn{E/2} \cr
#' \strong{2} \tab \eqn{(E/2)^2} \tab \eqn{E} \tab \eqn{1-E-(E/2)^2} \cr
#' }
#'
#' \emph{version1.1}
#' \tabular{lccc}{
#' \tab \strong{0} \tab \strong{1} \tab \strong{2} \cr
#' \strong{0} \tab \eqn{1-E} \tab \eqn{E/2} \tab \eqn{E/2} \cr
#' \strong{1} \tab \eqn{E/2} \tab \eqn{1-E} \tab \eqn{E/2} \cr
#' \strong{2} \tab \eqn{E/2} \tab \eqn{E/2} \tab \eqn{1-E} \cr
#' }
#'
#' \emph{version0.9} (not recommended)
#' \tabular{lccc}{
#' \tab \strong{0} \tab \strong{1} \tab \strong{2} \cr
#' \strong{0} \tab \eqn{1-E} \tab \eqn{E} \tab \eqn{0} \cr
#' \strong{1} \tab \eqn{E/2} \tab \eqn{1-E} \tab \eqn{E/2} \cr
#' \strong{2} \tab \eqn{0} \tab \eqn{E} \tab \eqn{1-E} \cr
#' }
#'
#' When \code{Err} is a length 3 vector, or if \code{Return = 'vector'} these
#' are the following probabilities:
#' \itemize{
#' \item hom|hom: an actual homozygote is observed as the other homozygote
#' (\eqn{E_1})
#' \item het|hom: an actual homozygote is observed as heterozygote (\eqn{E_2})
#' \item hom|het: an actual heterozygote is observed as homozygote (\eqn{E_3})
#' }
#'
#' and Pr(observed genotype (columns) | actual genotype (rows)) is then:
#' \tabular{lccc}{
#' \tab \strong{0} \tab \strong{1} \tab \strong{2} \cr
#' \strong{0} \tab \eqn{1-E_1-E_2} \tab \eqn{E_2} \tab \eqn{E_1} \cr
#' \strong{1} \tab \eqn{E_3} \tab \eqn{1-2E_3} \tab \eqn{E_3} \cr
#' \strong{2} \tab \eqn{E_1} \tab \eqn{E_2} \tab \eqn{1-E_1-E_2} \cr
#' }
#'
#' When the SNPs are scored via sequencing (e.g. RADseq or DArTseq), the 3rd
#' error rate (hom|het) is typically considerably higher than the other two,
#' while for SNP arrays it tends to be similar to P(het|hom).
#'
#'
#' @param Err estimated genotyping error rate, as a single number, or 3x3 or 4x4
#' matrix, or length 3 vector. If a single number, an error model is used that
#' aims to deal with scoring errors typical for SNP arrays. If a matrix, this
#' should be the probability of observed genotype (columns) conditional on
#' actual genotype (rows). Each row must therefore sum to 1. If
#' \code{Return='function'}, this may be \code{NA}. If a vector, these are the
#' probabilities (observed given actual) hom|other hom, het|hom, and hom|het.
#' @param flavour vector-generating or matrix-generating function, or one of
#' 'version2.9', 'version2.0', 'version1.3' (='SNPchip'), 'version1.1'
#' (='version111'), referring to the sequoia version in which it was used as
#' default. Only used if \code{Err} is a single number.
#' @param Return output, 'matrix' (default), 'vector', 'function'
#' (matrix-generating), or 'v_function' (vector-generating)
#'
#' @return Depending on \code{Return}, either:
#' \itemize{
#' \item \code{'matrix'}: a 3x3 matrix, with probabilities of observed genotypes
#' (columns) conditional on actual (rows)
#' \item \code{'function'}: a function taking a single value \code{Err} as input, and
#' generating a 3x3 matrix
#' \item \code{'vector'}: a length 3 vector, with the probabilities (observed given
#' actual) hom|other hom, het|hom, and hom|het.
#' }
#'
#'
#' @examples
#' ErM <- ErrToM(Err = 0.05)
#' ErM
#' ErrToM(ErM, Return = 'vector')
#'
#'
#' # use error matrix from Whalen, Gorjanc & Hickey 2018
#' funE <- function(E) {
#' matrix(c(1-E*3/4, E/2, E/4,
#' E/4, 1-2*E/4, E/4,
#' E/4, E/2, 1-E*3/4),
#' 3,3, byrow=TRUE) }
#' ErrToM(Err = 0.05, flavour = funE)
#' # equivalent to:
#' ErrToM(Err = c(0.05/4, 0.05/2, 0.05/4))
#'
#' @export
ErrToM <- function(Err = NA,
flavour = "version2.9",
Return = "matrix")
{
if (is.null(Err) || length(Err)==1 && is.na(Err) && Return %in% c("function", "v_function"))
Err <- 0.1 # only used for testing
if (!is.atomic(Err) || !length(Err) %in% c(1,3,9,16)) stop("'Err' must be a single number, length 3 vector, or 3x3 matrix")
if (any(Err<0 | Err>1) || !is.double(Err)) stop("'Err' must be (a) number(s) between 0 and 1")
ErrV <- NULL # hom|hom, het|hom, hom|het
ErrM <- NULL # observed (columns) conditional on actual (rows)
ErFuncV <- NULL # function to generate a length-3 vector
ErFunc <- NULL # function to generate a 3x3 or 4x4 matrix
ValidFlavours <- c(paste0('version', c('2.9', '2.0', '1.3', '1.1', '0.9', '0.7')), 'SNPchip')
if (is.matrix(Err)) {
if (nrow(Err)==3 & ncol(Err)==3) {
ErrM <- Err
} else if (nrow(Err)==4 & ncol(Err)==4) {
ErrM <- shrinkEM(Err)
} else {
stop("Error matrix should be a 3x3 or 4x4 matrix")
}
} else if (length(Err)==3) {
ErrV <- Err
} else if (length(Err) > 1) {
stop("Err can be a 3x3 or 4x4 matrix, a length 3 vector, or a single number")
} else {
if (is.function(flavour)) {
Err_test <- flavour(Err)
if (length(Err_test)==3) {
ErFuncV <- flavour
ErrV <- ErFuncV(Err)
} else {
ErFunc <- flavour
if (!is.matrix(Err_test)) stop("ErFunc(E) should return a 3x3 or 4x4 matrix")
if (!(all(dim(Err_test)==4) | all(dim(Err_test)==3)) ) stop("ErFunc(E) should return a 4x4 or 3x3 matrix")
ErrM <- ErFunc(Err)
}
Err_test.B <- flavour(Err+0.1)
if (all(Err_test.B == Err_test)) stop("'flavour' function is not a function of error rate Err")
} else if (length(flavour)==1 && flavour %in% ValidFlavours) {
# hom|hom, het|hom, hom|het
ErFuncV <- switch(flavour,
"version2.9" = function(E) c((E/2)^2, E-(E/2)^2, E/2),
"version2.0" = function(E) c((E/2)^2, E*(1-E/2), E/2),
"version1.3" = function(E) c((E/2)^2, E, E/2),
"SNPchip" = function(E) c((E/2)^2, E, E/2),
"version1.1" = function(E) c(E/2, E/2, E/2),
"version0.9" = function(E) c(0, E, E/2),
"version0.7" = function(E) c(0, E, E/2),
function(E) c(0,0,0))
ErrV <- ErFuncV(Err)
} else {
stop("Unknown ErrFlavour, choose 'version2.9', 'version2.0', 'version1.3', 'version1.1', \n",
"or specify vector or matrix(-generating function) via 'Err'")
}
}
if (!is.null(ErrM) && (!is.double(ErrM) || any(ErrM<0 | ErrM>1)))
stop("Error vector/matrix values must be between 0 and 1")
if (!is.null(ErrV) && (!is.double(ErrV) || any(ErrV<0 | ErrV>1)))
stop("Error vector/matrix values must be between 0 and 1")
if (!is.null(ErrM) && !all(abs(rowSums(ErrM) - 1) < sqrt(.Machine$double.eps)))
stop("Error matrix rows must sum to 1")
if (Return == "matrix") {
if (is.null(ErrM)) ErrM <- ErV2M(ErrV)
dimnames(ErrM) <- list(paste0("act-", 0:2), paste0("obs-", 0:2, "|act"))
return( ErrM )
} else if (Return == 'vector') {
if (is.null(ErrV)) ErrV <- ErM2V(ErrM)
return( ErrV )
} else if (Return == 'v_function') {
if (is.null(ErFuncV) & !is.null(ErFunc))
stop('Cannot automatically make vector function from matrix function')
if (!is.null(ErFuncV)) {
return( ErFuncV )
} else {
stop("Don't know how to make error function from vector or matrix")
}
} else if (Return == "function") {
if (is.null(ErFunc) & !is.null(ErFuncV)) ErFunc <- EfuncV2M(ErFuncV)
if (!is.null(ErFunc)) {
return( ErFunc )
} else {
stop("Don't know how to make error function from vector or matrix")
}
} else {
stop("Unknown Return format")
}
}
#===============================================================================
#===============================================================================
# 4x4 matrix (aa, aA, Aa, AA) to 3x3 matrix
shrinkEM <- function(EM4) {
EM3 <- matrix(NA, 3,3)
EM3[c(1,3), c(1,3)] <- EM4[c(1,4), c(1,4)]
EM3[2, c(1,3)] <- EM4[2, c(1,4)]+EM4[3, c(1,4)]
EM3[c(1,3), 2] <- EM4[c(1,4), 2] + EM4[c(1,4), 2]
EM3[2, 2] <- sum(EM4[2:3, 2:3])
return( EM3 )
}
# vector -> matrix Pr(observed genotype (columns) | actual genotype (rows)):
ErV2M <- function(ErV,
CallRate=NULL) { # for use by SimGeno
if (is.null(CallRate)) {
ErrM <- matrix(NA, 3,3, dimnames = list(act=0:2, obs=0:2))
} else {
ErrM <- matrix(NA, 3,4, dimnames = list(act=0:2, obs=-1:2))
}
ErrM['0', c('0','1','2')] <- c(1-ErV[1]-ErV[2], ErV[2], ErV[1])
ErrM['1', c('0','1','2')] <- c(ErV[3], 1-2*ErV[3], ErV[3])
ErrM['2', c('0','1','2')] <- c(ErV[1], ErV[2], 1-ErV[1]-ErV[2])
if (!is.null(CallRate)) {
ErrM[,c('0','1','2')] <- ErrM[,c('0','1','2')] * CallRate
ErrM[,'-1'] <- 1 - CallRate
}
return( ErrM )
}
# matrix --> vector: hom -> other hom, hom -> het, het -> hom
ErM2V <- function(ErrM) {
ErV <- setNames(rep(NA,3), c('hom|hom', 'het|hom', 'hom|het'))
ErV['hom|hom'] <- (ErrM[1,3] + ErrM[3,1])/2
ErV['het|hom'] <- (ErrM[1,2] + ErrM[3,2])/2
ErV['hom|het'] <- (ErrM[2,1] + ErrM[2,3])/2
return( ErV )
}
EfuncV2M <- function(Vfun) {
VP <- body(Vfun)[2:4]
Mfun <- function(E) {}
body(Mfun) <- bquote(matrix(c(1-.(VP[[1]])-.(VP[[2]]), .(VP[[2]]), .(VP[[1]]),
.(VP[[3]]), 1-2*.(VP[[3]]), .(VP[[3]]),
.(VP[[1]]), .(VP[[2]]), 1-.(VP[[1]])-.(VP[[2]])),
3,3,byrow=TRUE))
return(Mfun)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.