##########
# QC_MAF #
##########
#' Minor allele frequency
#'
#' Compute minor allele frequency (MAF) at the whole population and
#' within crosses if a cross indicator vector is provided to \code{cross.ind}.
#'
#' @param mk.mat \code{Character} marker score \code{matrix} with genotypes as
#' row and markers as column. \strong{Rows and columns names must be the genotype
#' and marker identifiers respectively. Marker scores must be coded using one
#' letter per allele. For example, AA, CC, GG, TT, AC, AG, AT, CA, CG, CT,
#' GA, GC, GT, TA, TC, TG. Missing values must be coded \code{NA}.}
#'
#' @param cross.ind \code{Character} vector indicating to which cross each
#' genotype belongs. If \code{cross.ind = NULL}, the function do not consider cross
#' subdivisions and only calculate MAF at the population level. Default = NULL.
#'
#' @param parallel \code{Logical} value specifying if the function should be
#' executed in parallel on multiple cores. Default = FALSE.
#'
#' @param cluster Cluster object obtained with the function \code{makeCluster()}
#' from the parallel package. Default = NULL.
#'
#' @return Return:
#'
#' Object of class mafRes MAF containing the following element(s).
#' If cross.ind = NULL
#'
#' \item{MAF.pop}{Vector of marker allele MAF at the
#' population level. \code{NA} if all markers scores are missing.}
#'
#' If cross.ind is not NULL
#'
#' \item{MAF.pop}{Vector of marker allele MAF at the
#' population level. \code{NA} if all markers scores are missing.}
#'
#' \item{MAF.cr}{\code{Matrix} of marker allele MAF within crosses.
#' \code{NA} if all markers scores are missing.}
#'
#' @author Vincent Garin
#'
#' @seealso
#'
#' \code{\link{QC_tagMAFCr}}
#'
#' @examples
#'
#' data(USNAM_geno)
#' cross.ind <- substr(rownames(USNAM_geno)[7:dim(USNAM_geno)[1]], 1, 4)
#'
#' MAF <- QC_MAF(mk.mat = USNAM_geno[7:dim(USNAM_geno)[1], ],
#' cross.ind = cross.ind)
QC_MAF <- function(mk.mat, cross.ind = NULL, parallel = FALSE,
cluster = NULL) {
# 1. check data format
######################
stopifnot(is.matrix(mk.mat), is.character(mk.mat))
# 2. function to calculate MAF on a single vector
#################################################
MAF.vect <- function(x) {
x <- x[!is.na(x)] # remove missing values
if (length(x) == 0){ # Only missing values
return(NA)
} else {
# split all marker score into single allele scores
alleles <- c(vapply(X = x, FUN = function(x) unlist(strsplit(x, split = "")),
FUN.VALUE = character(2)))
if(length(table(alleles)) == 1){ # monomorphic marker
return(0)
} else {
return(min(table(alleles) / length(alleles)))
}
}
} # end single vector MAF function
if(is.null(cross.ind)){ # compute MAF only at population level
### 3.1 MAF at population level
if(parallel) {
MAF.pop <- parApply(cl = cluster, X = mk.mat, MARGIN = 2, FUN = MAF.vect)
} else {
MAF.pop <- apply(X = mk.mat, MARGIN = 2, FUN = MAF.vect)
}
MAF <- MAF.pop
class(MAF) <- c("mafRes")
return(MAF)
} else { # compute MAF at population and cross level
### 3.1 check that the cross indicator vector has the same length
if(length(cross.ind) != dim(mk.mat)[1]){
stop("The cross indicator vector (cross.ind) length does not have the same
length as the genotype list (dim(mk.mat)[1])")
}
### 3.2 MAF at population level
if(parallel) {
MAF.pop <- parApply(cl = cluster, X = mk.mat, MARGIN = 2, FUN = MAF.vect)
} else {
MAF.pop <- apply(X = mk.mat, MARGIN = 2, FUN = MAF.vect)
}
### 3.3 MAF at the cross level
# function to apply MAF computation on within cross
cross.ind <- factor(cross.ind, levels = unique(cross.ind))
MAF.vect.cr <- function(x) tapply(X = x, INDEX = cross.ind, FUN = MAF.vect)
if(parallel) {
MAF.cr <- parApply(cl = cluster, X = mk.mat, MARGIN = 2, FUN = MAF.vect.cr)
} else {
MAF.cr <- apply(X = mk.mat, MARGIN = 2, FUN = MAF.vect.cr)
}
MAF <- list(MAF.pop = MAF.pop, MAF.cr = MAF.cr)
class(MAF) <- c("list", "mafRes")
return(MAF)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.