Nothing
#' QTL search by MIM
#'
#' Expectation-maximization algorithm for QTL multiple interval mapping to
#' find one more QTL in the presence of some known QTLs. It can handle
#' genotype data which is selective genotyping too.
#'
#' @param QTL matrix. A q*2 matrix contains the QTL information, where the
#' row dimension 'q' represents the number of QTLs in the chromosomes. The
#' first column labels the chromosomes where the QTLs are located, and the
#' second column labels the positions of QTLs (in morgan (M) or centimorgan
#' (cM)).
#' @param marker matrix. A k*2 matrix contains the marker information,
#' where the row dimension 'k' represents the number of markers in the
#' chromosomes. The first column labels the chromosomes where the markers
#' are located, and the second column labels the positions of markers (in
#' morgan (M) or centimorgan (cM)). It's important to note that chromosomes
#' and positions must be sorted in order.
#' @param geno matrix. A n*k matrix contains the genotypes of k markers
#' for n individuals. The marker genotypes of P1 homozygote (MM),
#' heterozygote (Mm), and P2 homozygote (mm) are coded as 2, 1, and 0,
#' respectively, with NA indicating missing values.
#' @param y vector. A vector that contains the phenotype values of
#' individuals with genotypes.
#' @param yu vector. A vector that contains the phenotype values of
#' individuals without genotypes.
#' @param sele.g character. Determines the type of data being analyzed:
#' If sele.g="n", it considers the data as complete genotyping data. If
#' sele.g="f", it treats the data as selective genotyping data and utilizes
#' the proposed corrected frequency model (Lee 2014) for analysis; If
#' sele.g="t", it considers the data as selective genotyping data and uses
#' the truncated model (Lee 2014) for analysis; If sele.g="p", it treats
#' the data as selective genotyping data and uses the population
#' frequency-based model (Lee 2014) for analysis. Note that the 'yu'
#' argument must be provided when sele.g="f" or "p".
#' @param tL numeric. The lower truncation point of phenotype value when
#' sele.g="t". When sele.g="t" and tL=NULL, the 'yu' argument must be
#' provided. In this case, the function will consider the minimum of 'yu'
#' as the lower truncation point.
#' @param tR numeric. The upper truncation point of phenotype value when
#' sele.g="t". When sele.g="t" and tR=NULL, the 'yu' argument must be
#' provided. In this case, the function will consider the maximum of 'yu'
#' as the upper truncation point.
#' @param method character. When method="EM", it indicates that the interval
#' mapping method by Lander and Botstein (1989) is used in the analysis.
#' Conversely, when method="REG", it indicates that the approximate regression
#' interval mapping method by Haley and Knott (1992) is used in the analysis.
#' @param type character. The population type of the dataset. Includes
#' backcross (type="BC"), advanced intercross population (type="AI"), and
#' recombinant inbred population (type="RI"). The default value is "RI".
#' @param ng integer. The generation number of the population type. For
#' instance, in a BC1 population where type="BC", ng=1; in an AI F3
#' population where type="AI", ng=3.
#' @param D.matrix matrix. The design matrix of QTL effects is a g*p matrix,
#' where g is the number of possible QTL genotypes, and p is the number of
#' effects considered in the MIM model. It's important to note that the QTL
#' number of the design matrix must be the original QTL number plus one. The
#' design matrix can be easily generated by the function D.make(). If set to
#' NULL, it will automatically generate a design matrix with all additive and
#' dominant effects and without any epistasis effect.
#' @param cM logical. Specify the unit of marker position. If cM=TRUE, it
#' denotes centimorgan; if cM=FALSE, it denotes morgan.
#' @param speed numeric. The walking speed of the QTL search (in cM).
#' @param QTLdist numeric. The minimum distance (in cM) among different
#' linked significant QTL. Positions near the known QTLs within this distance
#' will not be considered as candidate positions in the search process.
#' @param link logical. When set to False, positions on the same chromosomes
#' as the known QTLs will not be searched.
#' @param crit numeric. The convergence criterion of EM algorithm.
#' The E and M steps will iterate until a convergence criterion is met.
#' It must be a value between 0 and 1.
#' @param console logical. Determines whether the process of the algorithm
#' will be displayed in the R console or not.
#' @param IMresult list. The data list of the output from IM.search(). The
#' required parameters for this function will be extracted from the data list.
#' @param MIMresult list. The data list of the output from MIM.search() or
#' MIM.points(). The required parameters for this function will be extracted
#' from the data list.
#'
#' @return
#' \item{effect}{The estimated effects, log-likelihood value, and LRT
#' statistics of all searched positions.}
#' \item{QTL.best}{The positions of the best QTL combination.}
#' \item{effect.best}{The estimated effects and LRT statistics of the best
#' QTL combination.}
#' \item{model}{The model of selective genotyping data in this analyze.}
#' \item{inputdata}{The input data of this analysis. It contains marker, geno,
#' y, yu, sele.g, type, ng, cM, and D.matrix. The parameters not provided by
#' the user will be output with default values.}
#'
#' @note
#'
#' When IMresult and MIMresult are entered simultaneously, only IMresult will
#' be processed.
#'
#' If an error occurs, please check whether the original output data of
#' IM.search(), MIM.result(), or MIM.points() is used.
#'
#' @export
#'
#' @references
#'
#' KAO, C.-H. and Z.-B. ZENG 1997 General formulas for obtaining the maximum
#' likelihood estimates and the asymptotic variance-covariance matrix in QTL
#' mapping when using the EM algorithm. Biometrics 53, 653-665. <doi: 10.2307/2533965.>
#'
#' KAO, C.-H., Z.-B. ZENG and R. D. TEASDALE 1999 Multiple interval mapping
#' for Quantitative Trait Loci. Genetics 152: 1203-1216. <doi: 10.1093/genetics/152.3.1203>
#'
#' H.-I LEE, H.-A. HO and C.-H. KAO 2014 A new simple method for improving
#' QTL mapping under selective genotyping. Genetics 198: 1685-1698. <doi: 10.1534/genetics.114.168385.>
#'
#' @seealso
#' \code{\link[QTLEMM]{EM.MIM}}
#' \code{\link[QTLEMM]{IM.search}}
#' \code{\link[QTLEMM]{MIM.points}}
#'
#' @examples
#'
#' # load the example data
#' load(system.file("extdata", "exampledata.RDATA", package = "QTLEMM"))
#'
#' # run and result
#' QTL <- c(1, 23)
#' result <- MIM.search(QTL, marker, geno, y, type = "RI", ng = 2, speed = 15, QTLdist = 50)
#' result$QTL.best
#' result$effect.best
#'
#' \dontrun{
#' # Example for selective genotyping data
#' # load the example data
#' load(system.file("extdata", "exampledata.RDATA", package = "QTLEMM"))
#'
#' # make the seletive genotyping data
#' ys <- y[y > quantile(y)[4] | y < quantile(y)[2]]
#' yu <- y[y >= quantile(y)[2] & y <= quantile(y)[4]]
#' geno.s <- geno[y > quantile(y)[4] | y < quantile(y)[2],]
#'
#' # run and result
#' QTL <- c(1, 23)
#' result <- MIM.search(QTL, marker, geno.s, ys, yu, sele.g = "f",
#' type = "RI", ng = 2, speed = 15, QTLdist = 50)
#' result$QTL.best
#' result$effect.best
#' }
#'
MIM.search <- function(QTL = NULL, marker = NULL, geno = NULL, y = NULL, yu = NULL, sele.g = "n", tL = NULL,
tR = NULL, method = "EM", type = "RI", D.matrix = NULL, ng = 2, cM = TRUE, speed = 1,
QTLdist = 15, link = TRUE, crit = 10^-3, console = interactive(), IMresult = NULL, MIMresult = NULL){
if(!is.null(IMresult)){
check1 <- names(IMresult) == c("effect", "LRT.threshold", "detect.QTL", "model", "inputdata")
if(!is.list(IMresult) | sum(check1) != 5){
stop("IMresult data error, please input all of the original output data of IM.search().", call. = FALSE)
} else {
check2 <- names(IMresult$inputdata) == c("marker", "geno", "y", "yu", "sele.g", "type", "ng", "cM", "d.eff" )
if(!is.list(IMresult$inputdata) | sum(check2) != 9){
stop("IMresult data error, please input all of the original output data of IM.search().", call. = FALSE)
} else {
if(is.data.frame(IMresult$detect.QTL) | is.matrix(IMresult$detect.QTL)){
QTL <- as.matrix(IMresult$detect.QTL[,1:2])
} else {stop("IMresult data error, please input all of the original output data of IM.search().", call. = FALSE)}
marker <- IMresult$inputdata$marker
geno <- IMresult$inputdata$geno
y <- IMresult$inputdata$y
yu <- IMresult$inputdata$yu
sele.g <- IMresult$inputdata$sele.g
type <- IMresult$inputdata$type
ng <- IMresult$inputdata$ng
cM <- IMresult$inputdata$cM
if(is.null(D.matrix)){
D.matrix <- D.make(nrow(QTL)+1, type = type)
if(length(IMresult$inputdata$d.eff) != 0){
if(IMresult$inputdata$d.eff[1] == 0){
D.matrix <- D.make(nrow(QTL)+1, type = type, d = 0)
}
}
}
}
}
} else if(!is.null(MIMresult)){
check1 <- names(MIMresult) == c("effect", "QTL.best", "effect.best", "model", "inputdata")
if(!is.list(MIMresult) | sum(check1) != 5){
stop("MIMresult data error, please input all of the original output data of MIM.search()/MIM.points().", call. = FALSE)
} else {
check2 <- names(MIMresult$inputdata) == c("marker", "geno", "y", "yu", "sele.g", "type", "ng", "cM", "D.matrix" )
if(!is.list(MIMresult$inputdata) | sum(check2) != 9){
stop("MIMresult data error, please input all of the original output data of MIM.search()/MIM.points().", call. = FALSE)
} else {
if(is.data.frame(MIMresult$QTL.best) | is.matrix(MIMresult$QTL.best)){
QTL <- as.matrix(MIMresult$QTL.best[,1:2])
} else {stop("MIMresult data error, please input all of the original output data of MIM.search()/MIM.points().", call. = FALSE)}
marker <- MIMresult$inputdata$marker
geno <- MIMresult$inputdata$geno
y <- MIMresult$inputdata$y
yu <- MIMresult$inputdata$yu
sele.g <- MIMresult$inputdata$sele.g
type <- MIMresult$inputdata$type
ng <- MIMresult$inputdata$ng
cM <- MIMresult$inputdata$cM
if(is.null(D.matrix)){
D.matrix <- D.make(nrow(QTL)+1, type = type)
}
}
}
}
if(is.null(QTL) | is.null(marker) | is.null(geno) | is.null(y)){
stop("Input data is missing. The argument QTL, marker, geno, and y must be input.", call. = FALSE)
}
genotest <- table(c(geno))
datatry <- try(geno*geno, silent=TRUE)
if(class(datatry)[1] == "try-error" | FALSE %in% (names(genotest) %in% c(0, 1, 2)) | length(dim(geno)) != 2){
stop("Genotype data error, please cheak your genotype data.", call. = FALSE)
}
marker <- as.matrix(marker)
markertest <- c(ncol(marker) != 2, NA %in% marker, marker[,1] != sort(marker[,1]), nrow(marker) != ncol(geno))
datatry <- try(marker*marker, silent=TRUE)
if(class(datatry)[1] == "try-error" | TRUE %in% markertest){
stop("Marker data error, or the number of marker does not match the genotype data.", call. = FALSE)
}
QTL <- as.matrix(QTL)
if(ncol(QTL) != 2){QTL <- t(QTL)}
datatry <- try(QTL*QTL, silent=TRUE)
if(class(datatry)[1] == "try-error" | ncol(QTL) != 2 | NA %in% QTL | max(QTL[, 1]) > max(marker[, 1])){
stop("QTL data error, please cheak your QTL data.", call. = FALSE)
}
for(i in 1:nrow(QTL)){
ch0 <- QTL[i, 1]
q0 <- QTL[i, 2]
if(!ch0 %in% marker[, 1]){
stop("The specified QTL is not in the position that can estimate by the marker data.", call. = FALSE)
}
if(q0 > max(marker[marker[, 1] == ch0, 2]) | q0 < min(marker[marker[, 1] == ch0, 2])){
stop("The specified QTL is not in the position that can estimate by the marker data.", call. = FALSE)
}
}
y[is.na(y)] <- mean(y,na.rm = TRUE)
y <- c(y)
datatry <- try(y%*%geno, silent=TRUE)
if(class(datatry)[1] == "try-error"){
stop("Phenotype data error, or the number of individual does not match the genetype data.", call. = FALSE)
}
if(!is.null(yu)){
yu <- yu[!is.na(yu)]
datatry <- try(yu%*%yu, silent=TRUE)
if(class(datatry)[1] == "try-error"){
stop("yu data error, please check your yu data.", call. = FALSE)
}
}
if(!sele.g[1] %in% c("n", "t", "p", "f") | length(sele.g)!=1){
stop("Parameter sele.g error, please check and fix.", call. = FALSE)
}
if(sele.g == "t"){
lrtest <- c(tL[1] < min(c(y,yu)), tR[1] > max(c(y,yu)), tR[1] < tL[1],
length(tL) > 1, length(tR) > 1)
datatry <- try(tL[1]*tR[1], silent=TRUE)
if(class(datatry)[1] == "try-error" | TRUE %in% lrtest){
stop("Parameter tL or tR error, please check and fix.", call. = FALSE)
}
if(is.null(tL) | is.null(tR)){
if(is.null(yu)){
stop("yu data error, the yu data must be input for truncated model when parameter tL or tR is not set.", call. = FALSE)
}
}
}
nq <- nrow(QTL)+1
if(!type[1] %in% c("AI","RI","BC") | length(type) > 1){
stop("Parameter type error, please input AI, RI, or BC.", call. = FALSE)
}
if(!is.null(D.matrix)){
D.matrix <- as.matrix(D.matrix)
datatry <- try(D.matrix*D.matrix, silent=TRUE)
if(type == "BC"){dn0 <- 2
} else {dn0 <- 3}
if(class(datatry)[1] == "try-error" | NA %in% D.matrix | nrow(D.matrix) != dn0^nq){
stop("Parameter D.matrix error, or the combination of genotypes in design matrix is error.",
call. = FALSE)
}
}
if(!is.numeric(ng) | length(ng) > 1 | min(ng) < 1){
stop("Parameter ng error, please input a positive integer.", call. = FALSE)
}
ng <- round(ng)
if(!cM[1] %in% c(0,1) | length(cM > 1)){cM <- TRUE}
if(!link[1] %in% c(0,1) | length(link > 1)){link <- TRUE}
if(!is.numeric(speed) | length(speed) > 1 | min(speed) < 0){
stop("Parameter speed error, please input a positive number.", call. = FALSE)
}
if(!is.numeric(QTLdist) | length(QTLdist) > 1 | min(QTLdist) < speed*2){
stop("Parameter QTLdist error, please input a bigger positive number.", call. = FALSE)
}
if(!is.numeric(crit) | length(crit) > 1 | min(crit) <= 0 | max(crit) >= 1){
stop("Parameter crit error, please input a positive number between 0 and 1.", call. = FALSE)
}
if(!console[1] %in% c(0,1) | length(console) > 1){console <- TRUE}
if(!cM){
QTL <- QTL*100
marker[, 2] <- marker[, 2]*100
}
if(is.null(D.matrix)){
D.matrix <- D.make(as.numeric(nq), type = type)
}
cat(paste("chr", "cM", "LRT", "log.likelihood", "known QTL", "\n", sep = "\t")[console])
if(method == "EM"){
meth <- methEM
if(sele.g == "p" | sele.g == "f"){
ya <- c(y, yu)
} else {
ya <- y
}
} else if (method=="REG"){
if(sele.g == "p" | sele.g == "f"){
ya <- c(y, yu)
} else {
ya <- y
}
meth <- methSG
}
name0 <- cbind(paste("QTL", 1:(nq-1), ".ch", sep = ""), paste("QTL", 1:(nq-1), ".cM", sep = ""))
name0 <- rbind(name0, c("new.ch", "new.cM"))
knownQTL <- c()
for(i in 1:(nq-1)){
knownQTL <- c(paste(knownQTL, " [", QTL[i, 1], ",", QTL[i, 2], "]", sep = ""))
}
effect <- c()
cr0 <- sort(unique(marker[, 1]))
if(!link){
cr0 <- cr0[!cr0 %in% QTL[, 1]]
}
if(length(cr0) < 1){
stop("No searchable positions.", call. = FALSE)
}
for(i in cr0){
cr <- marker[marker[, 1] == i,]
minpos <- min(cr[, 2])
if(speed%%1 == 0){minpos = ceiling(min(cr[, 2]))}
QTLs0 <- seq(minpos, (max(cr[,2])), speed)
QTLc0 <- QTL[QTL[, 1] == i,]
if(ncol(as.matrix(QTLc0))==1){
QTLc0 = t(as.matrix(QTLc0))
}
if(nrow(QTLc0) != 0){
for(k in 1:nrow(QTLc0)){
QTLs0 <- QTLs0[!(QTLs0 <= QTLc0[k,2]+QTLdist & QTLs0 >= QTLc0[k,2]-QTLdist)]
}
}
for(j in QTLs0){
QTL0 <- rbind(QTL, c(i,j))
fit0 <- meth(QTL0, marker, geno, D.matrix, y, yu, tL, tR, type, ng, sele.g, crit, cM, ya)
effect0 <- c(t(QTL0), fit0[[1]], fit0[[4]], fit0[[5]], fit0[[6]])
LRT0 <- round(effect0[length(effect0)-2], 3)
like <- round(effect0[length(effect0)-1], 5)
cat(paste(i, j, LRT0, like, knownQTL, "\n", sep = "\t")[console])
effect <- rbind(effect, effect0)
}
model <- fit0[[7]]
}
colnames(effect) <- c(t(name0), colnames(D.matrix), "LRT", "log.likelihood", "R2")
row.names(effect) <- 1:nrow(effect)
b0 <- effect[,c(length(name0)-1, length(name0), ncol(effect)-1)]
bs <- c()
for(i in cr0){
b1 <- b0[b0[,1] == i,]
eff1 <- effect[b0[,1] == i,]
b2 <- nrow(b1)
b3 <- b1[-c(1,b2),3]-b1[-c(b2-1,b2),3]
b4 <- b1[-c(1,b2),3]-b1[-(1:2),3]
b5 <- which(b3 > 0 & b4 > 0)
bs <- rbind(bs, eff1[b5+1,])
}
best <- bs[bs[,ncol(bs)-1] == max(bs[,ncol(bs)-1]),]
QTL.best <- matrix(best[1:(2*nq)], nq, 2, byrow = TRUE)
colnames(QTL.best) <- c("chromosome", "position(cM)")
row.names(QTL.best) <- c(paste("QTL", 1:(nq-1)), "QTL new")
effect.best <- best[-(1:(2*nq))]
inputdata <- list(marker = marker, geno = geno, y = y, yu = yu, sele.g = sele.g, type = type, ng = ng, cM = cM,
D.matrix = D.matrix)
result <- list(effect = effect, QTL.best = QTL.best, effect.best = effect.best, model = model, inputdata = inputdata)
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.