Nothing
###############
# MQE_forward #
###############
# Forward regression with different type of QTL effects
#
# Determines a multi-QTL effect (MQE) model using a forward regression.
#
# The included QTL position can have different type of QTL effects at different
# loci. At each step (new added position), the function compute QTL profiles
# using one type of QTL effect specified in \code{Q.eff} for the tested
# position. Let us assume that the user want to allow the QTL to have a
# parental, ancestral or bi-allelic effect. Therefore, the function will
# calculate three profiles. Then it will select in each profile the most
# significant position if a position is above the \code{threshold} value.
# The position and its QTL effect that increase the most the global adjusted R
# squared (\code{MQE_R2}) will be selected as the new QTL position and included
# in the list of cofactors. The function continues to iterate until no position
# is significant anymore.
#
# \strong{WARNING!(1)} The computation of \code{MQE_forward()} function using
# mixed models (all models with \code{VCOV} different than \code{"h.err"})
# is technically possible but can be irrealistic in practice due to a reduced
# computer power. Since a mixed model is computed at each single position it
# can take a lot of time. From our estimation it can take between 20 to 50
# times more time than for the linear model (HRT). If the number of detected
# QTL is supposed to be small (until 5) it could still be feasible.
#
# \strong{WARNING!(2)} The computation of random pedigree models
# (\code{VCOV = "pedigree" and "ped_cr.err"}) can sometimes fail. This could be
# due to singularities due to a strong correlation between the QTL term(s) and
# the polygenic term. This situation can appear in the parental model.
# the error can also sometimes come from the \code{asreml()} function. From
# our experience, in that case, trying to re-run the function one or two times
# allow to obtain a result.
#
# @param mppData An object of class \code{mppData}
#
# @param trait \code{Numerical} or \code{character} indicator to specify which
# trait of the \code{mppData} object should be used. Default = 1.
#
# @param Q.eff \code{Character} vector of possible QTL effects the user want to
# test. Elements of Q.eff can be "cr", "par", "anc" or "biall". For details
# look at \code{\link{mpp_SIM}}.
#
# @param VCOV \code{Character} expression defining the type of variance
# covariance structure used: 1) "h.err" for an homogeneous variance residual term
# (HRT) linear model; 2) "h.err.as" for a HRT model fitted by REML using
# \code{ASReml-R}; 3) "cr.err" for a cross-specific variance residual terms
# (CSRT) model; 4) "pedigree" for a random pedigree term and HRT model;
# and 5) "ped_cr.err" for random pedigree and CSRT model.
# For more details see \code{\link{mpp_SIM}}. Default = "h.err".
#
# @param threshold \code{Numeric} value representing the -log10(p-value) threshold
# above which a position can be considered as significant. Default = 4.
#
# @param window \code{Numeric} distance (cM) on the left and the right of a
# cofactor position where it is not included in the model. Default = 30.
#
# @param n.cores \code{Numeric}. Specify here the number of cores you like to
# use. Default = 1.
#
# @param verbose \code{Logical} value indicating if the steps of the
# MQE_forward should be printed. It will not affect the printing of the other
# functions called by \code{mpp_proc()}, especially the printing of
# \code{asreml()}. Default = TRUE.
#
#
# @return
#
# \item{QTL.list }{\code{Data.frame} with six columns :
# 1) QTL marker or in between position names; 2) chromosomes;
# 3) interger position indicators on the chromosome;
# 4) positions in centi-Morgan; 5) -log10(p-values); and 6) type of
# QTL incidence matrix of the selected positions}
#
# @author Vincent Garin
#
# @seealso \code{\link{mpp_perm}},
# \code{\link{mpp_SIM}}, \code{\link{MQE_R2}}
#
# @examples
#
# data(mppData)
#
# QTL <- MQE_forward(mppData = mppData, Q.eff = c("par", "anc", "biall"))
#
#
# @export
#
MQE_forward <- function(mppData = NULL, trait = 1, Q.eff, VCOV = "h.err",
threshold = 4, window = 30, n.cores = 1,
verbose = TRUE) {
# 1. test data format
#####################
check.MQE(mppData = mppData, trait = trait, Q.eff = Q.eff,
VCOV = VCOV, n.cores = n.cores, fct = "forward")
# Sub-function for R squared computation
R2.sg <- function(mppData, QTL, Q.eff, trait){
MQE_R2(mppData = mppData, trait = trait, QTL = QTL, Q.eff = Q.eff,
glb.only = TRUE)[[2]]
}
# initialize list of selected QTL and list of type of effects.
QTL.list <- c()
QTL.eff <- c()
pos.ind <- 1
# form cluster
if(n.cores > 1){
parallel <- TRUE
cluster <- makeCluster(n.cores)
} else {
parallel <- FALSE
cluster <- NULL
}
# 2. step 1: SIM
################
# vector to store the candidate positions
max.pval <- c()
cand.pos <- c()
for (i in 1:length(Q.eff)) {
Q.eff_i <- Q.eff[i]
SIM <- mpp_SIM_clu(mppData = mppData, trait = trait, Q.eff = Q.eff_i,
parallel = parallel, cluster = cluster)
# store the candidate position
max.pval_i <- max(SIM$log10pval, na.rm = TRUE)
max.pval <- c(max.pval, max.pval_i)
if (max.pval_i > threshold) {
cand.pos_i <- data.frame(mppData$map[which.max(SIM$log10pval), 1], Q.eff_i,
stringsAsFactors = FALSE)
cand.pos <- rbind.data.frame(cand.pos, cand.pos_i)
}
}
if(!is.null(cand.pos)) {colnames(cand.pos) <- c("mk.names", "Q.eff")}
if (!is.null(cand.pos)) {
if(verbose){
cat("\n")
cat(paste("position", pos.ind))
cat("\n")
}
R2.pos1 <- mapply(FUN = R2.sg, QTL = cand.pos[, 1], Q.eff = cand.pos[, 2],
MoreArgs = list(mppData = mppData, trait = trait))
# test which position has the highest R squared
if ((length(R2.pos1) != 1) && (length(unique(R2.pos1)) == 1)) {
sel.pos_i <- cand.pos[which.max(max.pval), ]
# two pos with max values, take the first one.
if (dim(sel.pos_i)[1] > 1) { sel.pos_i <- sel.pos_i[1, ] }
} else {
sel.pos_i <- cand.pos[which.max(R2.pos1), ]
if (dim(sel.pos_i)[1] > 1) {sel.pos_i <- sel.pos_i[1, ] }
}
QTL.list <- c(QTL.list, unlist(sel.pos_i[1]))
QTL.eff <- c(QTL.eff, unlist(sel.pos_i[2]))
pos.ind <- pos.ind + 1
} else {
warning(paste("No position is above the threshold, the stepwise procedure",
"could not select any QTL position."))
return(NULL)
}
# 3. Step 2 : CIM with cofactors (QTL already selected)
#######################################################
rest.sign.pos <- TRUE
while(rest.sign.pos) {
max.pval <- c()
cand.pos <- c()
for (i in 1:length(Q.eff)) {
Q.eff_i <- Q.eff[i]
log10.pval <- MQE_CIM_clu(mppData = mppData, trait = trait,
Q.eff = Q.eff_i,
cofactors = QTL.list, cof.Qeff = QTL.eff,
parallel = parallel, cluster = cluster)
# select the position with the highest p-value that is not in an
# already selected QTL region.
# make an index of positions that are not in a QTL region
pos.QTL <- mppData$map[mppData$map[, 1] %in% QTL.list, c(2,4)]
test.cof <- function(x, map, window) {
t1 <- map$chr == as.numeric(x[1])
t2 <- abs(map$pos.cM - as.numeric(x[2])) < window
(t1 & t2)
}
cof.part <- apply(X = pos.QTL, MARGIN = 1, FUN = test.cof,
map = mppData$map, window = window)
log10.pval <- log10.pval[rowSums(cof.part) == 0, ]
max.pval_i <- max(log10.pval$log10pval, na.rm = TRUE)
if (max.pval_i > threshold) {
max.pval <- c(max.pval, max.pval_i)
cand.pos_i <- data.frame(log10.pval[which.max(log10.pval$log10pval), 1],
Q.eff_i, stringsAsFactors = FALSE)
cand.pos <- rbind.data.frame(cand.pos, cand.pos_i)
}
}
if(!is.null(cand.pos)) {colnames(cand.pos) <- c("mk.names", "Q.eff")}
# test if some position has been added. If yes, determine which one
# increase the R squared
if (!is.null(cand.pos)) {
if(verbose){
cat("\n")
cat(paste("position", pos.ind))
cat("\n")
}
# evaluate which position give the best extra adjusted R squared
QTL.list_i <- lapply(X = cand.pos[, 1],
FUN = function(x, Q.l) c(Q.l, x), Q.l = QTL.list)
QTL.eff_i <- lapply(X = cand.pos[, 2],
FUN = function(x, Q.e) c(Q.e, x), Q.e = QTL.eff)
# R2 ith position
R2.posi <- mapply(FUN = R2.sg, QTL = QTL.list_i, Q.eff = QTL.eff_i,
MoreArgs = list(mppData = mppData, trait = trait))
# test which position has the highest R squared
if ((length(R2.posi) != 1) & (length(unique(R2.posi)) == 1)) {
sel.pos_i <- cand.pos[which.max(max.pval), ]
if (dim(sel.pos_i)[1] > 1) { sel.pos_i <- sel.pos_i[1, ] }
} else {
sel.pos_i <- cand.pos[which.max(R2.posi), ]
if (dim(sel.pos_i)[1] > 1) {sel.pos_i <- sel.pos_i[1, ] }
}
QTL.list <- c(QTL.list, unlist(sel.pos_i[1]))
QTL.eff <- c(QTL.eff, unlist(sel.pos_i[2]))
pos.ind <- pos.ind + 1
} else { rest.sign.pos <- FALSE }
} # end while loop
if(n.cores > 1){stopCluster(cluster)}
# return the final list of cofactors (QTL)
results <- mppData$map[mppData$map[, 1] %in% QTL.list, ]
names(QTL.eff) <- QTL.list
QTL.eff2 <- QTL.eff[results[, 1]]
results <- data.frame(results, QTL.eff2, stringsAsFactors = FALSE)
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.