Nothing
######################################################################
# stepwiseqtl.R
#
# copyright (c) 2007-2021, Karl W Broman
# last modified Mar, 2021
# first written Nov, 2007
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 3, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose. See the GNU
# General Public License, version 3, for more details.
#
# A copy of the GNU General Public License, version 3, is available
# at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: stepwiseqtl, calc.plod, countqtlterms, calc.penalties,
# checkStepwiseqtlStart
#
######################################################################
######################################################################
# stepwiseqtl
#
# perform forward and backward selection to identify multiple QTL
#
# cross: cross object
# chr: chromosomes to consider
# pheno.col: phenotype column
# qtl (Optional) If given, qtl object used at start of
# forward selection. If missing, we start at the null
# model.
# formula If given, formula used with the qtl object for the model
# at the start of forward selection
# max.qtl: maximum no. QTL in forward selection
# covar: data.frame with covariates (strictly additive at this point)
# method: imputation or Haley-Knott regression
# incl.markers: If TRUE, include marker positions in scan; if FALSE,
# just use the grid
# refine.locations: If TRUE, refine the QTL positions at each step
# additive.only: If TRUE, don't scan for interactions
# scan.pairs: If TRUE, do a pairscan at each step
# penalties: Vector with 3 values: the penalties on main effects
# followed by the heavy and light interaction penalties.
# (if missing, we use default values derived via
# simulation)
# keeplodprofile If TRUE, perform one last pass of refineqtl and save
# the LOD profiles.
# keeptrace If TRUE, retain the QTL locations, model formula and pLOD
# for the best model from each step of forward and backward
# selection as an attribute in the output
# verbose: If TRUE, print a bunch of tracing information
######################################################################
stepwiseqtl <-
function(cross, chr, pheno.col=1, qtl, formula, max.qtl=10, covar=NULL,
method=c("imp", "hk"), model=c("normal", "binary"), incl.markers=TRUE, refine.locations=TRUE,
additive.only=FALSE, scan.pairs=FALSE, penalties,
keeplodprofile=TRUE, keeptrace=FALSE, verbose=TRUE,
tol=1e-4, maxit=1000, require.fullrank=FALSE)
{
if(!inherits(cross, "cross"))
stop("Input should have class \"cross\".")
if(!missing(chr)) cross <- subset(cross, chr)
if(missing(qtl)) qtl <- NULL
if(missing(formula)) formula <- NULL
method <- match.arg(method)
model <- match.arg(model)
# force covar to be a data frame
if(!is.null(covar) && !is.data.frame(covar)) {
if(is.matrix(covar) && is.numeric(covar))
covar <- as.data.frame(covar, stringsAsFactors=TRUE)
else stop("covar should be a data.frame")
}
if(!missing(penalties)) {
if(is.matrix(penalties)) {
penalties <- penalties[1,]
warning("penalties should be a vector; only the first row will be used")
}
if(length(penalties)==6) { # X-chr-specific penalties
chr_type <- vapply(cross$geno, chrtype, "")
if(!all(chr_type=="A")) {
if(scan.pairs)
warning("scan.pairs=TRUE not implemented X-chr specific penalties; ignored.")
return(stepwiseqtlX(cross, chrnames(cross), pheno.col=pheno.col, qtl=qtl,
formula=formula, max.qtl=max.qtl, k_f=3, stop.rule=0,
covar=covar, method=method, model=model, incl.markers=incl.markers,
refine.locations=refine.locations, additive.only=additive.only,
penalties=penalties, keeplodprofile=keeplodprofile, keeptrace=keeptrace,
verbose=verbose, tol=tol, maxit=maxit, require.fullrank=require.fullrank))
}
penalties <- penalties[c(1,3,4)] # just the autosomal penalties
}
}
if(LikePheVector(pheno.col, nind(cross), nphe(cross))) {
cross$pheno <- cbind(pheno.col, cross$pheno)
pheno.col <- 1
}
chr_type <- sapply(cross$geno, chrtype)
if(any(chr_type=="X")) {
Xadjustment <- scanoneXnull(crosstype(cross), getsex(cross), attributes(cross))
forceXcovar <- Xadjustment$adjustX
Xcovar <- Xadjustment$sexpgmcovar
}
else forceXcovar <- FALSE
if(!is.null(qtl)) { # start f.s. at somewhere other than the null
if( !inherits(qtl, "qtl") )
stop("The qtl argument must be an object of class \"qtl\".")
# check that chromosomes were retained, otherwise give error
m <- is.na(match(qtl$chr, names(cross$geno)))
if(any(m)) {
wh <- qtl$chr[m]
if(length(wh) > 1)
stop("Chromosomes ", paste(wh, collapse=", "), " (in QTL object) not in cross object.")
else
stop("Chromosome ", wh, " (in QTL object) not in cross object.")
}
if(is.null(formula)) { # create a formula with all covariates and all QTL add've
if(!is.null(covar))
formula <- paste("y ~ ", paste(names(covar), collapse="+"), "+")
else
formula <- "y ~ "
formula <- paste(formula, paste(paste("Q", 1:length(qtl$chr), sep=""), collapse="+"))
}
else {
temp <- checkStepwiseqtlStart(qtl, formula, covar)
qtl <- temp$qtl
formula <- temp$formula
}
startatnull <- FALSE
}
else {
if(!is.null(formula))
warning("formula ignored if qtl is not provided.")
startatnull <- TRUE
}
# revise names in qtl object
if(!startatnull)
qtl$name <- qtl$altname
# check that we have the right stuff for the selected method
if(method=="imp") {
if(!("draws" %in% names(cross$geno[[1]]))) {
if("prob" %in% names(cross$geno[[1]])) {
warning("The cross doesn't contain imputations; using method=\"hk\".")
method <- "hk"
}
else
stop("You need to first run sim.geno.")
}
}
else {
if(!("prob" %in% names(cross$geno[[1]]))) {
if("draws" %in% names(cross$geno[[1]])) {
warning("The cross doesn't contain QTL genotype probabilities; using method=\"imp\".")
method <- "imp"
}
else
stop("You need to first run calc.genoprob.")
}
}
if(method=="imp") qtlmethod <- "draws"
else qtlmethod <- "prob"
if(!is.null(qtl) && qtl$n.ind != nind(cross)) {
map <- attr(qtl, "map") # save map
warning("No. individuals in qtl object doesn't match that in the input cross; re-creating qtl object.")
if(method=="imp")
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
else
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="prob")
attr(qtl, "map") <- map
}
if(!is.null(qtl) && method=="imp" && dim(qtl$geno)[3] != dim(cross$geno[[1]]$draws)[3]) {
map <- attr(qtl, "map") # save map
warning("No. imputations in qtl object doesn't match that in the input cross; re-creating qtl object.")
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
attr(qtl, "map") <- map
}
# check that qtl object matches the method
if(!startatnull) {
if(method=="imp" && !("geno" %in% names(qtl)))
stop("The qtl object doesn't contain imputations; re-run makeqtl with what=\"draws\".")
else if(method=="hk" && !("prob" %in% names(qtl)))
stop("The qtl object doesn't contain QTL genotype probabilities; re-run makeqtl with what=\"prob\".")
}
# check phenotypes and covariates; drop ind'ls with missing values
if(length(pheno.col) > 1) {
pheno.col <- pheno.col[1]
warning("stepwiseqtl can take just one phenotype; only the first will be used")
}
if(is.character(pheno.col)) {
num <- find.pheno(cross, pheno.col)
if(is.na(num))
stop("Couldn't identify phenotype \"", pheno.col, "\"")
pheno.col <- num
}
if(any(pheno.col < 1 | pheno.col > nphe(cross)))
stop("pheno.col values should be between 1 and the no. phenotypes")
pheno <- cross$pheno[,pheno.col]
if(!is.null(covar)) phcovar <- cbind(pheno, covar)
else phcovar <- as.data.frame(pheno, stringsAsFactors=TRUE)
hasmissing <- rowSums(is.na(phcovar)) > 0
if(all(hasmissing))
stop("All individuals are missing phenotypes or covariates.")
if(any(hasmissing)) {
pheno <- pheno[!hasmissing]
cross <- subset(cross, ind=!hasmissing)
if(!is.null(covar)) covar <- covar[!hasmissing,,drop=FALSE]
if(forceXcovar) Xcovar <- Xcovar[!hasmissing,,drop=FALSE]
if(!startatnull) {
if(method=="imp")
qtl$geno <- qtl$geno[!hasmissing,,,drop=FALSE]
else {
for(i in seq(along=qtl$prob))
qtl$prob[[i]] <- qtl$prob[[i]][!hasmissing,,drop=FALSE]
}
qtl$n.ind <- sum(!hasmissing)
}
}
if(max.qtl < 1)
stop("Need max.qtl > 0 if we are to scan for qtl")
if(is.null(covar)) {
lod0 <- 0
if(startatnull)
firstformula <- y~Q1
else firstformula <- formula
}
else {
nullformula <- as.formula(paste("y~", paste(names(covar), collapse="+")))
tempqtl <- makeqtl(cross, chrnames(cross)[1], 0, what=ifelse(method=="imp", "draws", "prob"))
fit <- fitqtl(cross, pheno.col, tempqtl, covar=covar, formula=nullformula,
method=method, model=model, dropone=FALSE, get.ests=FALSE,
run.checks=FALSE, tol=tol, maxit=maxit, forceXcovar=forceXcovar)
lod0 <- fit$result.full[1,4]
if(startatnull)
firstformula <- as.formula(paste("y~", paste(names(covar), collapse="+"), "+", "Q1"))
else firstformula <- formula
}
# penalties
cross.type <- crosstype(cross)
if(missing(penalties)) {
if(cross.type=="f2") {
penalties <- c(3.52, 4.28, 2.69)
}
else if(cross.type=="bc") {
penalties <- c(2.69, 2.62, 1.19)
}
else
stop("No default penalties available for cross type ", cross.type)
}
else if(length(penalties) != 3) {
if(length(penalties)==1) {
if(additive.only)
penalties <- c(penalties,Inf,Inf)
else
stop("You must include a penalty for interaction terms.")
}
else {
if(length(penalties)==2)
penalties <- penalties[c(1,2,2)]
else {
warning("penalties should have length 3")
penalties <- penalties[1:3]
}
}
}
if(verbose > 2) verbose.scan <- TRUE
else verbose.scan <- FALSE
curbest <- NULL
curbestplod <- 0
# initial scan : either 1d or 2d
if(verbose) cat(" -Initial scan\n")
if(startatnull) {
if(forceXcovar) {
if(is.null(covar)) covar.w.X <- Xcovar
else covar.w.X <- cbind(covar, Xcovar)
}
else covar.w.X <- covar
if(additive.only || max.qtl == 1 || !scan.pairs) {
suppressWarnings(out <- scanone(cross, pheno.col=pheno.col, method=method, model=model,
addcovar=covar.w.X))
lod <- max(out[,3], na.rm=TRUE)
if(verbose) cat("initial lod: ", lod, "\n")
curplod <- calc.plod(lod, c(1,0,0), penalties=penalties)
wh <- which(!is.na(out[,3]) & out[,3]==lod)
if(length(wh) > 1) wh <- sample(wh, 1)
qtl <- makeqtl(cross, as.character(out[wh,1]), out[wh,2], "Q1",
what=qtlmethod)
formula <- firstformula
n.qtl <- 1
}
else {
suppressWarnings(out <- scantwo(cross, pheno.col=pheno.col, method=method, model=model,
incl.markers=incl.markers, addcovar=covar.w.X, verbose=verbose.scan))
lod <- out$lod
lod1 <- max(diag(lod), na.rm=TRUE)
plod1 <- calc.plod(lod1, c(1,0,0), penalties=penalties)
loda <- max(lod[upper.tri(lod)], na.rm=TRUE)
ploda <- calc.plod(loda, c(2,0,0),
penalties=penalties)
lodf <- max(lod[lower.tri(lod)], na.rm=TRUE)
plodf <- calc.plod(lodf, c(2,0,1),
penalties=penalties)
if(plod1 > ploda && plod1 > plodf) {
wh <- which(!is.na(diag(lod)) & diag(lod) == lod1)
if(length(wh) > 1) wh <- sample(wh, 1)
m <- out$map[wh,]
qtl <- makeqtl(cross, as.character(m[1,1]), m[1,2], "Q1", what=qtlmethod)
formula <- firstformula
n.qtl <- 1
lod <- lod1
curplod <- plod1
}
else if(ploda > plodf) {
temp <- max(out, what="add")
if(nrow(temp) > 1)
temp <- temp[sample(1:nrow(temp),1),]
qtl <- makeqtl(cross, c(as.character(temp[1,1]), as.character(temp[1,2])),
c(temp[1,3], temp[1,4]), c("Q1","Q2"), what=qtlmethod)
formula <- as.formula(paste(deparseQTLformula(firstformula), "+Q2", sep=""))
curplod <- ploda
lod <- loda
n.qtl <- 2
}
else {
temp <- max(out, what="full")
if(nrow(temp) > 1)
temp <- temp[sample(1:nrow(temp),1),]
qtl <- makeqtl(cross, c(as.character(temp[1,1]), as.character(temp[1,2])),
c(temp[1,3], temp[1,4]), c("Q1","Q2"), what=qtlmethod)
formula <- as.formula(paste(deparseQTLformula(firstformula), "+Q2+Q1:Q2", sep=""))
curplod <- plodf
lod <- lodf
n.qtl <- 2
}
}
} # start at null
else {
if(verbose) cat(" ---Starting at a model with", length(qtl$chr), "QTL\n")
if(refine.locations) {
if(verbose) cat(" ---Refining positions\n")
rqtl <- refineqtl(cross, pheno.col=pheno.col, qtl=qtl,
covar=covar, formula=formula, method=method,
model=model,
verbose=verbose.scan, incl.markers=incl.markers,
keeplodprofile=FALSE, forceXcovar=forceXcovar)
if(any(rqtl$pos != qtl$pos)) { # updated positions
if(verbose) cat(" --- Moved a bit\n")
}
qtl <- rqtl
}
fit <- fitqtl(cross, pheno.col, qtl, covar=covar, formula=formula,
method=method, model=model, dropone=FALSE, get.ests=FALSE,
run.checks=FALSE, tol=tol, maxit=maxit, forceXcovar=forceXcovar)
lod <- fit$result.full[1,4] - lod0
if(require.fullrank && attr(fit, "matrix.rank") < attr(fit, "matrix.ncol")) lod <- 0
curplod <- calc.plod(lod, countqtlterms(formula, ignore.covar=TRUE),
penalties=penalties)
attr(qtl, "pLOD") <- curplod
n.qtl <- length(qtl$chr)
}
attr(qtl, "formula") <- deparseQTLformula(formula)
attr(qtl, "pLOD") <- curplod
if(curplod > 0) {
curbest <- qtl
curbestplod <- curplod
if(verbose)
cat("** new best ** (pLOD increased by ", round(curplod, 4), ")\n", sep="")
}
if(keeptrace) {
temp <- list(chr=qtl$chr, pos=qtl$pos)
attr(temp, "formula") <- deparseQTLformula(formula)
attr(temp, "pLOD") <- curplod
class(temp) <- c("compactqtl", "list")
thetrace <- list("0"=temp)
}
if(verbose)
cat(" no.qtl = ", n.qtl, " pLOD =", curplod, " formula:",
deparseQTLformula(formula), "\n")
if(verbose > 1)
cat(" qtl:", paste(qtl$chr, round(qtl$pos,1), sep="@"), "\n")
# start stepwise search
i <- 0
while(n.qtl < max.qtl) {
i <- i+1
if(verbose) {
cat(" -Step", i, "\n")
cat(" ---Scanning for additive qtl\n")
}
out <- addqtl(cross, pheno.col=pheno.col, qtl=qtl, covar=covar,
formula=formula, method=method, model=model, incl.markers=incl.markers,
verbose=verbose.scan, forceXcovar=forceXcovar,
require.fullrank=require.fullrank)
curlod <- max(out[,3], na.rm=TRUE)
wh <- which(!is.na(out[,3]) & out[,3]==curlod)
if(length(wh) > 1) wh <- sample(wh,1)
curqtl <- addtoqtl(cross, qtl, as.character(out[wh,1]), out[wh,2],
paste("Q", n.qtl+1, sep=""))
curformula <- as.formula(paste(deparseQTLformula(formula), "+Q", n.qtl+1, sep=""))
curlod <- curlod + lod
curplod <- calc.plod(curlod, countqtlterms(curformula, ignore.covar=TRUE),
penalties=penalties)
if(verbose) cat(" plod =", curplod, "\n")
curnqtl <- n.qtl+1
if(!additive.only) {
for(j in 1:n.qtl) {
if(verbose)
cat(" ---Scanning for QTL interacting with Q", j, "\n", sep="")
thisformula <- as.formula(paste(deparseQTLformula(formula), "+Q", n.qtl+1,
"+Q", j, ":Q", n.qtl+1, sep=""))
out <- addqtl(cross, pheno.col=pheno.col, qtl=qtl, covar=covar,
formula=thisformula, method=method, model=model, incl.markers=incl.markers,
verbose=verbose.scan, forceXcovar=forceXcovar,
require.fullrank=require.fullrank)
thislod <- max(out[,3], na.rm=TRUE)
wh <- which(!is.na(out[,3]) & out[,3]==thislod)
if(length(wh) > 1) wh <- sample(wh,1)
thisqtl <- addtoqtl(cross, qtl, as.character(out[wh,1]), out[wh,2],
paste("Q", n.qtl+1, sep=""))
thislod <- thislod + lod
thisplod <- calc.plod(thislod, countqtlterms(thisformula, ignore.covar=TRUE),
penalties=penalties)
if(verbose) cat(" plod =", thisplod, "\n")
if(thisplod > curplod) {
curformula <- thisformula
curplod <- thisplod
curlod <- thislod
curqtl <- thisqtl
curnqtl <- n.qtl+1
}
}
if(n.qtl > 1) {
if(verbose)
cat(" ---Look for additional interactions\n")
temp <- addint(cross, pheno.col, qtl, covar=covar, formula=formula,
method=method, model=model, qtl.only=TRUE, verbose=verbose.scan,
require.fullrank=require.fullrank)
if(!is.null(temp)) {
thislod <- max(temp[,3], na.rm=TRUE)
wh <- which(!is.na(temp[,3]) & temp[,3] == thislod)
if(length(wh) > 1) wh <- sample(wh, 1)
thisformula <- as.formula(paste(deparseQTLformula(formula), "+", rownames(temp)[wh]))
thislod <- thislod + lod
thisplod <- calc.plod(thislod, countqtlterms(thisformula, ignore.covar=TRUE),
penalties=penalties)
if(verbose) cat(" plod =", thisplod, "\n")
if(thisplod > curplod) {
curformula <- thisformula
curplod <- thisplod
curlod <- thislod
curqtl <- qtl
curnqtl <- n.qtl
}
}
}
if(scan.pairs) {
if(verbose)
cat(" ---Scan for an additional pair\n")
out <- addpair(cross, pheno.col=pheno.col, qtl=qtl, covar=covar,
formula=formula, method=method, model=model, incl.markers=incl.markers,
verbose=verbose.scan, forceXcovar=forceXcovar)
thelod <- out$lod
loda <- max(thelod[upper.tri(thelod)], na.rm=TRUE)
ploda <- calc.plod(loda+lod, c(2,0,0,0)+countqtlterms(formula, ignore.covar=TRUE),
penalties=penalties)
lodf <- max(thelod[lower.tri(thelod)], na.rm=TRUE)
plodf <- calc.plod(lodf+lod, c(2,0,1,1)+countqtlterms(formula, ignore.covar=TRUE),
penalties=penalties)
if(verbose) {
cat(" ploda =", ploda, "\n")
cat(" plodf =", plodf, "\n")
}
if(ploda > curplod && loda > plodf) {
temp <- max(out, what="add")
if(nrow(temp) > 1)
temp <- temp[sample(1:nrow(temp),1),]
curqtl <- addtoqtl(cross, qtl, c(as.character(temp[1,1]), as.character(temp[1,2])),
c(temp[1,3], temp[1,4]), paste("Q", n.qtl+1:2, sep=""))
curformula <- as.formula(paste(deparseQTLformula(formula), "+Q", n.qtl+1, "+Q",
n.qtl+2, sep=""))
curplod <- ploda
lod <- loda+lod
curnqtl <- n.qtl+2
}
else if(plodf > curplod) {
temp <- max(out, what="full")
if(nrow(temp) > 1)
temp <- temp[sample(1:nrow(temp),1),]
curqtl <- addtoqtl(cross, qtl, c(as.character(temp[1,1]), as.character(temp[1,2])),
c(temp[1,3], temp[1,4]), paste("Q", n.qtl+1:2, sep=""))
curformula <- as.formula(paste(deparseQTLformula(formula), "+Q", n.qtl+1, "+Q",
n.qtl+2, "+Q", n.qtl+1, ":Q", n.qtl+2,
sep=""))
curplod <- plodf
lod <- lodf+lod
curnqtl <- n.qtl+2
}
}
}
qtl <- curqtl
n.qtl <- curnqtl
attr(qtl, "formula") <- deparseQTLformula(curformula)
attr(qtl, "pLOD") <- curplod
formula <- curformula
lod <- curlod
if(refine.locations) {
if(verbose) cat(" ---Refining positions\n")
rqtl <- refineqtl(cross, pheno.col=pheno.col, qtl=qtl,
covar=covar, formula=formula, method=method, model=model,
verbose=verbose.scan, incl.markers=incl.markers,
keeplodprofile=FALSE, forceXcovar=forceXcovar)
if(any(rqtl$pos != qtl$pos)) { # updated positions
if(verbose) cat(" --- Moved a bit\n")
qtl <- rqtl
fit <- fitqtl(cross, pheno.col, qtl, covar=covar, formula=formula,
method=method, model=model, dropone=FALSE, get.ests=FALSE,
run.checks=FALSE, tol=tol, maxit=maxit, forceXcovar=forceXcovar)
lod <- fit$result.full[1,4] - lod0
if(require.fullrank && attr(fit, "matrix.rank") < attr(fit, "matrix.ncol")) lod <- 0
curplod <- calc.plod(lod, countqtlterms(formula, ignore.covar=TRUE),
penalties=penalties)
attr(qtl, "pLOD") <- curplod
}
}
if(verbose)
cat(" no.qtl = ", n.qtl, " pLOD =", curplod, " formula:",
deparseQTLformula(formula), "\n")
if(verbose > 1)
cat(" qtl:", paste(qtl$chr, round(qtl$pos,1), sep="@"), "\n")
if(curplod > curbestplod) {
if(verbose)
cat("** new best ** (pLOD increased by ", round(curplod - curbestplod, 4),
")\n", sep="")
curbest <- qtl
curbestplod <- curplod
}
if(keeptrace) {
temp <- list(chr=qtl$chr, pos=qtl$pos)
attr(temp, "formula") <- deparseQTLformula(formula)
attr(temp, "pLOD") <- curplod
class(temp) <- c("compactqtl", "list")
temp <- list(temp)
names(temp) <- i
thetrace <- c(thetrace, temp)
}
if(n.qtl >= max.qtl) break
}
if(verbose) cat(" -Starting backward deletion\n")
while(n.qtl > 1) {
i <- i+1
out <- fitqtl(cross, pheno.col, qtl, covar=covar, formula=formula,
method=method, model=model, dropone=TRUE, get.ests=FALSE,
run.checks=FALSE, tol=tol, maxit=maxit, forceXcovar=forceXcovar)$result.drop
formulas <- attr(out, "formulas")
lods <- attr(out, "lods")
rn <- rownames(out)
# ignore things with covariates
wh <- c(grep("^[Qq][0-9]+$", rn),
grep("^[Qq][0-9]+:[Qq][0-9]+$", rn))
out <- out[wh,,drop=FALSE]
formulas <- formulas[wh]
lods <- lods[wh]
# need to calculate penalized LOD scores here
plod <- rep(NA, length(lods))
for(modi in seq(along=plod))
plod[modi] <- calc.plod(lods[modi], countqtlterms(formulas[modi], ignore.covar=TRUE),
penalties=penalties)
maxplod <- max(plod, na.rm=TRUE)
wh <- which(!is.na(plod) & plod==maxplod)
if(length(wh) > 1) wh <- sample(wh, 1)
todrop <- rownames(out)[wh]
if(verbose) cat(" ---Dropping", todrop, "\n")
if(length(grep(":", todrop)) > 0) { # dropping an interaction
theterms <- attr(terms(formula), "factors")
wh <- colnames(theterms)==todrop
if(!any(wh)) stop("Confusion about what interation to drop!")
theterms <- colnames(theterms)[!wh]
formula <- as.formula(paste("y~", paste(theterms, collapse="+")))
}
else {
numtodrop <- as.numeric(substr(todrop, 2, nchar(todrop)))
theterms <- attr(terms(formula), "factors")
cn <- colnames(theterms)
g <- c(grep(paste("^[Qq]", numtodrop, "$", sep=""), cn),
grep(paste("^[Qq]", numtodrop, ":", sep=""), cn),
grep(paste(":[Qq]", numtodrop, "$", sep=""), cn))
cn <- cn[-g]
formula <- as.formula(paste("y~", paste(cn, collapse="+")))
if(n.qtl > numtodrop) {
for(j in (numtodrop+1):n.qtl)
formula <- reviseqtlnuminformula(formula, j, j-1)
}
qtl <- dropfromqtl(qtl, index=numtodrop)
qtl$name <- qtl$altname <- paste("Q", 1:qtl$n.qtl, sep="")
n.qtl <- n.qtl - 1
}
# call fitqtl again, just in case
fit <- fitqtl(cross, pheno.col, qtl, covar=covar, formula=formula,
method=method, model=model, dropone=FALSE, get.ests=FALSE,
run.checks=FALSE, tol=tol, maxit=maxit, forceXcovar=forceXcovar)
lod <- fit$result.full[1,4] - lod0
if(require.fullrank && attr(fit, "matrix.rank") < attr(fit, "matrix.ncol")) lod <- 0
curplod <- calc.plod(lod, countqtlterms(formula, ignore.covar=TRUE),
penalties=penalties)
if(verbose)
cat(" no.qtl = ", n.qtl, " pLOD =", curplod, " formula:",
deparseQTLformula(formula), "\n")
if(verbose > 1)
cat(" qtl:", paste(qtl$chr, round(qtl$pos,1), sep=":"), "\n")
attr(qtl, "formula") <- deparseQTLformula(formula)
attr(qtl, "pLOD") <- curplod
if(refine.locations) {
if(verbose) cat(" ---Refining positions\n")
if(!is.null(qtl)) {
rqtl <- refineqtl(cross, pheno.col=pheno.col, qtl=qtl,
covar=covar, formula=formula, method=method, model=model,
verbose=verbose.scan, incl.markers=incl.markers,
keeplodprofile=FALSE, forceXcovar=forceXcovar)
if(any(rqtl$pos != qtl$pos)) { # updated positions
if(verbose) cat(" --- Moved a bit\n")
qtl <- rqtl
fit <- fitqtl(cross, pheno.col, qtl, covar=covar, formula=formula,
method=method, model=model, dropone=FALSE, get.ests=FALSE,
run.checks=FALSE, tol=tol, maxit=maxit, forceXcovar=forceXcovar)
lod <- fit$result.full[1,4] - lod0
if(require.fullrank && attr(fit, "matrix.rank") < attr(fit, "matrix.ncol")) lod <- 0
curplod <- calc.plod(lod, countqtlterms(formula, ignore.covar=TRUE),
penalties=penalties)
attr(qtl, "pLOD") <- curplod
}
}
}
if(curplod > curbestplod) {
if(verbose)
cat("** new best ** (pLOD increased by ", round(curplod - curbestplod, 4),
")\n", sep="")
curbestplod <- curplod
curbest <- qtl
}
if(keeptrace) {
temp <- list(chr=qtl$chr, pos=qtl$pos)
attr(temp, "formula") <- deparseQTLformula(formula)
attr(temp, "pLOD") <- curplod
class(temp) <- c("compactqtl", "list")
temp <- list(temp)
names(temp) <- i
thetrace <- c(thetrace, temp)
}
}
# re-form the qtl
if(!is.null(curbest)) {
chr <- curbest$chr
pos <- curbest$pos
o <- order(factor(chr, levels=names(cross$geno)), pos)
qtl <- makeqtl(cross, chr[o], pos[o], what=qtlmethod)
# need to redo numbering in formula
formula <- as.formula(attr(curbest, "formula"))
if(length(chr) > 1) {
n.qtl <- length(chr)
for(i in 1:n.qtl)
formula <- reviseqtlnuminformula(formula, i, n.qtl+i)
for(i in 1:n.qtl)
formula <- reviseqtlnuminformula(formula, n.qtl+o[i], i)
}
if(keeplodprofile) {
if(verbose) cat(" ---One last pass through refineqtl\n")
qtl <- refineqtl(cross, pheno.col=pheno.col, qtl=qtl,
covar=covar, formula=formula, method=method, model=model,
verbose=verbose.scan, incl.markers=incl.markers,
keeplodprofile=TRUE, forceXcovar=forceXcovar)
}
attr(qtl, "formula") <- deparseQTLformula(formula)
attr(qtl, "pLOD") <- attr(curbest, "pLOD")
curbest <- qtl
}
else {
curbest <- numeric(0)
class(curbest) <- "qtl"
attr(curbest,"pLOD") <- 0
}
if(keeptrace)
attr(curbest, "trace") <- thetrace
attr(curbest, "formula") <- deparseQTLformula(attr(curbest, "formula"), TRUE)
attr(curbest, "penalties") <- penalties
curbest
}
######################################################################
# check initial qtl model for appropriateness
######################################################################
checkStepwiseqtlStart <-
function(qtl, formula, covar=NULL)
{
if(is.character(formula)) formula <- as.formula(formula)
formula <- checkformula(formula, qtl$altname, colnames(covar))
theterms <- attr(terms(formula), "factors")[-1,,drop=FALSE]
rn <- rownames(theterms)
# make sure that all covariates in covar exist in the formula
if(!is.null(covar)) {
covarnam <- colnames(covar)
m <- is.na(match(covarnam, rn))
if(any(m)) {
toadd <- covarnam[m]
warning("Adding ", paste(toadd, collapse="+"), " to formula")
formula <- as.formula(paste(deparseQTLformula(formula), "+", paste(toadd, collapse="+"), sep=""))
theterms <- attr(terms(formula), "factors")[-1,,drop=FALSE]
rn <- rownames(theterms)
}
# make sure there are no QTL:covariate interactions
theqtl <- grep("^Q[0-9]+$", rn)
thecovar <- seq(along=rn)[-theqtl]
if(any(apply(theterms[thecovar,,drop=FALSE], 1, sum)>1))
stop("We can't yet handle QTL:covariate or covariate:covariate interactions")
}
# make sure that any QTL in formula exist in object
theqtl <- grep("^Q[0-9]+$", rn)
thecovar <- seq(along=rn)[-theqtl]
qtlindex <- as.numeric(substr(rn[theqtl], 2, nchar(rn[theqtl])))
wh <- qtlindex < 0 | qtlindex > length(qtl$chr)
if(any(wh))
stop("QTL ", paste(rn[theqtl][wh], collapse=" "), " not in qtl object")
# make sure that there are not any extraneous terms
if(length(thecovar) > 0) {
if(is.null(covar))
stop("Extraneous terms in formula: ", paste(rn[thecovar], collapse=" "))
else {
wh <- is.na(match(rn[thecovar], colnames(covar)))
if(any(wh))
stop("Extraneous terms in formula: ", paste(rn[thecovar][wh], collapse=" "))
}
}
# if any QTL not referred to in formula, drop them from the QTL object
todrop <- seq(along=qtl$chr)[-qtlindex]
if(length(todrop) > 0) {
oldnum <- seq(along=qtl$chr)[-todrop]
newnum <- order(oldnum)
formula <- reviseqtlnuminformula(formula, oldnum, newnum)
qtl <- dropfromqtl(qtl, todrop)
}
return(list(qtl=qtl, formula=as.formula(formula)))
}
######################################################################
# penalized LOD score
######################################################################
calc.plod <-
function(lod, nterms, type=c("f2","bc"), penalties) {
nterms <- nterms[1:3]
if(any(penalties==Inf & nterms > 0)) return(-Inf)
as.numeric(lod - sum((nterms*penalties)[nterms > 0]))
}
######################################################################
# count terms in a model, for use by plod
######################################################################
countqtlterms <-
function(formula, ignore.covar=TRUE)
{
if(is.character(formula)) formula <- as.formula(formula)
factors <- attr(terms(formula), "factors")[-1,,drop=FALSE]
if(any(factors > 1)) {
warning("some formula terms > 1; may be a problem with the formula:\n ", deparseQTLformula(formula))
factors[factors > 1] <- 1
}
nterm <- apply(factors, 2, sum)
if(any(nterm>2))
stop("Can't deal with higher-order interactions\n")
# need to check for QTL x covariate interactions in here!
if(ignore.covar) {
cn <- colnames(factors)
wh <- c(grep("^[Qq][0-9]+$", cn),
grep("^[Qq][0-9]+:[Qq][0-9]+$", cn))
rn <- rownames(factors)
wh2 <- c(grep("^[Qq][0-9]+$", rn),
grep("^[Qq][0-9]+:[Qq][0-9]+$", rn))
factors <- factors[wh2,wh, drop=FALSE]
}
nterm <- apply(factors, 2, sum)
nmain <- sum(nterm==1)
if(all(nterm==1))
return(c(main=nmain, intH=0, intL=0, inttot=0))
n.int <- sum(nterm==2)
if(n.int <=1) # 0 or 1 interactions, so no need to figure them out
return(c(main=nmain, intH=0, intL=n.int, inttot=n.int))
factors <- factors[,nterm==2, drop=FALSE]
wh <- apply(factors, 2, function(a) which(a==1))
u <- sort(unique(as.numeric(wh)))
grp <- rep(NA, length(u))
names(grp) <- u
ngrp <- 0
nint <- NULL
for(i in 1:ncol(wh)) {
thegrp <- grp[as.character(wh[,i])]
if(all(!is.na(thegrp))) {
nint[as.character(thegrp[1])] <-
sum(nint[unique(as.character(thegrp))]) + 1
grp[grp==thegrp[1] | grp==thegrp[2]] <- thegrp[1]
}
else if(any(!is.na(thegrp))) {
grp[as.character(wh[,i])] <- thegrp[!is.na(thegrp)]
nint[as.character(thegrp[!is.na(thegrp)])] <-
nint[as.character(thegrp[!is.na(thegrp)])] + 1
}
else {
ngrp <- ngrp+1
grp[as.character(wh[,i])] <- ngrp
nint[as.character(ngrp)] <- 1
}
}
nint <- nint[as.character(unique(grp))]
nL <- sum(nint>0)
nH <- sum(nint)-nL
c(main=nmain, intH=nH, intL=nL, inttot=n.int)
}
######################################################################
# calculate penalties for pLOD using scantwo permutation results.
######################################################################
calc.penalties <-
function(perms, alpha=0.05, lodcolumn)
{
if(missing(perms) || !inherits(perms, "scantwoperm"))
stop("You must include permutation results from scantwo.")
if("AA" %in% names(perms)) { # X-chr-specific penalties
if(missing(lodcolumn)) lodcolumn <- NULL
return(calc.penalties.X(perms, alpha, lodcolumn))
}
if(missing(lodcolumn) || is.null(lodcolumn)) {
if(is.matrix(perms[[1]]) && ncol(perms[[1]]) > 1)
lodcolumn <- 1:ncol(perms[[1]])
else lodcolumn <- 1
}
if(length(lodcolumn)>1) {
result <- NULL
for(i in seq(along=lodcolumn)) {
temp <- calc.penalties(perms, alpha, lodcolumn[i])
result <- rbind(result, temp)
}
dimnames(result) <- list(colnames(perms[[1]])[lodcolumn], names(temp))
return(result)
}
if(is.matrix(perms[[1]]) && ncol(perms[[1]]) >1) {
if(lodcolumn < 1 || lodcolumn > ncol(perms[[1]]))
stop("lodcolumn misspecified")
for(i in seq(along=perms))
perms[[i]] <- perms[[i]][,lodcolumn,drop=FALSE]
}
qu <- summary(perms, alpha=alpha)
if(!("one" %in% names(qu)))
stop("You need to re-run scantwo permutations with R/qtl version >= 1.09.")
if(length(alpha)>1) {
penalties <- cbind(qu[["one"]], qu[["int"]], qu[["fv1"]]-qu[["one"]])
colnames(penalties) <- c("main","heavy", "light")
}
else {
penalties <- c(qu[["one"]], qu[["int"]], qu[["fv1"]]-qu[["one"]])
names(penalties) <- c("main","heavy", "light")
}
penalties
}
# end of stepwiseqtl.R
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.