Nothing
######################################################################
#
# refineqtl.R
#
# copyright (c) 2006-2019, Karl W. Broman
# last modified Dec, 2019
# first written Jun, 2006
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 3, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose. See the GNU
# General Public License, version 3, for more details.
#
# A copy of the GNU General Public License, version 3, is available
# at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: refineqtl, plotLodProfile
#
######################################################################
######################################################################
# this is like scanqtl, though the positions are all fixed loci;
# it calls scanqtl iteratively, trying to find the best positions for
# each QTL.
#
# the method is like that of that described by Zeng and colleagues
# regarding MIM; each QTL is slid from one end of the chromosome to
# the other, or to the next flanking QTLs, if there are linked ones
#
# maxit is the maximum number of iterations (going through all QTLs
# in each iteration) to perform
######################################################################
refineqtl <-
function(cross, pheno.col=1, qtl, chr, pos, qtl.name, covar=NULL, formula,
method=c("imp", "hk"), model=c("normal", "binary"), verbose=TRUE, maxit=10,
incl.markers=TRUE, keeplodprofile=TRUE, tol=1e-4, maxit.fitqtl=1000,
forceXcovar=FALSE)
{
method <- match.arg(method)
model <- match.arg(model)
if( !inherits(cross, "cross") )
stop("The cross argument must be an object of class \"cross\".")
# allow formula to be a character string
if(!missing(formula) && is.character(formula))
formula <- as.formula(formula)
if(!is.null(covar) && !is.data.frame(covar)) {
if(is.matrix(covar) && is.numeric(covar))
covar <- as.data.frame(covar, stringsAsFactors=TRUE)
else stop("covar should be a data.frame")
}
if(LikePheVector(pheno.col, nind(cross), nphe(cross))) {
cross$pheno <- cbind(pheno.col, cross$pheno)
pheno.col <- 1
}
if(!missing(qtl) && (!missing(chr) || !missing(pos) || !missing(qtl.name)))
warning("qtl argument is provided, and so chr, pos and qtl.name are ignored.")
if(missing(qtl) && (missing(chr) || missing(pos)))
stop("Provide either qtl or both chr and pos.")
if(!missing(qtl)) {
chr <- qtl$chr
pos <- qtl$pos
}
else { # chr and pos provided
if(missing(qtl.name)) {
if(method=="imp")
qtl <- makeqtl(cross, chr=chr, pos=pos, what="draws")
else
qtl <- makeqtl(cross, chr=chr, pos=pos, what="prob")
}
else {
if(method=="imp")
qtl <- makeqtl(cross, chr=chr, pos=pos, qtl.name=qtl.name, what="draws")
else
qtl <- makeqtl(cross, chr=chr, pos=pos, qtl.name=qtl.name, what="prob")
}
}
if(method=="imp") {
if(!("geno" %in% names(qtl))) {
if("prob" %in% names(qtl)) {
warning("The qtl object doesn't contain imputations; using method=\"hk\".")
method <- "hk"
}
else
stop("The qtl object needs to be created with makeqtl with what=\"draws\".")
}
}
else {
if(!("prob" %in% names(qtl))) {
if("geno" %in% names(qtl)) {
warning("The qtl object doesn't contain QTL genotype probabilities; using method=\"imp\".")
method <- "imp"
}
else
stop("The qtl object needs to be created with makeqtl with what=\"prob\".")
}
}
if(!all(chr %in% names(cross$geno)))
stop("Chr ", paste(unique(chr[!(chr %in% cross$geno)]), sep=" "),
" not found in cross.")
if(verbose > 1) scanqtl.verbose <- TRUE
else scanqtl.verbose <- FALSE
cross <- subset(cross, chr=as.character(unique(chr))) # pull out just those chromosomes
# save map in the object
map <- attr(qtl, "map")
if(qtl$n.ind != nind(cross)) {
warning("No. individuals in qtl object doesn't match that in the input cross; re-creating qtl object.")
if(method=="imp")
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
else
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="prob")
attr(qtl, "map") <- map
}
if(method=="imp" && dim(qtl$geno)[3] != dim(cross$geno[[1]]$draws)[3]) {
warning("No. imputations in qtl object doesn't match that in the input cross; re-creating qtl object.")
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
attr(qtl, "map") <- map
}
# minimum distance between pseudomarkers
if(is.null(map))
stop("Input qtl object should contain the genetic map.")
mind <- min(unlist(lapply(map, function(a) { if(is.matrix(a)) a <- a[1,];
if(length(a) <= 1) return(NULL) # deal with case of a single marker
min(diff(a)) })))/2
if(mind <= 0) mind <- 1e-6
# check phenotypes and covariates; drop ind'ls with missing values
if(length(pheno.col) > 1) {
pheno.col <- pheno.col[1]
warning("refineqtl can take just one phenotype; only the first will be used")
}
if(is.character(pheno.col)) {
num <- find.pheno(cross, pheno.col)
if(is.na(num))
stop("Couldn't identify phenotype \"", pheno.col, "\"")
pheno.col <- num
}
if(pheno.col < 1 || pheno.col > nphe(cross))
stop("pheno.col should be between 1 and ", nphe(cross))
pheno <- cross$pheno[,pheno.col]
if(!is.null(covar) && nrow(covar) != length(pheno))
stop("nrow(covar) != no. individuals in cross.")
if(!is.null(covar)) phcovar <- cbind(pheno, covar)
else phcovar <- as.data.frame(pheno, stringsAsFactors=TRUE)
hasmissing <- apply(phcovar, 1, function(a) any(is.na(a)))
if(all(hasmissing))
stop("All individuals are missing phenotypes or covariates.")
if(any(hasmissing)) {
origcross <- cross
origqtl <- qtl
cross <- subset(cross, ind=!hasmissing)
pheno <- pheno[!hasmissing]
if(!is.null(covar)) covar <- covar[!hasmissing,,drop=FALSE]
if(method=="imp")
qtl$geno <- qtl$geno[!hasmissing,,,drop=FALSE]
else
qtl$prob <- lapply(qtl$prob, function(a) a[!hasmissing,,drop=FALSE])
qtl$n.ind <- sum(!hasmissing)
}
# if missing formula, include the additive QTL plus all covariates
if(missing(formula)) {
formula <- paste("y ~", paste(qtl$altname, collapse="+"))
if(!is.null(covar))
formula <- paste(formula, "+", paste(colnames(covar), collapse="+"))
formula <- as.formula(formula)
}
# drop covariates that are not in the formula
if(!is.null(covar)) {
theterms <- rownames(attr(terms(formula), "factors"))
m <- match(colnames(covar), theterms)
if(all(is.na(m))) covar <- NULL
else covar <- covar[,!is.na(m),drop=FALSE]
}
formula <- checkformula(formula, qtl$altname, colnames(covar))
# identify which QTL are in the model formula
tovary <- sort(parseformula(formula, qtl$altname, colnames(covar))$idx.qtl)
if(length(tovary) != qtl$n.qtl)
reducedqtl <- dropfromqtl(qtl, index=(1:qtl$n.qtl)[-tovary])
else reducedqtl <- qtl
# if a QTL is missing from the formula, we need to revise the formula, moving
# everything over, for use in scanqtl
if(any(1:length(tovary) != tovary)) {
tempform <- strsplit(deparseQTLformula(formula), " *~ *")[[1]][2]
terms <- strsplit(tempform, " *\\+ *")[[1]]
for(j in seq(along=terms)) {
if(length(grep(":", terms[j])) > 0) { # interaction
temp <- strsplit(terms[j], " *: *")[[1]]
for(k in seq(along=temp)) {
g <- grep("^[Qq][0-9]+$", temp[k])
if(length(g) > 0) {
num <- as.numeric(substr(temp[k], 2, nchar(temp[k])))
temp[k] <- paste("Q", which(tovary == num), sep="")
}
}
terms[j] <- paste(temp, collapse=":")
}
else {
g <- grep("^[Qq][0-9]+$", terms[j])
if(length(g) > 0) {
num <- as.numeric(substr(terms[j], 2, nchar(terms[j])))
terms[j] <- paste("Q", which(tovary == num), sep="")
}
}
}
formula <- as.formula(paste("y ~", paste(terms, collapse=" + ")))
}
curpos <- pos[tovary]
chrnam <- chr[tovary]
if(verbose) cat("pos:", curpos, "\n")
converged <- FALSE
oldo <- NULL
lc <- length(chrnam)
lastout <- vector("list", length(curpos))
names(lastout) <- qtl$name[tovary]
sexpgm <- getsex(cross)
cross.attr <- attributes(cross)
for(i in 1:maxit) {
if(keeplodprofile) # do drop-one analysis
basefit <- fitqtlengine(pheno=pheno, qtl=reducedqtl, covar=covar, formula=formula,
method=method, model=model, dropone=TRUE, get.ests=FALSE,
run.checks=FALSE, cross.attr=cross.attr,
crosstype=crosstype(cross), sexpgm=sexpgm,
tol=tol, maxit=maxit.fitqtl, forceXcovar=forceXcovar)
else
basefit <- fitqtlengine(pheno=pheno, qtl=reducedqtl, covar=covar, formula=formula,
method=method, model=model, dropone=FALSE, get.ests=FALSE,
run.checks=FALSE, cross.attr=cross.attr,
crosstype=crosstype(cross), sexpgm=sexpgm,
tol=tol, maxit=maxit.fitqtl, forceXcovar=forceXcovar)
if(i==1) {
origlod <- curlod <- thisitlod <- basefit$result.full[1,4]
origpos <- curpos
}
if(verbose) cat("Iteration", i, "\n")
o <- sample(lc)
# make sure the first here was not the last before
if(!is.null(oldo))
while(o[1] != oldo[lc])
o <- sample(lc)
oldo <- o
newpos <- curpos
for(j in o) {
otherchr <- chrnam[-j]
otherpos <- newpos[-j]
thispos <- as.list(newpos)
if(any(otherchr == chrnam[j])) { # linked QTLs
linkedpos <- otherpos[otherchr==chr[j]]
if(any(linkedpos < newpos[j]))
low <- max(linkedpos[linkedpos < newpos[j]])
else low <- -Inf
if(any(linkedpos > newpos[j]))
high <- min(linkedpos[linkedpos > newpos[j]])
else high <- Inf
thispos[[j]] <- c(low, high)
}
else
thispos[[j]] <- c(-Inf, Inf)
out <- scanqtl(cross=cross, pheno.col=pheno.col, chr=chrnam, pos=thispos,
covar=covar, formula=formula, method=method, model=model,
incl.markers=incl.markers,
verbose=scanqtl.verbose, tol=tol, maxit=maxit.fitqtl,
forceXcovar=forceXcovar)
lastout[[j]] <- out
newpos[j] <- as.numeric(strsplit(names(out)[!is.na(out) & out==max(out,na.rm=TRUE)],"@")[[1]][2])
if(verbose) {
cat(" Q", j, " pos: ", curpos[j], " -> ", newpos[j], "\n", sep="")
cat(" LOD increase: ", round(max(out, na.rm=TRUE) - curlod, 3), "\n")
}
curlod <- max(out, na.rm=TRUE)
}
if(verbose) {
cat("all pos:", curpos, "->", newpos, "\n")
cat("LOD increase at this iteration: ", round(curlod - thisitlod, 3), "\n")
}
thisitlod <- curlod
if(max(abs(curpos - newpos)) < mind) {
converged <- TRUE
break
}
curpos <- newpos
reducedqtl <- replaceqtl(cross, reducedqtl, seq(length(curpos)),
reducedqtl$chr, curpos, reducedqtl$name)
}
if(verbose) {
cat("overall pos:", origpos, "->", newpos, "\n")
cat("LOD increase overall: ", round(curlod - origlod, 3), "\n")
}
if(!converged) warning("Didn't converge.")
# do the qtl have custom names?
g <- grep("^.+@[0-9\\.]+$", qtl$name)
if(length(g) == length(qtl$name)) thenames <- NULL
else thenames <- qtl$name
if(any(hasmissing)) {
qtl <- origqtl
cross <- origcross
}
for(j in seq(along=tovary))
qtl <- replaceqtl(cross, qtl, tovary[j], chrnam[j], newpos[j])
if(!is.null(thenames)) qtl$name <- thenames
if(keeplodprofile) {
# subtract off the results from the drop-one analysis from the LOD profiles
dropresult <- basefit$result.drop
if(is.null(dropresult)) {
if(length(lastout)==1) {
dropresult <- rbind(c(NA,NA, basefit$result.full[1,4]))
rownames(dropresult) <- names(lastout)
}
else
stop("There's a problem: need dropresult, but didn't obtain one.")
}
rn <- rownames(dropresult)
qn <- names(lastout)
for(i in seq(along=lastout)) {
if(sum(rn==qn[i])>1) # ack! multiple QTL at same position
warning("Multiple QTL at the same location.")
lastout[[i]] <- lastout[[i]] - (max(lastout[[i]], na.rm=TRUE) - max(dropresult[rn==qn[i],3]))
pos <- as.numeric(matrix(unlist(strsplit(names(lastout[[i]]), "@")),byrow=TRUE,ncol=2)[,2])
chr <- rep(qtl$chr[tovary][i], length(pos))
lastout[[i]] <- data.frame(chr=chr, pos=pos, lod=as.numeric(lastout[[i]]), stringsAsFactors=TRUE)
}
names(lastout) <- qtl$name[tovary]
# make the profiles scanone objects
for(i in seq(along=lastout)) {
class(lastout[[i]]) <- c("scanone", "data.frame")
thechr <- qtl$chr[i]
if(method=="imp")
detailedmap <- attr(cross$geno[[thechr]]$draws,"map")
else
detailedmap <- attr(cross$geno[[thechr]]$prob,"map")
if(is.matrix(detailedmap)) detailedmap <- detailedmap[1,]
r <- range(lastout[[i]][,2])+c(-1e-5, 1e-5)
rn <- names(detailedmap)[detailedmap>=r[1] & detailedmap<=r[2]]
o <- grep("^loc-*[0-9]+",rn)
if(length(o) > 0) # inter-marker locations cited as "c*.loc*"
rn[o] <- paste("c",thechr,".",rn[o],sep="")
# if(length(rn) != nrow(lastout[[i]])) return(list(lastout[[i]], rn, detailedmap))
if(length(rn) == nrow(lastout[[i]])) rownames(lastout[[i]]) <- rn
}
attr(qtl, "lodprofile") <- lastout
}
# if there's a pLOD attribute, revise it
if("pLOD" %in% names(attributes(qtl)) && curlod > origlod)
attr(qtl,"pLOD") <- attr(qtl,"pLOD") + curlod - origlod
qtl
}
######################################################################
# plotLodProfile
#
# This is for creating a plot of 1-d LOD profiles calculated within
# refineqtl.
######################################################################
plotLodProfile <-
function(qtl, chr, incl.markers=TRUE, gap=25, lwd=2, lty=1, col="black",
qtl.labels=TRUE, mtick=c("line", "triangle"),
show.marker.names=FALSE, alternate.chrid=FALSE, add=FALSE,
showallchr=FALSE, labelsep=5, ...)
{
if(!inherits(qtl, "qtl"))
stop("Input qtl is not a qtl object")
if(nqtl(qtl) == 0)
stop("Null QTL model; there are no profiles to plot")
lodprof <- attr(qtl, "lodprofile")
if(is.null(lodprof))
stop("You must first run refineqtl, using keeplodprofile=TRUE")
m <- match(qtl$name, names(lodprof))
if(any(is.na(m)))
qtl <- dropfromqtl(qtl, index=which(is.na(m)), drop.lod.profile=FALSE)
# reorder qtl by position
if(qtl$n.qtl > 1) {
chrindex <- match(qtl$chr, names(attr(qtl, "map")))
if(any(is.na(chrindex)))
stop("Cannot find chr ", qtl$chr[is.na(chrindex)])
neworder <- order(chrindex, qtl$pos)
if(any(neworder != seq(qtl$n.qtl))) {
qtl <- reorderqtl(qtl, neworder)
lodprof <- attr(qtl, "lodprofile")
}
}
n.qtl <- length(lodprof)
if(length(lwd) == 1) lwd <- rep(lwd, n.qtl)
else {
if(length(lwd) != n.qtl) {
warning("lwd should have length 1 or ", n.qtl)
lwd <- rep(lwd[1], n.qtl)
}
else lwd <- lwd[neworder]
}
if(length(lty) == 1) lty <- rep(lty, n.qtl)
else {
if(length(lty) != n.qtl) {
warning("lty should have length 1 or ", n.qtl)
lty <- rep(lty[1], n.qtl)
}
else lty <- lty[neworder]
}
if(length(col) == 1) col <- rep(col, n.qtl)
else {
if(length(col) != n.qtl) {
warning("col should have length 1 or ", n.qtl)
if(length(col) < n.qtl)
col <- rep(col, n.qtl)[1:n.qtl]
else
col <- col[1:n.qtl]
}
else col <- col[neworder]
}
map <- attr(qtl, "map")
if(is.null(map))
stop("Input qtl object should contain the genetic map.")
thechr <- unique(qtl$chr)
orderedchr <- names(map)
if(showallchr) chr2keep <- seq(along=orderedchr)
else chr2keep <- which(!is.na(match(orderedchr, thechr)))
tempscan <- NULL
for(i in chr2keep) {
temp <- data.frame(chr=orderedchr[i],
pos=as.numeric(map[[i]]),
lod=NA, stringsAsFactors=TRUE)
rownames(temp) <- names(map[[i]])
tempscan <- rbind(tempscan, temp)
}
class(tempscan) <- c("scanone", "data.frame")
if(missing(chr)) {
if(showallchr) chr <- orderedchr
else chr <- thechr
}
dontskip <- which(!is.na(match(qtl$chr, chr)))
if(length(dontskip)==0)
stop("Nothing to plot.")
ymax <- max(sapply(lodprof[dontskip], function(a) max(a[,3], na.rm=TRUE)))
begend <- matrix(unlist(tapply(tempscan[,2],tempscan[,1],range)),ncol=2,byrow=TRUE)
rownames(begend) <- unique(tempscan[,1])
begend <- begend[as.character(chr),,drop=FALSE]
len <- begend[,2]-begend[,1]
if(length(chr)==1) start <- 0
else start <- c(0,cumsum(len+gap))-c(begend[,1],0)
names(start) <- chr
dots <- list(...)
if(!add) {
if("ylim" %in% names(dots)) {
plot.scanone(tempscan, chr=chr, incl.markers=incl.markers, gap=gap,
mtick=mtick, show.marker.names=show.marker.names,
alternate.chrid=alternate.chrid, type="n", ...)
}
else {
if(qtl.labels)
ylim <- c(0, ymax+1)
else
ylim <- c(0, ymax)
plot.scanone(tempscan, chr=chr, incl.markers=incl.markers, gap=gap,
mtick=mtick, show.marker.names=show.marker.names,
alternate.chrid=alternate.chrid, type="n", ylim=ylim,
...)
}
}
for(i in dontskip) {
temp <- rbind(tempscan[tempscan[,1] != qtl$chr[i] |
(tempscan[,1] == qtl$chr[i] & (tempscan[,2] < min(lodprof[[i]][,2]) |
tempscan[,2] > max(lodprof[[i]][,2], na.rm=TRUE))),],
lodprof[[i]])
temp <- temp[order(match(temp[,1], orderedchr), temp[,2]),]
class(temp) <- c("scanone", "data.frame")
plot.scanone(temp, chr=chr, incl.markers=FALSE, gap=gap, add=TRUE,
col=col[i], lwd=lwd[i], lty=lty[i], ...)
if(qtl.labels) {
maxlod <- max(temp[,3], na.rm=TRUE)
maxpos <- median(temp[!is.na(temp[,3]) & temp[,3]==maxlod,2] + start[qtl$chr[i]])
d <- diff(par("usr")[3:4]*labelsep/100)
text(maxpos, maxlod + d, names(lodprof)[i], col=col[i], font=(lwd[i]>1)+1, ...)
}
}
invisible()
}
# end of refineqtl.R
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.