######################################################################
#
# fitqtl.R
#
# copyright (c) 2002-2019, Hao Wu and Karl W. Broman
# last modified Dec, 2019
# first written Apr, 2002
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 3, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose. See the GNU
# General Public License, version 3, for more details.
#
# A copy of the GNU General Public License, version 3, is available
# at http://www.r-project.org/Licenses/GPL-3
#
# Part of the R/qtl package
# Contains: fitqtl, fitqtlengine, parseformula, summary.fitqtl,
# print.summary.fitqtl, mybinaryrep, deparseQTLformula
# printQTLformulanicely
#
######################################################################
######################################################################
#
# This is the function to fit a model and generate some tables
#
# Only Haley-Knott regression and multiple imputation are implemented.
#
######################################################################
fitqtl <-
function(cross, pheno.col=1, qtl, covar=NULL, formula, method=c("imp", "hk"),
model=c("normal", "binary"), dropone=TRUE, get.ests=FALSE,
run.checks=TRUE, tol=1e-4, maxit=1000, forceXcovar=FALSE)
{
# some input checking stuff in here
if( !inherits(cross, "cross") )
stop("The cross argument must be an object of class \"cross\".")
if( !inherits(qtl, "qtl") )
stop("The qtl argument must be an object of class \"qtl\".")
if(!is.null(covar) && !is.data.frame(covar)) {
if(is.matrix(covar) && is.numeric(covar))
covar <- as.data.frame(covar, stringsAsFactors=TRUE)
else stop("covar should be a data.frame")
}
if(LikePheVector(pheno.col, nind(cross), nphe(cross))) {
cross$pheno <- cbind(pheno.col, cross$pheno)
pheno.col <- 1
}
if(length(pheno.col) > 1) {
pheno.col <- pheno.col[1]
warning("fitqtl can take just one phenotype; only the first will be used")
}
if(is.character(pheno.col)) {
num <- find.pheno(cross, pheno.col)
if(is.na(num))
stop("Couldn't identify phenotype \"", pheno.col, "\"")
pheno.col <- num
}
if(pheno.col < 1 | pheno.col > nphe(cross))
stop("pheno.col values should be between 1 and the no. phenotypes")
pheno <- cross$pheno[,pheno.col]
if(!is.null(covar) && nrow(covar) != length(pheno))
stop("nrow(covar) != no. individuals in cross.")
method <- match.arg(method)
model <- match.arg(model)
if(model=="binary" && any(!is.na(pheno) & pheno != 0 & pheno != 1))
stop("For model=\"binary\", phenotypes must by 0 or 1.")
# allow formula to be a character string
if(!missing(formula) && is.character(formula))
formula <- as.formula(formula)
if(method=="imp") {
if(!("geno" %in% names(qtl))) {
if("prob" %in% names(qtl)) {
warning("The qtl object doesn't contain imputations; using method=\"hk\".")
method <- "hk"
}
else
stop("The qtl object needs to be created with makeqtl with what=\"draws\".")
}
}
else {
if(!("prob" %in% names(qtl))) {
if("geno" %in% names(qtl)) {
warning("The qtl object doesn't contain QTL genotype probabilities; using method=\"imp\".")
method <- "imp"
}
else
stop("The qtl object needs to be created with makeqtl with what=\"prob\".")
}
}
if(qtl$n.ind != nind(cross)) {
warning("No. individuals in qtl object doesn't match that in the input cross; re-creating qtl object.")
if(method=="imp")
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
else
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="prob")
}
if(method=="imp" && dim(qtl$geno)[3] != dim(cross$geno[[1]]$draws)[3]) {
warning("No. imputations in qtl object doesn't match that in the input cross; re-creating qtl object.")
qtl <- makeqtl(cross, qtl$chr, qtl$pos, qtl$name, what="draws")
}
fitqtlengine(pheno=pheno, qtl=qtl, covar=covar, formula=formula,
method=method, model=model, dropone=dropone, get.ests=get.ests,
run.checks=run.checks, cross.attr=attributes(cross), crosstype=crosstype(cross),
sexpgm=getsex(cross), tol=tol, maxit=maxit, forceXcovar=forceXcovar)
}
fitqtlengine <-
function(pheno, qtl, covar=NULL, formula, method=c("imp", "hk"),
model=c("normal", "binary"),
dropone=TRUE, get.ests=FALSE, run.checks=TRUE, cross.attr,
crosstype, sexpgm, tol, maxit, forceXcovar=FALSE)
{
model <- match.arg(model)
method <- match.arg(method)
# local variables
n.ind <- qtl$n.ind # number of individuals
n.qtl <- qtl$n.qtl # number of selected markers
n.gen <- qtl$n.gen # number of genotypes
if(method=="imp")
n.draws <- dim(qtl$geno)[3] # number of draws
if( is.null(covar) ) # number of covariates
n.covar <- 0
else
n.covar <- ncol(covar)
# if formula is missing, build one
# all QTLs and covariates will be additive by default
if(missing(formula)) {
tmp.Q <- paste("Q", 1:n.qtl, sep="") # QTL term names
formula <- "y~Q1"
if(n.qtl > 1)
for (i in 2:n.qtl)
formula <- paste(formula, tmp.Q[i], sep="+")
if (n.covar) { # if covariate is not empty
tmp.C <- colnames(covar) # covariate term names
for(i in 1:n.covar)
formula <- paste(formula, tmp.C[i], sep="+")
}
formula <- as.formula(formula)
}
# check input formula
if(run.checks) {
formula <- checkformula(formula, qtl$altname, colnames(covar))
if(qtl$n.ind != length(pheno))
stop("No. individuals in qtl object doesn't match length of input phenotype.")
# drop covariates that are not in the formula
if(!is.null(covar)) {
theterms <- rownames(attr(terms(formula), "factors"))
m <- match(colnames(covar), theterms)
if(all(is.na(m))) covar <- NULL
else covar <- covar[,!is.na(m),drop=FALSE]
}
# check phenotypes and covariates; drop ind'ls with missing values
if(!is.null(covar)) phcovar <- cbind(pheno, covar)
else phcovar <- as.data.frame(pheno, stringsAsFactors=TRUE)
if(any(is.na(phcovar))) {
if(ncol(phcovar)==1) hasmissing <- is.na(phcovar)
else hasmissing <- apply(phcovar, 1, function(a) any(is.na(a)))
if(all(hasmissing))
stop("All individuals are missing phenotypes or covariates.")
if(any(hasmissing)) {
warning("Dropping ", sum(hasmissing), " individuals with missing phenotypes.\n")
pheno <- pheno[!hasmissing]
n.ind <- qtl$n.ind <- sum(!hasmissing)
if(method=="imp")
qtl$geno <- qtl$geno[!hasmissing,,,drop=FALSE]
else
qtl$prob <- lapply(qtl$prob, function(a) a[!hasmissing,,drop=FALSE])
if(!is.null(covar)) covar <- covar[!hasmissing,,drop=FALSE]
# subset sexpgm
for(i in seq(along=sexpgm))
if(!is.null(sexpgm[[i]]))
sexpgm[[i]] <- sexpgm[[i]][!hasmissing]
}
}
}
# parse the input formula
p <- parseformula(formula, qtl$altname, colnames(covar))
# make an array n.gen.QC to represent the genotype numbers
# for all input QTLs and covariates. For covariates the
# number of genotyps is 1. This makes programming easier
n.gen.QC <- c(n.gen[p$idx.qtl]-1, rep(1, p$n.covar))
# covariates to be passed to C function
# This is done in case of that user input covar but has no covar in formula
covar.C <- NULL
if(!is.null(p$idx.covar))
covar.C <- as.matrix(covar[,p$idx.covar,drop=FALSE])
sizefull <- 1+sum(n.gen.QC)
if(p$n.int > 0) {
form <- p$formula.intmtx*n.gen.QC
if(!is.matrix(form)) {
sizefull <- sizefull + prod(form[form!=0])
}
else {
form <- apply(form,2,function(a) prod(a[a != 0]))
sizefull <- sizefull + sum(form)
}
}
if(method != "imp") {
# form genotype probabilities as a matrix
prob <- matrix(ncol=sum(qtl$n.gen[p$idx.qtl]), nrow=n.ind)
curcol <- 0
for(i in p$idx.qtl) {
prob[,curcol+1:n.gen[i]] <- qtl$prob[[i]]
curcol <- curcol + n.gen[i]
}
}
Xadjustment <- scanoneXnull(crosstype, sexpgm, cross.attr)
n.origcovar <- p$n.covar
if((sum(qtl$chrtype[p$idx.qtl]=="X") >= 1 || forceXcovar) && Xadjustment$adjustX) { # need to include X chromosome covariates
adjustX <- TRUE
n.newcovar <- ncol(Xadjustment$sexpgmcovar)
n.gen.QC <- c(n.gen.QC, rep(1, n.newcovar))
p$n.covar <- p$n.covar + n.newcovar
covar.C <- cbind(covar.C, Xadjustment$sexpgmcovar)
sizefull <- sizefull + n.newcovar
if(p$n.int==1)
p$formula.intmtx <- c(p$formula.intmtx, rep(0,n.newcovar))
if(p$n.int>1) {
for(i in 1:n.newcovar)
p$formula.intmtx <- rbind(p$formula.intmtx, rep(0,p$n.int))
}
}
else adjustX <- FALSE
# call C function to do the genome scan
if(model=="normal") {
if(method == "imp") {
z <- .C("R_fitqtl_imp",
as.integer(n.ind), # number of individuals
as.integer(p$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.integer(n.draws), # number of draws
as.integer(qtl$geno[,p$idx.qtl,]), # genotypes for selected marker
as.integer(p$n.covar), # number of covariate
as.double(covar.C), # covariate
as.integer(p$formula.intmtx), # formula matrix for interactive terms
as.integer(p$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(get.ests), # get estimates?
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
ests=as.double(rep(0,sizefull)),
ests.cov=as.double(rep(0,sizefull*sizefull)),
design.mat=as.double(rep(0,sizefull*n.ind)),
matrix.rank=as.integer(0), # on return, minimum of matrix rank across imputations
resid=as.double(rep(0,n.ind)), # on return, the average residuals across imputations
PACKAGE="qtl")
}
else {
z <- .C("R_fitqtl_hk",
as.integer(n.ind), # number of individuals
as.integer(p$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.double(prob), # QTL genotype probabilities
as.integer(p$n.covar), # number of covariate
as.double(covar.C), # covariates
as.integer(p$formula.intmtx), # formula matrix for interactive terms
as.integer(p$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(get.ests), # get estimates?
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
ests=as.double(rep(0,sizefull)),
ests.cov=as.double(rep(0,sizefull*sizefull)),
design.mat=as.double(rep(0,sizefull*n.ind)),
matrix.rank=as.integer(0), # on return, rank of matrix
resid=as.double(rep(0,n.ind)), # on return, residuals from the fit
PACKAGE="qtl")
}
}
else {
if(method=="imp") {
z <- .C("R_fitqtl_imp_binary",
as.integer(n.ind), # number of individuals
as.integer(p$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.integer(n.draws), # number of draws
as.integer(qtl$geno[,p$idx.qtl,]), # genotypes for selected marker
as.integer(p$n.covar), # number of covariate
as.double(covar.C), # covariate
as.integer(p$formula.intmtx), # formula matrix for interactive terms
as.integer(p$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(get.ests), # get estimates?
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
ests=as.double(rep(0,sizefull)),
ests.cov=as.double(rep(0,sizefull*sizefull)),
design.mat=as.double(rep(0,sizefull*n.ind)),
as.double(tol),
as.integer(maxit),
matrix.rank=as.integer(0), # on return, minimum of matrix rank across imputations
PACKAGE="qtl")
}
else {
z <- .C("R_fitqtl_hk_binary",
as.integer(n.ind), # number of individuals
as.integer(p$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.double(prob), # QTL genotype probabilities
as.integer(p$n.covar), # number of covariate
as.double(covar.C), # covariates
as.integer(p$formula.intmtx), # formula matrix for interactive terms
as.integer(p$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(get.ests), # get estimates?
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
ests=as.double(rep(0,sizefull)),
ests.cov=as.double(rep(0,sizefull*sizefull)),
design.mat=as.double(rep(0,sizefull*n.ind)),
# convergence
as.double(tol),
as.integer(maxit),
matrix.rank=as.integer(0), # on return, rank of matrix
PACKAGE="qtl")
}
}
matrix.rank <- z$matrix.rank
matrix.ncol <- sizefull
residuals <- z$resid
if(get.ests) {
# first, construct the new design matrix
# X = the matrix used in the C coe
# Z = the matrix we want
thenames <- qtl$name[p$idx.qtl]
if(n.covar > 0) thenames <- c(thenames,names(covar)[p$idx.covar])
ests <- z$ests
ests.cov <- matrix(z$ests.cov,ncol=sizefull)
if(adjustX) {
keep <- 1:(length(ests)-n.newcovar)
ests <- ests[keep]
ests.cov <- ests.cov[keep,keep]
}
if(any(qtl$n.gen[p$idx.qtl]>=4)) {
type <- crosstype
if(type == "4way")
genotypes <- c("AC","BC","AD","BD")
else {
genotypes <- cross.attr$genotypes
if(is.null(genotypes))
genotypes <- as.character(1:max(qtl$n.gen))
}
# just attach rownames for this case
thenames <- "Intercept"
if(length(p$idx.qtl) > 0) { # qtl names
qtlnames <- vector("list", length(p$idx.qtl))
names(qtlnames) <- paste("Q", 1:length(p$idx.qtl), sep="")
for(i in seq(along=p$idx.qtl)) {
qtlnames[[i]] <- paste(qtl$name[p$idx.qtl[i]], genotypes[2:qtl$n.gen[p$idx.qtl[i]]], sep=".")
thenames <- c(thenames, qtlnames[[i]])
}
}
if(p$n.covar > 0) {
covnames <- colnames(covar)[p$idx.covar]
thenames <- c(thenames, covnames)
}
if(p$n.int > 0) { # interactions
if(!is.matrix(p$formula.intmtx)) {
nam <- names(p$formula.intmtx)
p$formula.intmtx <- matrix(p$formula.intmtx, nrow=p$n.int)
colnames(p$formula.intmtx) <- nam
}
for(i in 1:p$n.int) {
wh <- which(p$formula.intmtx[i,]==1)
nam <- colnames(p$formula.intmtx)[wh]
if(length(grep("^Q[0-9]+$", nam[1])) > 0)
curnam <- qtlnames[[nam[1]]]
else
curnam <- nam[1]
for(j in 2:length(nam)) {
if(length(grep("Q[0-9]+", nam[j])) > 0)
curnam <- paste(curnam, qtlnames[[nam[j]]], sep=".")
else
curnam <- paste(curnam, nam[j], sep=".")
}
thenames <- c(thenames, curnam)
}
}
if(length(thenames) ==length(ests)) {
names(ests) <- thenames
dimnames(ests.cov) <- list(thenames, thenames)
}
else
warning("Estimated QTL effects not yet made meaningful for this case.\n ")
}
else {
X <- matrix(z$design.mat,ncol=sizefull)
Z <- matrix(0,nrow=n.ind,ncol=sizefull)
colnames(Z) <- rep("",sizefull)
# mean column
Z[,1] <- 1
colnames(Z)[1] <- "Intercept"
# ZZ stores the main effects matrices, for creating the interactions
ZZ <- vector("list",p$n.qtl+p$n.covar)
curcol <- 1
# covariates
if(p$n.covar > 0) {
for(j in 1:p$n.covar) {
Z[,curcol+j] <- ZZ[[p$n.qtl+j]] <- as.matrix(covar[,p$idx.covar[j],drop=FALSE])
colnames(Z)[curcol+j] <- colnames(ZZ[[p$n.qtl+j]]) <- names(covar)[p$idx.covar[j]]
}
curcol <- curcol + p$n.covar
}
# QTL main effects
for(i in seq(along=p$idx.qtl)) {
if(n.gen[i]==2) {
if(method=="imp") {
if(crosstype == "bc") {
Z[qtl$geno[,p$idx.qtl[i],1]==1,curcol+1] <- -0.5
Z[qtl$geno[,p$idx.qtl[i],1]==2,curcol+1] <- 0.5
} else {
Z[qtl$geno[,p$idx.qtl[i],1]==1,curcol+1] <- -1
Z[qtl$geno[,p$idx.qtl[i],1]==2,curcol+1] <- 1
}
}
else
if(crosstype == "bc") {
Z[,curcol+1] <- (qtl$prob[[p$idx.qtl[i]]][,2] - qtl$prob[[p$idx.qtl[i]]][,1])/2
} else {
Z[,curcol+1] <- (qtl$prob[[p$idx.qtl[i]]][,2] - qtl$prob[[p$idx.qtl[i]]][,1])
}
colnames(Z)[curcol+1] <- thenames[i]
}
else { # 3 genotypes
if(method=="imp") {
Z[qtl$geno[,p$idx.qtl[i],1]==1,curcol+1] <- -1
Z[qtl$geno[,p$idx.qtl[i],1]==3,curcol+1] <- 1
Z[qtl$geno[,p$idx.qtl[i],1]==2,curcol+2] <- 0.5
Z[qtl$geno[,p$idx.qtl[i],1]!=2,curcol+2] <- -0.5
}
else {
Z[,curcol+1] <- qtl$prob[[p$idx.qtl[i]]][,3] - qtl$prob[[p$idx.qtl[i]]][,1]
Z[,curcol+2] <- (qtl$prob[[p$idx.qtl[i]]][,2] - qtl$prob[[p$idx.qtl[i]]][,1] -
qtl$prob[[p$idx.qtl[i]]][,3])/2
}
colnames(Z)[curcol+1:2] <- paste(thenames[i],c("a","d"),sep="")
}
ZZ[[i]] <- Z[,curcol+1:(n.gen[i]-1),drop=FALSE]
curcol <- curcol + n.gen[i]-1
}
if(p$n.int>0) {
for(i in 1:p$n.int) {
if(p$n.int==1)
intform <- p$formula.intmtx
else
intform <- p$formula.intmtx[,i]
tempZ <- matrix(1,ncol=1,nrow=nrow(Z))
colnames(tempZ) <- ""
for(j in seq(along=intform)) {
if(intform[j]==1) {
tZ <- NULL
for(k in 1:ncol(ZZ[[j]])) {
tZZ <- tempZ * ZZ[[j]][,k]
if(all(colnames(tempZ) == ""))
colnames(tZZ) <- colnames(ZZ[[j]])[k]
else
colnames(tZZ) <- paste(colnames(tempZ),colnames(ZZ[[j]])[k],sep=":")
tZ <- cbind(tZ,tZZ)
}
tempZ <- tZ
}
}
Z[,curcol+1:ncol(tempZ)] <- tempZ
colnames(Z)[curcol+1:ncol(tempZ)] <- colnames(tempZ)
curcol <- curcol + ncol(tempZ)
}
}
b <- solve(t(Z) %*% Z, t(Z) %*% X)
ests <- as.numeric(b %*% ests)
ests.cov <- b %*% ests.cov %*% t(b)
names(ests) <- colnames(Z)
dimnames(ests.cov) <- list(colnames(Z),colnames(Z))
}
}
##### output ANOVA table for full model #####
result.full <- matrix(NA, 3, 7)
colnames(result.full) <- c("df", "SS", "MS", "LOD", "%var", "Pvalue(Chi2)",
"Pvalue(F)")
rownames(result.full) <- c("Model", "Error", "Total")
result.full[1,1] <- z$df # model degree of freedom
if(model=="normal") {
# compute the SS for total
if(adjustX) {
mpheno <- mean(pheno)
Rss0 <- sum( (pheno-mpheno)^2 )
Rss0adj <- Rss0x <- sum( lm(pheno ~ Xadjustment$sexpgmcovar)$resid^2 )
OrigModellod <- z$lod
Modellod <- z$lod + length(pheno)/2 * (log10(Rss0x) - log10(Rss0))
} else {
mpheno <- mean(pheno)
Rss0adj <- Rss0 <- sum( (pheno-mpheno)^2 )
OrigModellod <- Modellod <- z$lod
}
# third row, for Total
result.full[3,1] <- length(pheno) - 1 # total degree of freedom
result.full[3,2] <- Rss0adj # total sum of squares
# first row, for Model
result.full[1,1] <- z$df # df for Model
# Variance explained by model
result.full[1,5] <- 100 * (1 - exp(-2*Modellod*log(10)/n.ind))
result.full[1,2] <- Rss0adj * result.full[1,5]/100 # SS for model
result.full[1,3] <- result.full[1,2]/z$df # MS for model
result.full[1,4] <- Modellod # Model LOD score
# Second row, for Error
# df
result.full[2,1] <- result.full[3,1] - result.full[1,1]
# SS
result.full[2,2] <- result.full[3,2] - result.full[1,2]
# MS
result.full[2,3] <- result.full[2,2] / result.full[2,1]
# first row, P values
# P value (chi2) for model
result.full[1,6] <- 1 - pchisq(2*log(10)*Modellod, z$df)
# P value (F statistics) for model
df0 <- result.full[3,1]; df1 <- result.full[2,1];
Rss1 <- result.full[2,2]
Fstat <- ((Rss0adj-Rss1)/(df0-df1)) / (Rss1/df1)
result.full[1,7] <- 1 - pf(Fstat, df0-df1, df1)
}
else {
# third row, for Total
result.full[3,1] <- length(pheno) - 1 # total degree of freedom
result.full[3,2] <- NA # total sum of squares
# first row, for Model
result.full[1,1] <- z$df # df for Model
# Variance explained by model
result.full[1,5] <- 100 * (1 - exp(-2*z$lod*log(10)/n.ind))
result.full[1,2] <- NA # SS for model
result.full[1,3] <- NA # MS for model
result.full[1,4] <- z$lod # Model LOD score
OrigModellod <- Modellod <- z$lod
# Second row, for Error
# df
result.full[2,1] <- result.full[3,1] - result.full[1,1]
# SS
result.full[2,2] <- NA
# MS
result.full[2,3] <- NA
# first row, P values
# P value (chi2) for model
result.full[1,6] <- 1 - pchisq(2*log(10)*z$lod, z$df)
# P value (F statistics) for model
result.full[1,7] <- NA
}
############# Finish ANOVA table for full model
# initialize output object
output <- NULL
output$result.full <- result.full
# drop one at a time?
if(dropone && (p$n.qtl+n.origcovar)>1) {
# user wants to do drop one term at a time and output anova table
# get the terms etc. for input formula
f.terms <- terms(formula)
f.order <- attr(f.terms, "order")
f.label <- attr(f.terms, "term.labels")
# initialize output matrix
# ANOVA table will have five columns, e.g., df,Type III SS,
# LOD, %var, Pvalue for each dropping term
# Full model result will not be in this table
result <- matrix(0, length(f.order), 7)
colnames(result) <- c("df", "Type III SS", "LOD", "%var", "F value",
"Pvalue(Chi2)", "Pvalue(F)")
rownames(result) <- rep("",length(f.order))
drop.term.name <- NULL
formulas <- rep("", length(f.order))
lods <- rep(NA, length(f.order))
for( i in (1:length(f.order)) ) {
# loop thru all terms in formula, from the highest order
# the label of the term to be droped
label.term.drop <- f.label[i]
### find the corresponding QTL name for this term ###
# This is used for output ANOVA table
if(f.order[i] == 1) {
# this is a first order term
# if the term label is like Q(q)1, Q(q)2, etc., then it's a QTL
if( length(grep("Q[0-9]", label.term.drop, ignore.case=TRUE)) != 0) {
idx.qtlname <- as.integer(substr(label.term.drop, 2, nchar(label.term.drop)))
drop.term.name[i] <- qtl$name[idx.qtlname]
}
else { # this is a covariate
drop.term.name[i] <- label.term.drop
}
}
else {
# this is a 2nd (or higher)order and the term is a string like "Q2:Q3:C1"
# I use strsplit to split it to a string vector "Q2" "Q3" "C1".
# then take out 2 and 3 as integer. Then find out the
# QTL name from the input QTL object and concatenate them
tmp.str <- strsplit(label.term.drop,":")[[1]]
for(j in 1:length(tmp.str)) {
if( length(grep("Q[0-9]", tmp.str[j], ignore.case=TRUE)) != 0 ) {
# this is a QTL
idx.qtlname <- as.integer(substr(tmp.str[j], 2, nchar(tmp.str[j])))
tmp.str[j] <- qtl$name[idx.qtlname]
}
if(j == 1) # first term
drop.term.name[i] <- tmp.str[j]
else # not the first term
drop.term.name[i] <- paste(drop.term.name[i], tmp.str[j], sep=":")
}
}
### Finish QTL name ###
# find the indices of the term(s) to be dropped
# All terms contain label.term.drop will be dropped
idx.term.drop <- NULL
tmp.str.drop <- tolower(strsplit(label.term.drop,":")[[1]])
for(j in 1:length(f.label)) {
tmp.str.label <- tolower(strsplit(f.label[j], ":")[[1]])
if(all(tmp.str.drop %in% tmp.str.label))
idx.term.drop <- c(idx.term.drop, j)
}
# the indices of term(s) to be kept
idx.term.kept <- setdiff(1:length(f.order), idx.term.drop)
#### regenerate a formula with the kept terms additive ###
if(length(idx.term.kept) == 0) # nothing left after drop label.term.drop
stop("There will be nothing left if drop ", drop.term.name[i])
else {
# All terms for idx.term.kept will be additive
formula.new <- as.formula(paste("y~", paste(f.label[idx.term.kept], collapse="+"), sep=""))
}
### Start fitting model again
# parse the input formula
p.new <- parseformula(formula.new, qtl$altname, colnames(covar))
n.gen.QC <- c(n.gen[p.new$idx.qtl]-1, rep(1, p.new$n.covar))
formulas[i] <- deparseQTLformula(formula.new)
# covariate to be passed to C function
covar.C <- NULL
if(!is.null(p.new$idx.covar))
covar.C <- as.matrix(covar[,p.new$idx.covar,drop=FALSE])
if(method != "imp") {
# form genotype probabilities as a matrix
prob <- matrix(ncol=sum(qtl$n.gen[p.new$idx.qtl]), nrow=n.ind)
curcol <- 0
for(z in p.new$idx.qtl) {
prob[,curcol+1:n.gen[z]] <- qtl$prob[[z]]
curcol <- curcol + n.gen[z]
}
}
if(adjustX) { # need to include X chromosome covariates
n.newcovar <- ncol(Xadjustment$sexpgmcovar)
n.gen.QC <- c(n.gen.QC, rep(1, n.newcovar))
p.new$n.covar <- p.new$n.covar + n.newcovar
covar.C <- cbind(covar.C, Xadjustment$sexpgmcovar)
sizefull <- sizefull + n.newcovar
if(p.new$n.int==1)
p.new$formula.intmtx <- c(p.new$formula.intmtx, rep(0,n.newcovar))
if(p.new$n.int>1) {
for(i2 in 1:n.newcovar)
p.new$formula.intmtx <- rbind(p.new$formula.intmtx, rep(0,p.new$n.int))
}
}
# call C function fit model
if(model=="normal") {
if(method == "imp") {
z <- .C("R_fitqtl_imp",
as.integer(n.ind), # number of individuals
as.integer(p.new$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.integer(n.draws), # number of draws
as.integer(qtl$geno[,p.new$idx.qtl,]), # genotypes for selected marker
as.integer(p.new$n.covar), # number of covariate
as.double(covar.C), # covariate
as.integer(p.new$formula.intmtx), # formula matrix for interactive terms
as.integer(p.new$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(0),
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
as.double(rep(0,sizefull)),
as.double(rep(0,sizefull*sizefull)),
as.double(rep(0,n.ind*sizefull)),
matrix.rank=as.integer(0),
resid=as.double(rep(0,n.ind)), # on return, the average residuals across imputations
PACKAGE="qtl")
}
else {
z <- .C("R_fitqtl_hk",
as.integer(n.ind), # number of individuals
as.integer(p.new$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.double(prob),
as.integer(p.new$n.covar), # number of covariate
as.double(covar.C), # covariate
as.integer(p.new$formula.intmtx), # formula matrix for interactive terms
as.integer(p.new$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(0),
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
as.double(rep(0,sizefull)),
as.double(rep(0,sizefull*sizefull)),
as.double(rep(0,n.ind*sizefull)),
matrix.rank=as.integer(0),
resid=as.double(rep(0,n.ind)), # on return, the residuals
PACKAGE="qtl")
}
}
else { # binary trait
if(method=="imp") {
z <- .C("R_fitqtl_imp_binary",
as.integer(n.ind), # number of individuals
as.integer(p.new$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.integer(n.draws), # number of draws
as.integer(qtl$geno[,p.new$idx.qtl,]), # genotypes for selected marker
as.integer(p.new$n.covar), # number of covariate
as.double(covar.C), # covariate
as.integer(p.new$formula.intmtx), # formula matrix for interactive terms
as.integer(p.new$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(0),
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
as.double(rep(0,sizefull)),
as.double(rep(0,sizefull*sizefull)),
as.double(rep(0,n.ind*sizefull)),
as.double(tol),
as.integer(maxit),
matrix.rank=as.integer(0),
PACKAGE="qtl")
}
else {
z <- .C("R_fitqtl_hk_binary",
as.integer(n.ind), # number of individuals
as.integer(p.new$n.qtl), # number of qtls
as.integer(n.gen.QC), # number of genotypes QTLs and covariates
as.double(prob),
as.integer(p.new$n.covar), # number of covariate
as.double(covar.C), # covariate
as.integer(p.new$formula.intmtx), # formula matrix for interactive terms
as.integer(p.new$n.int), # number of interactions in the formula
as.double(pheno), # phenotype
as.integer(0),
# return variables
lod=as.double(0), # LOD score
df=as.integer(0), # degree of freedom
as.double(rep(0,sizefull)),
as.double(rep(0,sizefull*sizefull)),
as.double(rep(0,n.ind*sizefull)),
# convergence
as.double(tol),
as.integer(maxit),
matrix.rank = as.integer(0),
PACKAGE="qtl")
}
}
if(model=="normal" && adjustX) # adjust for X chromosome covariates
z$lod <- z$lod + length(pheno)/2 * (log10(Rss0x) - log10(Rss0))
# record the result for dropping this term
# df
result[i,1] <- result.full[1,1] - z$df
# LOD score
result[i,3] <- Modellod - z$lod
# % variance explained
result[i,4] <- result.full[1,5] - 100*(1 - 10^(-2*z$lod/n.ind))
# lod score for reduced model
lods[i] <- z$lod
# Type III SS for this term - computed from %var
if(model=="normal")
result[i,2] <- result.full[3,2] * result[i,4] / 100
else
result[i,2] <- NA
# F value
if(model=="normal") {
df0 <- length(pheno) - z$df - 1; df1 <- result.full[2,1];
Rss0p <- result.full[2,2] + result[i,2];
Rss1p <- result.full[2,2]
Fstat <- ((Rss0p-Rss1p)/(df0-df1)) / (Rss1/df1)
result[i,5] <- Fstat
# P value (F)
result[i,7] <- 1 - pf(Fstat, df0-df1, df1)
}
else # ignore F stat for binary trait
result[i,c(5,7)] <- NA
# P value (chi2)
result[i,6] <- 1 - pchisq(2*log(10)*result[i,3], result[i,1])
# assign row name
rownames(result)[i] <- drop.term.name[i]
} # finish dropping terms loop
attr(result, "formulas") <- formulas
attr(result, "lods") <- lods
# assign output object
output$result.drop <- result
} ## if(dropone)
if(get.ests)
output$ests <- list(ests=ests, covar=ests.cov)
output$lod <- output$result.full[1,4]
class(output) <- "fitqtl"
attr(output, "method") <- method
attr(output, "model") <- model
attr(output, "formula") <- deparseQTLformula(formula)
attr(output, "type") <- qtl$type
attr(output, "nind") <- length(pheno)
attr(output, "matrix.rank") <- matrix.rank
attr(output, "matrix.ncol") <- matrix.ncol
if(!is.null(residuals))
attr(output, "residuals") <- residuals
output
}
######################################################################
# checkformula
#
# check that input formula satisfies our imposed hiearchy: that
# main effects for terms in any interactions are also included
######################################################################
checkformula <-
function(formula, qtl.name, covar.name)
{
factors <- attr(terms(formula), "factors")
altform <- deparseQTLformula(formula)
if(sum(factors[1,])==0) factors <- factors[-1,,drop=FALSE]
rn <- rownames(factors)
# if mentions of "q1" or such, convert to "Q1" and such
g <- grep("^[Qq][0-9]+$", rn)
todrop <- NULL
if(length(g) >= 1) {
rownames(factors)[g] <- rn[g] <- toupper(rn[g])
if(any(table(rn) > 1)) { # now there are some duplicates
urn <- unique(rn)
for(i in urn) {
wh <- which(rn == i)
if(length(wh) > 1) {
factors[wh[1],] <- apply(factors[wh,], 2, sum)
todrop <- c(todrop, wh[-1])
rownames(factors)[wh[1]] <- rn[wh[1]]
}
}
}
}
if(length(todrop) > 0) factors <- factors[-todrop,]
rn <- rownames(factors)
if(!missing(qtl.name) || !missing(covar.name)) {
m <- match(rn, c(qtl.name, covar.name))
if(any(is.na(m)))
warning("Terms ", paste(rn[is.na(m)], collapse=" "), " not understood.")
}
# paste rows together
zo <- factors
zo[zo>1] <- 1
pzo <- apply(zo, 2, paste, collapse="")
nt <- apply(zo, 2, sum)
# form binary representations
maxnt <- max(nt)
v <- vector("list", maxnt)
for(i in 2:maxnt) {
v[[i]] <- mybinaryrep(i)
v[[i]] <- v[[i]][,-ncol(v[[i]])+c(0,1)]
}
# for each higher-order column, form all lower-order terms
for(i in which(nt > 1)) {
cur <- zo[,i]
wh <- which(cur==1)
z <- v[[nt[i]]]
zz <- matrix(0, ncol=ncol(z), nrow=length(cur))
for(j in seq(along=wh))
zz[wh[j],] <- z[j,]
pzo <- unique(c(pzo, apply(zz, 2, paste, collapse="")))
}
zo <- matrix(as.numeric(unlist(strsplit(pzo, ""))), ncol=length(pzo))
nt <- apply(zo, 2, sum)
zo <- zo[,order(nt, apply(1-zo, 2, paste, collapse="")), drop=FALSE]
rownames(zo) <- rn
# form column names
theterms <- apply(zo, 2, function(a, b) paste(b[as.logical(a)], collapse=":"),
rownames(zo))
as.formula(paste("y ~ ", paste(theterms, collapse=" + ")))
}
#####################################################################
#
# parseformula
#
# Function to be called by fitqtl. It's used to
# parse the input formula
#
# This is the internal function and not supposed to be used by user
#
#####################################################################
parseformula <- function(formula, qtl.dimname, covar.dimname)
{
# The terms for input formula
f.formula <- terms(formula)
order.term <- attr(f.formula, "order") # get the order of the terms
idx.term <- which(order.term==1) # get the first order terms
label.term <- attr(f.formula, "term.labels")[idx.term]
formula.mtx <- attr(f.formula, "factors") # formula matrix
idx.qtl <- NULL
idx.covar <- NULL
# loop thru all terms and find out how many QTLs and covariates
# are there in the formula. Construct idx.qtl and idx.covar at the same time
termisqtl <- rep(0, length(idx.term))
for (i in 1:length(idx.term)) {
# find out if there term is a QTL or a covariate
# ignore the case for QTLs, e.g., Q1 is equivalent to q1
idx.tmp <- grep(paste(label.term[i],"$", sep=""),
qtl.dimname, ignore.case=TRUE)
if( length(idx.tmp) ) { # it's a QTL
idx.qtl <- c(idx.qtl, idx.tmp)
termisqtl[i] <- 1
}
else if(label.term[i] %in% covar.dimname) # it's a covariate
idx.covar <- c(idx.covar, which(label.term[i]==covar.dimname))
else
stop("Unrecognized term ", label.term[i], " in formula")
}
n.qtl <- length(idx.qtl) # number of QTLs in formula
n.covar <- length(idx.covar) # number of covariates in formula
# now idx.qtl and idx.covar are the indices for genotype
# and covariate matrices according to input formula
# loop thru all terms again and reorganize formula.mtx
formula.idx <- NULL
ii <- 1
jj <- 1
for (i in 1:length(idx.term)) {
# if(label.term[i] %in% qtl.dimname) { # it's a QTL
if(termisqtl[i]) {
formula.idx <- c(formula.idx, ii)
ii <- ii+1
}
else { # it's a covariate
formula.idx <- c(formula.idx, jj+n.qtl)
jj <- jj+1
}
}
# reorganize formula.mtx according to formula.idx
# remove the first row (for y)
formula.mtx <- formula.mtx[2:nrow(formula.mtx),]
# rearrange the rows according to formula.idx if there's more than one row
if(length(formula.idx) > 1)
formula.mtx <- formula.mtx[order(formula.idx),]
# take out only part of the matrix for interactions and pass to C function
# all the input QTLs and covariates for C function will be additive
n.int <- length(order.term) - length(idx.term) # number of interactions
if(n.int != 0)
formula.intmtx <- formula.mtx[,(length(idx.term)+1):length(order.term)]
else # no interaction terms
formula.intmtx <- NULL
# return object
result <- NULL
result$idx.qtl <- idx.qtl
result$n.qtl <- n.qtl
result$idx.covar <- idx.covar
result$n.covar <- n.covar
result$formula.intmtx <- formula.intmtx
result$n.int <- n.int
result
}
#####################################################################
#
# summary.fitqtl
#
#####################################################################
summary.fitqtl <-
function(object, pvalues=TRUE, simple=FALSE, ...)
{
if(!inherits(object, "fitqtl"))
stop("Input should have class \"fitqtl\".")
# this is just an interface.
if("ests" %in% names(object)) {
ests <- object$ests$ests
se <- sqrt(diag(object$ests$covar))
object$ests <- cbind(est=ests, SE=se, t=ests/se)
}
class(object) <- "summary.fitqtl"
if(simple) pvalues <- FALSE
attr(object, "pvalues") <- pvalues
attr(object, "simple") <- simple
object
}
#####################################################################
#
# print.summary.fitqtl
#
#####################################################################
print.summary.fitqtl <- function(x, ...)
{
cat("\n")
cat("\t\tfitqtl summary\n\n")
meth <- attr(x, "method")
mod <- attr(x, "model")
if(is.null(mod)) mod <- "normal"
if(meth=="imp") meth <- "multiple imputation"
else if(meth=="hk") meth <- "Haley-Knott regression"
cat("Method:", meth, "\n")
cat("Model: ", mod, "phenotype\n")
cat("Number of observations :", attr(x, "nind"), "\n\n")
# print ANOVA table for full model
cat("Full model result\n")
cat("---------------------------------- \n")
cat("Model formula:")
w <- options("width")[[1]]
printQTLformulanicely(attr(x, "formula"), " ", w+5, w)
cat("\n")
pval <- attr(x, "pvalues")
simple <- attr(x, "simple")
if(!is.null(pval) && !pval)
x$result.full <- x$result.full[,-ncol(x$result.full)+(0:1)]
if(mod=="binary" || (!is.null(simple) && simple))
x$result.full <- x$result.full[1,-c(2:3,7),drop=FALSE]
print(x$result.full, quote=FALSE, na.print="")
cat("\n")
# print ANOVA table for dropping one at a time analysis (if any)
if("result.drop" %in% names(x)) {
cat("\n")
cat("Drop one QTL at a time ANOVA table: \n")
cat("---------------------------------- \n")
# use printCoefmat instead of print.data.frame
# make sure the last column is P value
if(!is.null(pval) && !pval)
x$result.drop <- x$result.drop[,-ncol(x$result.drop)+(0:1)]
if(mod=="binary" || (!is.null(simple) && simple))
x$result.drop <- x$result.drop[,-c(2,5,7),drop=FALSE]
printCoefmat(x$result.drop, digits=4, cs.ind=1, P.values=TRUE, has.Pvalue=TRUE)
cat("\n")
}
if("ests" %in% names(x)) {
cat("\n")
cat("Estimated effects:\n")
cat("-----------------\n")
printCoefmat(x$ests,digits=4)
cat("\n")
}
}
######################################################################
# binary repreentation of the numbers 1...2^n;
# used in checkformula
######################################################################
mybinaryrep <-
function(n)
{
lx <- 2^n
x <- 1:lx
ans <- 0:(n-1)
x <- matrix(rep(x,rep(n, lx)), ncol=lx)
(x %/% 2^ans) %% 2
}
######################################################################
# deparseQTLformula: turn QTL formula into a string
######################################################################
deparseQTLformula <-
function(formula, reorderterms=FALSE)
{
if(is.null(formula)) return(NULL)
if(reorderterms) {
if(is.character(formula)) formula <- as.formula(formula)
factors <- colnames(attr(terms(formula), "factors"))
wh <- grep("^[Qq][0-9]+$", factors)
if(length(wh)>0)
factors[wh] <- paste("Q", sort(as.numeric(substr(factors[wh], 2, nchar(factors[wh])))), sep="")
wh <- grep(":", factors)
if(length(wh)>0) {
temp <- strsplit(factors[wh], ":")
temp <- sapply(temp, function(a) {
wh <- grep("^[Qq][0-9]+$", a)
if(any(wh)) a[wh] <- paste("Q", sort(as.numeric(substr(a[wh], 2, nchar(a[wh])))), sep="")
paste(a[order(is.na(match(seq(along=a),wh)))], collapse=":")
})
factors[wh] <- temp
}
return(paste("y ~ ", paste(factors, collapse=" + "), sep=""))
}
if(is.character(formula)) return(formula)
paste(as.character(formula)[c(2,1,3)], collapse=" ")
}
printQTLformulanicely <-
function(formula, header, width, width2, sep=" ")
{
if(!is.character(formula)) formula <- deparseQTLformula(formula)
thetext <- unlist(strsplit(formula, " "))
if(missing(width2)) width2 <- width
nleft <- width - nchar(header)
nsep <- nchar(sep)
if(length(thetext) < 2) cat("", thetext, "\n", sep=sep)
else {
z <- paste("", thetext[1], sep=sep, collapse=sep)
for(j in 2:length(thetext)) {
if(nchar(z) + nsep + nchar(thetext[j]) > nleft) {
cat(z, "\n")
nleft <- width2
z <- paste(header, thetext[j], sep=sep)
}
else {
z <- paste(z, thetext[j], sep=sep)
}
}
cat(z, "\n")
}
}
# end of fitqtl.R
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.